[Midnightbsd-cvs] src [7911] U trunk/sys/ofed/include/rdma: sync with freebsd rev 244242
laffer1 at midnightbsd.org
Wed Sep 14 15:35:22 EDT 2016
Revision: 7911
http://svnweb.midnightbsd.org/src/?rev=7911
Author: laffer1
Date: 2016-09-14 15:35:22 -0400 (Wed, 14 Sep 2016)
Log Message:
-----------
sync with freebsd rev 244242
Revision Links:
--------------
http://svnweb.midnightbsd.org/src/?rev=244242
Modified Paths:
--------------
trunk/sys/ofed/drivers/infiniband/hw/mlx4/Kconfig
trunk/sys/ofed/drivers/infiniband/hw/mlx4/Makefile
trunk/sys/ofed/drivers/infiniband/hw/mlx4/ah.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/cq.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/mad.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/main.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h
trunk/sys/ofed/drivers/infiniband/hw/mlx4/mr.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/qp.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/srq.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/user.h
trunk/sys/ofed/drivers/infiniband/hw/mlx4/wc.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_allocator.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_config_reg.h
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_reset.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_uar.c
trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h
trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c
trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c
trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c
trunk/sys/ofed/drivers/infiniband/util/madeye.c
trunk/sys/ofed/drivers/net/mlx4/Makefile
trunk/sys/ofed/drivers/net/mlx4/alloc.c
trunk/sys/ofed/drivers/net/mlx4/catas.c
trunk/sys/ofed/drivers/net/mlx4/cmd.c
trunk/sys/ofed/drivers/net/mlx4/cq.c
trunk/sys/ofed/drivers/net/mlx4/en_cq.c
trunk/sys/ofed/drivers/net/mlx4/en_main.c
trunk/sys/ofed/drivers/net/mlx4/en_netdev.c
trunk/sys/ofed/drivers/net/mlx4/en_port.c
trunk/sys/ofed/drivers/net/mlx4/en_port.h
trunk/sys/ofed/drivers/net/mlx4/en_resources.c
trunk/sys/ofed/drivers/net/mlx4/en_rx.c
trunk/sys/ofed/drivers/net/mlx4/en_tx.c
trunk/sys/ofed/drivers/net/mlx4/eq.c
trunk/sys/ofed/drivers/net/mlx4/fw.c
trunk/sys/ofed/drivers/net/mlx4/fw.h
trunk/sys/ofed/drivers/net/mlx4/icm.c
trunk/sys/ofed/drivers/net/mlx4/icm.h
trunk/sys/ofed/drivers/net/mlx4/intf.c
trunk/sys/ofed/drivers/net/mlx4/main.c
trunk/sys/ofed/drivers/net/mlx4/mcg.c
trunk/sys/ofed/drivers/net/mlx4/mlx4.h
trunk/sys/ofed/drivers/net/mlx4/mlx4_en.h
trunk/sys/ofed/drivers/net/mlx4/mr.c
trunk/sys/ofed/drivers/net/mlx4/pd.c
trunk/sys/ofed/drivers/net/mlx4/port.c
trunk/sys/ofed/drivers/net/mlx4/profile.c
trunk/sys/ofed/drivers/net/mlx4/qp.c
trunk/sys/ofed/drivers/net/mlx4/reset.c
trunk/sys/ofed/drivers/net/mlx4/sense.c
trunk/sys/ofed/drivers/net/mlx4/srq.c
trunk/sys/ofed/include/asm/atomic-long.h
trunk/sys/ofed/include/asm/atomic.h
trunk/sys/ofed/include/asm/byteorder.h
trunk/sys/ofed/include/asm/fcntl.h
trunk/sys/ofed/include/asm/io.h
trunk/sys/ofed/include/asm/pgtable.h
trunk/sys/ofed/include/asm/types.h
trunk/sys/ofed/include/asm/uaccess.h
trunk/sys/ofed/include/linux/bitops.h
trunk/sys/ofed/include/linux/cdev.h
trunk/sys/ofed/include/linux/compat.h
trunk/sys/ofed/include/linux/compiler.h
trunk/sys/ofed/include/linux/completion.h
trunk/sys/ofed/include/linux/delay.h
trunk/sys/ofed/include/linux/device.h
trunk/sys/ofed/include/linux/dma-attrs.h
trunk/sys/ofed/include/linux/dma-mapping.h
trunk/sys/ofed/include/linux/dmapool.h
trunk/sys/ofed/include/linux/err.h
trunk/sys/ofed/include/linux/errno.h
trunk/sys/ofed/include/linux/file.h
trunk/sys/ofed/include/linux/fs.h
trunk/sys/ofed/include/linux/gfp.h
trunk/sys/ofed/include/linux/hardirq.h
trunk/sys/ofed/include/linux/idr.h
trunk/sys/ofed/include/linux/if_arp.h
trunk/sys/ofed/include/linux/if_ether.h
trunk/sys/ofed/include/linux/if_vlan.h
trunk/sys/ofed/include/linux/in.h
trunk/sys/ofed/include/linux/in6.h
trunk/sys/ofed/include/linux/inetdevice.h
trunk/sys/ofed/include/linux/interrupt.h
trunk/sys/ofed/include/linux/io-mapping.h
trunk/sys/ofed/include/linux/io.h
trunk/sys/ofed/include/linux/ioctl.h
trunk/sys/ofed/include/linux/jiffies.h
trunk/sys/ofed/include/linux/kdev_t.h
trunk/sys/ofed/include/linux/kernel.h
trunk/sys/ofed/include/linux/kobject.h
trunk/sys/ofed/include/linux/kref.h
trunk/sys/ofed/include/linux/kthread.h
trunk/sys/ofed/include/linux/linux_compat.c
trunk/sys/ofed/include/linux/linux_idr.c
trunk/sys/ofed/include/linux/linux_radix.c
trunk/sys/ofed/include/linux/list.h
trunk/sys/ofed/include/linux/lockdep.h
trunk/sys/ofed/include/linux/log2.h
trunk/sys/ofed/include/linux/miscdevice.h
trunk/sys/ofed/include/linux/mlx4/cmd.h
trunk/sys/ofed/include/linux/mlx4/cq.h
trunk/sys/ofed/include/linux/mlx4/device.h
trunk/sys/ofed/include/linux/mlx4/doorbell.h
trunk/sys/ofed/include/linux/mlx4/driver.h
trunk/sys/ofed/include/linux/mlx4/qp.h
trunk/sys/ofed/include/linux/mlx4/srq.h
trunk/sys/ofed/include/linux/mm.h
trunk/sys/ofed/include/linux/module.h
trunk/sys/ofed/include/linux/moduleparam.h
trunk/sys/ofed/include/linux/mutex.h
trunk/sys/ofed/include/linux/net.h
trunk/sys/ofed/include/linux/netdevice.h
trunk/sys/ofed/include/linux/notifier.h
trunk/sys/ofed/include/linux/page.h
trunk/sys/ofed/include/linux/pci.h
trunk/sys/ofed/include/linux/poll.h
trunk/sys/ofed/include/linux/radix-tree.h
trunk/sys/ofed/include/linux/random.h
trunk/sys/ofed/include/linux/rbtree.h
trunk/sys/ofed/include/linux/rwlock.h
trunk/sys/ofed/include/linux/rwsem.h
trunk/sys/ofed/include/linux/scatterlist.h
trunk/sys/ofed/include/linux/sched.h
trunk/sys/ofed/include/linux/semaphore.h
trunk/sys/ofed/include/linux/slab.h
trunk/sys/ofed/include/linux/socket.h
trunk/sys/ofed/include/linux/spinlock.h
trunk/sys/ofed/include/linux/string.h
trunk/sys/ofed/include/linux/sysfs.h
trunk/sys/ofed/include/linux/timer.h
trunk/sys/ofed/include/linux/types.h
trunk/sys/ofed/include/linux/uaccess.h
trunk/sys/ofed/include/linux/vmalloc.h
trunk/sys/ofed/include/linux/wait.h
trunk/sys/ofed/include/linux/workqueue.h
trunk/sys/ofed/include/net/ip.h
trunk/sys/ofed/include/net/ipv6.h
trunk/sys/ofed/include/net/netevent.h
trunk/sys/ofed/include/net/tcp.h
trunk/sys/ofed/include/rdma/ib_addr.h
trunk/sys/ofed/include/rdma/ib_cm.h
trunk/sys/ofed/include/rdma/ib_mad.h
trunk/sys/ofed/include/rdma/ib_sa.h
trunk/sys/ofed/include/rdma/ib_smi.h
trunk/sys/ofed/include/rdma/ib_umem.h
trunk/sys/ofed/include/rdma/ib_user_cm.h
trunk/sys/ofed/include/rdma/ib_user_verbs.h
trunk/sys/ofed/include/rdma/ib_verbs.h
trunk/sys/ofed/include/rdma/iw_cm.h
trunk/sys/ofed/include/rdma/sdp_socket.h
Added Paths:
-----------
trunk/sys/ofed/drivers/infiniband/core/
trunk/sys/ofed/drivers/infiniband/core/Makefile
trunk/sys/ofed/drivers/infiniband/core/addr.c
trunk/sys/ofed/drivers/infiniband/core/agent.c
trunk/sys/ofed/drivers/infiniband/core/agent.h
trunk/sys/ofed/drivers/infiniband/core/cache.c
trunk/sys/ofed/drivers/infiniband/core/cm.c
trunk/sys/ofed/drivers/infiniband/core/cm_msgs.h
trunk/sys/ofed/drivers/infiniband/core/cma.c
trunk/sys/ofed/drivers/infiniband/core/core_priv.h
trunk/sys/ofed/drivers/infiniband/core/device.c
trunk/sys/ofed/drivers/infiniband/core/fmr_pool.c
trunk/sys/ofed/drivers/infiniband/core/iwcm.c
trunk/sys/ofed/drivers/infiniband/core/iwcm.h
trunk/sys/ofed/drivers/infiniband/core/local_sa.c
trunk/sys/ofed/drivers/infiniband/core/mad.c
trunk/sys/ofed/drivers/infiniband/core/mad_priv.h
trunk/sys/ofed/drivers/infiniband/core/mad_rmpp.c
trunk/sys/ofed/drivers/infiniband/core/mad_rmpp.h
trunk/sys/ofed/drivers/infiniband/core/multicast.c
trunk/sys/ofed/drivers/infiniband/core/notice.c
trunk/sys/ofed/drivers/infiniband/core/packer.c
trunk/sys/ofed/drivers/infiniband/core/sa.h
trunk/sys/ofed/drivers/infiniband/core/sa_query.c
trunk/sys/ofed/drivers/infiniband/core/smi.c
trunk/sys/ofed/drivers/infiniband/core/smi.h
trunk/sys/ofed/drivers/infiniband/core/sysfs.c
trunk/sys/ofed/drivers/infiniband/core/ucm.c
trunk/sys/ofed/drivers/infiniband/core/ucma.c
trunk/sys/ofed/drivers/infiniband/core/ud_header.c
trunk/sys/ofed/drivers/infiniband/core/umem.c
trunk/sys/ofed/drivers/infiniband/core/user_mad.c
trunk/sys/ofed/drivers/infiniband/core/uverbs.h
trunk/sys/ofed/drivers/infiniband/core/uverbs_cmd.c
trunk/sys/ofed/drivers/infiniband/core/uverbs_main.c
trunk/sys/ofed/drivers/infiniband/core/uverbs_marshall.c
trunk/sys/ofed/drivers/infiniband/core/verbs.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/cm.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c
trunk/sys/ofed/drivers/net/mlx4/mlx4_stats.h
trunk/sys/ofed/drivers/net/mlx4/resource_tracker.c
trunk/sys/ofed/drivers/net/mlx4/sys_tune.c
trunk/sys/ofed/drivers/net/mlx4/utils.c
trunk/sys/ofed/drivers/net/mlx4/utils.h
trunk/sys/ofed/include/linux/cache.h
trunk/sys/ofed/include/linux/clocksource.h
trunk/sys/ofed/include/linux/etherdevice.h
trunk/sys/ofed/include/linux/kmod.h
trunk/sys/ofed/include/linux/ktime.h
trunk/sys/ofed/include/linux/math64.h
trunk/sys/ofed/include/linux/printk.h
trunk/sys/ofed/include/net/if_inet6.h
trunk/sys/ofed/include/rdma/ib_pma.h
Property Changed:
----------------
trunk/sys/ofed/drivers/infiniband/hw/mlx4/Kconfig
trunk/sys/ofed/drivers/infiniband/hw/mlx4/Makefile
trunk/sys/ofed/drivers/infiniband/hw/mlx4/ah.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/cq.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/mad.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/main.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h
trunk/sys/ofed/drivers/infiniband/hw/mlx4/mr.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/qp.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/srq.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/user.h
trunk/sys/ofed/drivers/infiniband/hw/mlx4/wc.c
trunk/sys/ofed/drivers/infiniband/hw/mlx4/wc.h
trunk/sys/ofed/drivers/infiniband/hw/mthca/Kconfig
trunk/sys/ofed/drivers/infiniband/hw/mthca/Makefile
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_allocator.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_av.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_catas.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.h
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_config_reg.h
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cq.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_dev.h
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_doorbell.h
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_eq.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mad.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mcg.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.h
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mr.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_pd.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_profile.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_profile.h
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.h
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_reset.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_srq.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_uar.c
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_user.h
trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_wqe.h
trunk/sys/ofed/drivers/infiniband/ulp/ipoib/Kconfig
trunk/sys/ofed/drivers/infiniband/ulp/ipoib/Makefile
trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
trunk/sys/ofed/drivers/infiniband/ulp/sdp/Kconfig
trunk/sys/ofed/drivers/infiniband/ulp/sdp/Makefile
trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h
trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_dbg.h
trunk/sys/ofed/drivers/net/mlx4/en_port.h
trunk/sys/ofed/drivers/net/mlx4/fw.h
trunk/sys/ofed/drivers/net/mlx4/icm.h
trunk/sys/ofed/drivers/net/mlx4/mlx4.h
trunk/sys/ofed/drivers/net/mlx4/mlx4_en.h
trunk/sys/ofed/include/asm/atomic-long.h
trunk/sys/ofed/include/asm/atomic.h
trunk/sys/ofed/include/asm/byteorder.h
trunk/sys/ofed/include/asm/current.h
trunk/sys/ofed/include/asm/fcntl.h
trunk/sys/ofed/include/asm/io.h
trunk/sys/ofed/include/asm/page.h
trunk/sys/ofed/include/asm/pgtable.h
trunk/sys/ofed/include/asm/semaphore.h
trunk/sys/ofed/include/asm/system.h
trunk/sys/ofed/include/asm/types.h
trunk/sys/ofed/include/asm/uaccess.h
trunk/sys/ofed/include/linux/linux_compat.c
trunk/sys/ofed/include/linux/linux_idr.c
trunk/sys/ofed/include/linux/linux_radix.c
trunk/sys/ofed/include/linux/mlx4/cmd.h
trunk/sys/ofed/include/linux/mlx4/cq.h
trunk/sys/ofed/include/linux/mlx4/device.h
trunk/sys/ofed/include/linux/mlx4/doorbell.h
trunk/sys/ofed/include/linux/mlx4/driver.h
trunk/sys/ofed/include/linux/mlx4/qp.h
trunk/sys/ofed/include/linux/mlx4/srq.h
trunk/sys/ofed/include/rdma/Kbuild
trunk/sys/ofed/include/rdma/ib_addr.h
trunk/sys/ofed/include/rdma/ib_cache.h
trunk/sys/ofed/include/rdma/ib_cm.h
trunk/sys/ofed/include/rdma/ib_fmr_pool.h
trunk/sys/ofed/include/rdma/ib_mad.h
trunk/sys/ofed/include/rdma/ib_marshall.h
trunk/sys/ofed/include/rdma/ib_pack.h
trunk/sys/ofed/include/rdma/ib_sa.h
trunk/sys/ofed/include/rdma/ib_smi.h
trunk/sys/ofed/include/rdma/ib_umem.h
trunk/sys/ofed/include/rdma/ib_user_cm.h
trunk/sys/ofed/include/rdma/ib_user_mad.h
trunk/sys/ofed/include/rdma/ib_user_sa.h
trunk/sys/ofed/include/rdma/ib_user_verbs.h
trunk/sys/ofed/include/rdma/ib_verbs.h
trunk/sys/ofed/include/rdma/iw_cm.h
trunk/sys/ofed/include/rdma/rdma_cm.h
trunk/sys/ofed/include/rdma/rdma_cm_ib.h
trunk/sys/ofed/include/rdma/rdma_user_cm.h
trunk/sys/ofed/include/rdma/sdp_socket.h
Added: trunk/sys/ofed/drivers/infiniband/core/Makefile
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/Makefile (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/Makefile 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,32 @@
+infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o
+user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
+
+obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
+ ib_cm.o iw_cm.o $(infiniband-y)
+obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
+obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
+ $(user_access-y)
+
+ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
+ device.o fmr_pool.o cache.o
+ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+
+ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
+
+ib_sa-y := sa_query.o multicast.o notice.o local_sa.o
+
+ib_cm-y := cm.o
+
+iw_cm-y := iwcm.o
+
+rdma_cm-y := cma.o
+
+rdma_ucm-y := ucma.o
+
+ib_addr-y := addr.o
+
+ib_umad-y := user_mad.o
+
+ib_ucm-y := ucm.o
+
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
Property changes on: trunk/sys/ofed/drivers/infiniband/core/Makefile
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/addr.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/addr.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/addr.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,644 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mutex.h>
+#include <linux/inetdevice.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <net/route.h>
+#include <net/netevent.h>
+#include <rdma/ib_addr.h>
+#include <netinet/if_ether.h>
+
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("IB Address Translation");
+MODULE_LICENSE("Dual BSD/GPL");
+
+struct addr_req {
+ struct list_head list;
+ struct sockaddr_storage src_addr;
+ struct sockaddr_storage dst_addr;
+ struct rdma_dev_addr *addr;
+ struct rdma_addr_client *client;
+ void *context;
+ void (*callback)(int status, struct sockaddr *src_addr,
+ struct rdma_dev_addr *addr, void *context);
+ unsigned long timeout;
+ int status;
+};
+
+static void process_req(struct work_struct *work);
+
+static DEFINE_MUTEX(lock);
+static LIST_HEAD(req_list);
+static struct delayed_work work;
+static struct workqueue_struct *addr_wq;
+
+void rdma_addr_register_client(struct rdma_addr_client *client)
+{
+ atomic_set(&client->refcount, 1);
+ init_completion(&client->comp);
+}
+EXPORT_SYMBOL(rdma_addr_register_client);
+
+static inline void put_client(struct rdma_addr_client *client)
+{
+ if (atomic_dec_and_test(&client->refcount))
+ complete(&client->comp);
+}
+
+void rdma_addr_unregister_client(struct rdma_addr_client *client)
+{
+ put_client(client);
+ wait_for_completion(&client->comp);
+}
+EXPORT_SYMBOL(rdma_addr_unregister_client);
+
+#ifdef __linux__
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+ const unsigned char *dst_dev_addr)
+{
+ dev_addr->dev_type = dev->type;
+ memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+ memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
+ if (dst_dev_addr)
+ memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
+ dev_addr->bound_dev_if = dev->ifindex;
+ return 0;
+}
+#else
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
+ const unsigned char *dst_dev_addr)
+{
+ if (dev->if_type == IFT_INFINIBAND)
+ dev_addr->dev_type = ARPHRD_INFINIBAND;
+ else if (dev->if_type == IFT_ETHER)
+ dev_addr->dev_type = ARPHRD_ETHER;
+ else
+ dev_addr->dev_type = 0;
+ memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen);
+ memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr),
+ dev->if_addrlen);
+ if (dst_dev_addr)
+ memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen);
+ dev_addr->bound_dev_if = dev->if_index;
+ return 0;
+}
+#endif
+EXPORT_SYMBOL(rdma_copy_addr);
+
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+{
+ struct net_device *dev;
+ int ret = -EADDRNOTAVAIL;
+
+ if (dev_addr->bound_dev_if) {
+ dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ ret = rdma_copy_addr(dev_addr, dev, NULL);
+ dev_put(dev);
+ return ret;
+ }
+
+ switch (addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ dev = ip_dev_find(NULL,
+ ((struct sockaddr_in *) addr)->sin_addr.s_addr);
+
+ if (!dev)
+ return ret;
+
+ ret = rdma_copy_addr(dev_addr, dev, NULL);
+ dev_put(dev);
+ break;
+#endif
+
+#if defined(INET6)
+ case AF_INET6:
+#ifdef __linux__
+ read_lock(&dev_base_lock);
+ for_each_netdev(&init_net, dev) {
+ if (ipv6_chk_addr(&init_net,
+ &((struct sockaddr_in6 *) addr)->sin6_addr,
+ dev, 1)) {
+ ret = rdma_copy_addr(dev_addr, dev, NULL);
+ break;
+ }
+ }
+ read_unlock(&dev_base_lock);
+#else
+ {
+ struct sockaddr_in6 *sin6;
+ struct ifaddr *ifa;
+ in_port_t port;
+
+ sin6 = (struct sockaddr_in6 *)addr;
+ port = sin6->sin6_port;
+ sin6->sin6_port = 0;
+ ifa = ifa_ifwithaddr(addr);
+ sin6->sin6_port = port;
+ if (ifa == NULL) {
+ ret = -ENODEV;
+ break;
+ }
+ ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
+ ifa_free(ifa);
+ break;
+ }
+#endif
+ break;
+#endif
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rdma_translate_ip);
+
+static void set_timeout(unsigned long time)
+{
+ unsigned long delay;
+
+ delay = time - jiffies;
+ if ((long)delay <= 0)
+ delay = 1;
+
+ mod_delayed_work(addr_wq, &work, delay);
+}
+
+static void queue_req(struct addr_req *req)
+{
+ struct addr_req *temp_req;
+
+ mutex_lock(&lock);
+ list_for_each_entry_reverse(temp_req, &req_list, list) {
+ if (time_after_eq(req->timeout, temp_req->timeout))
+ break;
+ }
+
+ list_add(&req->list, &temp_req->list);
+
+ if (req_list.next == &req->list)
+ set_timeout(req->timeout);
+ mutex_unlock(&lock);
+}
+
+#ifdef __linux__
+static int addr4_resolve(struct sockaddr_in *src_in,
+ struct sockaddr_in *dst_in,
+ struct rdma_dev_addr *addr)
+{
+ __be32 src_ip = src_in->sin_addr.s_addr;
+ __be32 dst_ip = dst_in->sin_addr.s_addr;
+ struct flowi fl;
+ struct rtable *rt;
+ struct neighbour *neigh;
+ int ret;
+
+ memset(&fl, 0, sizeof fl);
+ fl.nl_u.ip4_u.daddr = dst_ip;
+ fl.nl_u.ip4_u.saddr = src_ip;
+ fl.oif = addr->bound_dev_if;
+
+ ret = ip_route_output_key(&init_net, &rt, &fl);
+ if (ret)
+ goto out;
+
+ src_in->sin_family = AF_INET;
+ src_in->sin_addr.s_addr = rt->rt_src;
+
+ if (rt->idev->dev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ goto put;
+ }
+
+ /* If the device does ARP internally, return 'done' */
+ if (rt->idev->dev->flags & IFF_NOARP) {
+ rdma_copy_addr(addr, rt->idev->dev, NULL);
+ goto put;
+ }
+
+ neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
+ if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+ neigh_event_send(rt->u.dst.neighbour, NULL);
+ ret = -ENODATA;
+ if (neigh)
+ goto release;
+ goto put;
+ }
+
+ ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+release:
+ neigh_release(neigh);
+put:
+ ip_rt_put(rt);
+out:
+ return ret;
+}
+
+#if defined(INET6)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
+{
+ struct flowi fl;
+ struct neighbour *neigh;
+ struct dst_entry *dst;
+ int ret;
+
+ memset(&fl, 0, sizeof fl);
+ ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
+ ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
+ fl.oif = addr->bound_dev_if;
+
+ dst = ip6_route_output(&init_net, NULL, &fl);
+ if ((ret = dst->error))
+ goto put;
+
+ if (ipv6_addr_any(&fl.fl6_src)) {
+ ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+ &fl.fl6_dst, 0, &fl.fl6_src);
+ if (ret)
+ goto put;
+
+ src_in->sin6_family = AF_INET6;
+ ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
+ }
+
+ if (dst->dev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ goto put;
+ }
+
+ /* If the device does ARP internally, return 'done' */
+ if (dst->dev->flags & IFF_NOARP) {
+ ret = rdma_copy_addr(addr, dst->dev, NULL);
+ goto put;
+ }
+
+ neigh = dst->neighbour;
+ if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+ neigh_event_send(dst->neighbour, NULL);
+ ret = -ENODATA;
+ goto put;
+ }
+
+ ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
+put:
+ dst_release(dst);
+ return ret;
+}
+#else
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
+{
+ return -EADDRNOTAVAIL;
+}
+#endif
+
+#else
+#include <netinet/if_ether.h>
+
+static int addr_resolve(struct sockaddr *src_in,
+ struct sockaddr *dst_in,
+ struct rdma_dev_addr *addr)
+{
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ struct ifaddr *ifa;
+ struct ifnet *ifp;
+#if defined(INET) || defined(INET6)
+ struct llentry *lle;
+#endif
+ struct rtentry *rte;
+ in_port_t port;
+ u_char edst[MAX_ADDR_LEN];
+ int multi;
+ int bcast;
+ int error = 0;
+
+ /*
+ * Determine whether the address is unicast, multicast, or broadcast
+ * and whether the source interface is valid.
+ */
+ multi = 0;
+ bcast = 0;
+ sin = NULL;
+ sin6 = NULL;
+ ifp = NULL;
+ rte = NULL;
+ switch (dst_in->sa_family) {
+#ifdef INET
+ case AF_INET:
+ sin = (struct sockaddr_in *)dst_in;
+ if (sin->sin_addr.s_addr == INADDR_BROADCAST)
+ bcast = 1;
+ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+ multi = 1;
+ sin = (struct sockaddr_in *)src_in;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
+ /*
+ * Address comparison fails if the port is set;
+ * cache it here to be restored later.
+ */
+ port = sin->sin_port;
+ sin->sin_port = 0;
+ memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+ } else
+ src_in = NULL;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *)dst_in;
+ if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+ multi = 1;
+ sin6 = (struct sockaddr_in6 *)src_in;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ port = sin6->sin6_port;
+ sin6->sin6_port = 0;
+ } else
+ src_in = NULL;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+ /*
+ * If we have a source address to use, look it up first and verify
+ * that it is a local interface.
+ */
+ if (src_in) {
+ ifa = ifa_ifwithaddr(src_in);
+ if (sin)
+ sin->sin_port = port;
+ if (sin6)
+ sin6->sin6_port = port;
+ if (ifa == NULL)
+ return -ENETUNREACH;
+ ifp = ifa->ifa_ifp;
+ ifa_free(ifa);
+ if (bcast || multi)
+ goto mcast;
+ }
+ /*
+ * Make sure the route exists and has a valid link.
+ */
+ rte = rtalloc1(dst_in, 1, 0);
+ if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) {
+ if (rte)
+ RTFREE_LOCKED(rte);
+ return -EHOSTUNREACH;
+ }
+ /*
+ * If it's not multicast or broadcast and the route doesn't match the
+ * requested interface, return unreachable. Otherwise fetch the
+ * correct interface pointer and unlock the route.
+ */
+ if (multi || bcast) {
+ if (ifp == NULL)
+ ifp = rte->rt_ifp;
+ RTFREE_LOCKED(rte);
+ } else if (ifp && ifp != rte->rt_ifp) {
+ RTFREE_LOCKED(rte);
+ return -ENETUNREACH;
+ } else {
+ if (ifp == NULL)
+ ifp = rte->rt_ifp;
+ RT_UNLOCK(rte);
+ }
+mcast:
+ if (bcast)
+ return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr);
+ if (multi) {
+ struct sockaddr *llsa;
+
+ error = ifp->if_resolvemulti(ifp, &llsa, dst_in);
+ if (error)
+ return -error;
+ error = rdma_copy_addr(addr, ifp,
+ LLADDR((struct sockaddr_dl *)llsa));
+ free(llsa, M_IFMADDR);
+ return error;
+ }
+ /*
+ * Resolve the destination's link-layer address.
+ */
+ switch (dst_in->sa_family) {
+#ifdef INET
+ case AF_INET:
+ error = arpresolve(ifp, rte, NULL, dst_in, edst, &lle);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, &lle);
+ break;
+#endif
+ default:
+ /* XXX: Shouldn't happen. */
+ error = -EINVAL;
+ }
+ RTFREE(rte);
+ if (error == 0)
+ return rdma_copy_addr(addr, ifp, edst);
+ if (error == EWOULDBLOCK)
+ return -ENODATA;
+ return -error;
+}
+
+#endif
+
+static void process_req(struct work_struct *work)
+{
+ struct addr_req *req, *temp_req;
+ struct sockaddr *src_in, *dst_in;
+ struct list_head done_list;
+
+ INIT_LIST_HEAD(&done_list);
+
+ mutex_lock(&lock);
+ list_for_each_entry_safe(req, temp_req, &req_list, list) {
+ if (req->status == -ENODATA) {
+ src_in = (struct sockaddr *) &req->src_addr;
+ dst_in = (struct sockaddr *) &req->dst_addr;
+ req->status = addr_resolve(src_in, dst_in, req->addr);
+ if (req->status && time_after_eq(jiffies, req->timeout))
+ req->status = -ETIMEDOUT;
+ else if (req->status == -ENODATA)
+ continue;
+ }
+ list_move_tail(&req->list, &done_list);
+ }
+
+ if (!list_empty(&req_list)) {
+ req = list_entry(req_list.next, struct addr_req, list);
+ set_timeout(req->timeout);
+ }
+ mutex_unlock(&lock);
+
+ list_for_each_entry_safe(req, temp_req, &done_list, list) {
+ list_del(&req->list);
+ req->callback(req->status, (struct sockaddr *) &req->src_addr,
+ req->addr, req->context);
+ put_client(req->client);
+ kfree(req);
+ }
+}
+
+int rdma_resolve_ip(struct rdma_addr_client *client,
+ struct sockaddr *src_addr, struct sockaddr *dst_addr,
+ struct rdma_dev_addr *addr, int timeout_ms,
+ void (*callback)(int status, struct sockaddr *src_addr,
+ struct rdma_dev_addr *addr, void *context),
+ void *context)
+{
+ struct sockaddr *src_in, *dst_in;
+ struct addr_req *req;
+ int ret = 0;
+
+ req = kzalloc(sizeof *req, GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ src_in = (struct sockaddr *) &req->src_addr;
+ dst_in = (struct sockaddr *) &req->dst_addr;
+
+ if (src_addr) {
+ if (src_addr->sa_family != dst_addr->sa_family) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ memcpy(src_in, src_addr, ip_addr_size(src_addr));
+ } else {
+ src_in->sa_family = dst_addr->sa_family;
+ }
+
+ memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
+ req->addr = addr;
+ req->callback = callback;
+ req->context = context;
+ req->client = client;
+ atomic_inc(&client->refcount);
+
+ req->status = addr_resolve(src_in, dst_in, addr);
+ switch (req->status) {
+ case 0:
+ req->timeout = jiffies;
+ queue_req(req);
+ break;
+ case -ENODATA:
+ req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
+ queue_req(req);
+ break;
+ default:
+ ret = req->status;
+ atomic_dec(&client->refcount);
+ goto err;
+ }
+ return ret;
+err:
+ kfree(req);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_ip);
+
+void rdma_addr_cancel(struct rdma_dev_addr *addr)
+{
+ struct addr_req *req, *temp_req;
+
+ mutex_lock(&lock);
+ list_for_each_entry_safe(req, temp_req, &req_list, list) {
+ if (req->addr == addr) {
+ req->status = -ECANCELED;
+ req->timeout = jiffies;
+ list_move(&req->list, &req_list);
+ set_timeout(req->timeout);
+ break;
+ }
+ }
+ mutex_unlock(&lock);
+}
+EXPORT_SYMBOL(rdma_addr_cancel);
+
+static int netevent_callback(struct notifier_block *self, unsigned long event,
+ void *ctx)
+{
+ if (event == NETEVENT_NEIGH_UPDATE) {
+#ifdef __linux__
+ struct neighbour *neigh = ctx;
+
+ if (neigh->nud_state & NUD_VALID) {
+ set_timeout(jiffies);
+ }
+#else
+ set_timeout(jiffies);
+#endif
+ }
+ return 0;
+}
+
+static struct notifier_block nb = {
+ .notifier_call = netevent_callback
+};
+
+static int __init addr_init(void)
+{
+ INIT_DELAYED_WORK(&work, process_req);
+ addr_wq = create_singlethread_workqueue("ib_addr");
+ if (!addr_wq)
+ return -ENOMEM;
+
+ register_netevent_notifier(&nb);
+ return 0;
+}
+
+static void __exit addr_cleanup(void)
+{
+ unregister_netevent_notifier(&nb);
+ destroy_workqueue(addr_wq);
+}
+
+module_init(addr_init);
+module_exit(addr_cleanup);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/addr.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
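For context: addr.c implements asynchronous resolution of an IP address pair into device (link-layer) addresses. A consumer registers an rdma_addr_client, calls rdma_resolve_ip() with a callback, and the ib_addr workqueue invokes the callback once addr_resolve() completes, is canceled, or times out. A minimal sketch of a kernel consumer of the API above (my_client, my_cb, and the 2000 ms timeout are illustrative, not part of this commit):

static struct rdma_addr_client my_client;	/* hypothetical consumer state */

static void my_cb(int status, struct sockaddr *src_addr,
		  struct rdma_dev_addr *addr, void *context)
{
	/* Invoked from the ib_addr workqueue once resolution finishes. */
	if (status)
		printk(KERN_WARNING "address resolution failed: %d\n", status);
}

static int my_init_and_resolve(struct sockaddr *src, struct sockaddr *dst,
			       struct rdma_dev_addr *addr)
{
	/* Register once; the matching unregister blocks until all
	 * outstanding requests have dropped their references. */
	rdma_addr_register_client(&my_client);
	return rdma_resolve_ip(&my_client, src, dst, addr, 2000, my_cb, NULL);
}

Teardown would call rdma_addr_unregister_client(&my_client) after any in-flight rdma_addr_cancel().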
Added: trunk/sys/ofed/drivers/infiniband/core/agent.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/agent.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/agent.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "agent.h"
+#include "smi.h"
+#include "mad_priv.h"
+
+#define SPFX "ib_agent: "
+
+struct ib_agent_port_private {
+ struct list_head port_list;
+ struct ib_mad_agent *agent[2];
+};
+
+static DEFINE_SPINLOCK(ib_agent_port_list_lock);
+static LIST_HEAD(ib_agent_port_list);
+
+static struct ib_agent_port_private *
+__ib_get_agent_port(struct ib_device *device, int port_num)
+{
+ struct ib_agent_port_private *entry;
+
+ list_for_each_entry(entry, &ib_agent_port_list, port_list) {
+ if (entry->agent[1]->device == device &&
+ entry->agent[1]->port_num == port_num)
+ return entry;
+ }
+ return NULL;
+}
+
+static struct ib_agent_port_private *
+ib_get_agent_port(struct ib_device *device, int port_num)
+{
+ struct ib_agent_port_private *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ib_agent_port_list_lock, flags);
+ entry = __ib_get_agent_port(device, port_num);
+ spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
+ return entry;
+}
+
+void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
+ struct ib_wc *wc, struct ib_device *device,
+ int port_num, int qpn)
+{
+ struct ib_agent_port_private *port_priv;
+ struct ib_mad_agent *agent;
+ struct ib_mad_send_buf *send_buf;
+ struct ib_ah *ah;
+ struct ib_mad_send_wr_private *mad_send_wr;
+
+ if (device->node_type == RDMA_NODE_IB_SWITCH)
+ port_priv = ib_get_agent_port(device, 0);
+ else
+ port_priv = ib_get_agent_port(device, port_num);
+
+ if (!port_priv) {
+ printk(KERN_ERR SPFX "Unable to find port agent\n");
+ return;
+ }
+
+ agent = port_priv->agent[qpn];
+ ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
+ if (IS_ERR(ah)) {
+ printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n",
+ PTR_ERR(ah));
+ return;
+ }
+
+ send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0,
+ IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+ GFP_KERNEL);
+ if (IS_ERR(send_buf)) {
+ printk(KERN_ERR SPFX "ib_create_send_mad error\n");
+ goto err1;
+ }
+
+ memcpy(send_buf->mad, mad, sizeof *mad);
+ send_buf->ah = ah;
+
+ if (device->node_type == RDMA_NODE_IB_SWITCH) {
+ mad_send_wr = container_of(send_buf,
+ struct ib_mad_send_wr_private,
+ send_buf);
+ mad_send_wr->send_wr.wr.ud.port_num = port_num;
+ }
+
+ if (ib_post_send_mad(send_buf, NULL)) {
+ printk(KERN_ERR SPFX "ib_post_send_mad error\n");
+ goto err2;
+ }
+ return;
+err2:
+ ib_free_send_mad(send_buf);
+err1:
+ ib_destroy_ah(ah);
+}
+
+static void agent_send_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ ib_destroy_ah(mad_send_wc->send_buf->ah);
+ ib_free_send_mad(mad_send_wc->send_buf);
+}
+
+int ib_agent_port_open(struct ib_device *device, int port_num)
+{
+ struct ib_agent_port_private *port_priv;
+ unsigned long flags;
+ int ret;
+
+ /* Create new device info */
+ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
+ if (!port_priv) {
+ printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n");
+ ret = -ENOMEM;
+ goto error1;
+ }
+
+ if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND) {
+ /* Obtain send-only MAD agent for SMI QP */
+ port_priv->agent[0] = ib_register_mad_agent(device, port_num,
+ IB_QPT_SMI, NULL, 0,
+ &agent_send_handler,
+ NULL, NULL);
+ if (IS_ERR(port_priv->agent[0])) {
+ ret = PTR_ERR(port_priv->agent[0]);
+ goto error2;
+ }
+ }
+
+ /* Obtain send-only MAD agent for GSI QP */
+ port_priv->agent[1] = ib_register_mad_agent(device, port_num,
+ IB_QPT_GSI, NULL, 0,
+ &agent_send_handler,
+ NULL, NULL);
+ if (IS_ERR(port_priv->agent[1])) {
+ ret = PTR_ERR(port_priv->agent[1]);
+ goto error3;
+ }
+
+ spin_lock_irqsave(&ib_agent_port_list_lock, flags);
+ list_add_tail(&port_priv->port_list, &ib_agent_port_list);
+ spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
+
+ return 0;
+
+error3:
+ if (port_priv->agent[0])
+ ib_unregister_mad_agent(port_priv->agent[0]);
+error2:
+ kfree(port_priv);
+error1:
+ return ret;
+}
+
+int ib_agent_port_close(struct ib_device *device, int port_num)
+{
+ struct ib_agent_port_private *port_priv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ib_agent_port_list_lock, flags);
+ port_priv = __ib_get_agent_port(device, port_num);
+ if (port_priv == NULL) {
+ spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
+ printk(KERN_ERR SPFX "Port %d not found\n", port_num);
+ return -ENODEV;
+ }
+ list_del(&port_priv->port_list);
+ spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
+
+ ib_unregister_mad_agent(port_priv->agent[1]);
+ if (port_priv->agent[0])
+ ib_unregister_mad_agent(port_priv->agent[0]);
+
+ kfree(port_priv);
+ return 0;
+}
Property changes on: trunk/sys/ofed/drivers/infiniband/core/agent.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
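For context: agent.c registers a send-only MAD agent pair per port (agent[0] for the SMI QP on InfiniBand links, agent[1] for the GSI QP) that the MAD layer drives through agent_send_response() to answer incoming management datagrams. A hedged sketch of the open/close pairing a caller would follow (bring_up_port is illustrative, not part of this commit):

static int bring_up_port(struct ib_device *device, int port_num)
{
	int ret;

	ret = ib_agent_port_open(device, port_num);
	if (ret)
		return ret;	/* no agents registered for this port */
	/* ... port now answers management datagrams via agent_send_response() ... */
	return ib_agent_port_close(device, port_num);
}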
Added: trunk/sys/ofed/drivers/infiniband/core/agent.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/agent.h (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/agent.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __AGENT_H_
+#define __AGENT_H_
+
+#include <linux/err.h>
+#include <rdma/ib_mad.h>
+
+extern int ib_agent_port_open(struct ib_device *device, int port_num);
+
+extern int ib_agent_port_close(struct ib_device *device, int port_num);
+
+extern void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
+ struct ib_wc *wc, struct ib_device *device,
+ int port_num, int qpn);
+
+#endif /* __AGENT_H_ */
Property changes on: trunk/sys/ofed/drivers/infiniband/core/agent.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/cache.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/cache.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/cache.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#include <rdma/ib_cache.h>
+
+#include "core_priv.h"
+
+struct ib_pkey_cache {
+ int table_len;
+ u16 table[0];
+};
+
+struct ib_gid_cache {
+ int table_len;
+ union ib_gid table[0];
+};
+
+struct ib_update_work {
+ struct work_struct work;
+ struct ib_device *device;
+ u8 port_num;
+};
+
+static inline int start_port(struct ib_device *device)
+{
+ return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
+}
+
+static inline int end_port(struct ib_device *device)
+{
+ return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+ 0 : device->phys_port_cnt;
+}
+
+int ib_get_cached_gid(struct ib_device *device,
+ u8 port_num,
+ int index,
+ union ib_gid *gid)
+{
+ struct ib_gid_cache *cache;
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ cache = device->cache.gid_cache[port_num - start_port(device)];
+
+ if (index < 0 || index >= cache->table_len)
+ ret = -EINVAL;
+ else
+ *gid = cache->table[index];
+
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_gid);
+
+int ib_find_cached_gid(struct ib_device *device,
+ union ib_gid *gid,
+ u8 *port_num,
+ u16 *index)
+{
+ struct ib_gid_cache *cache;
+ unsigned long flags;
+ int p, i;
+ int ret = -ENOENT;
+
+ *port_num = -1;
+ if (index)
+ *index = -1;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+ cache = device->cache.gid_cache[p];
+ for (i = 0; i < cache->table_len; ++i) {
+ if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
+ *port_num = p + start_port(device);
+ if (index)
+ *index = i;
+ ret = 0;
+ goto found;
+ }
+ }
+ }
+found:
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_find_cached_gid);
+
+int ib_get_cached_pkey(struct ib_device *device,
+ u8 port_num,
+ int index,
+ u16 *pkey)
+{
+ struct ib_pkey_cache *cache;
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ cache = device->cache.pkey_cache[port_num - start_port(device)];
+
+ if (index < 0 || index >= cache->table_len)
+ ret = -EINVAL;
+ else
+ *pkey = cache->table[index];
+
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_pkey);
+
+int ib_find_cached_pkey(struct ib_device *device,
+ u8 port_num,
+ u16 pkey,
+ u16 *index)
+{
+ struct ib_pkey_cache *cache;
+ unsigned long flags;
+ int i;
+ int ret = -ENOENT;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ cache = device->cache.pkey_cache[port_num - start_port(device)];
+
+ *index = -1;
+
+ for (i = 0; i < cache->table_len; ++i)
+ if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
+ *index = i;
+ ret = 0;
+ break;
+ }
+
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_find_cached_pkey);
+
+int ib_get_cached_lmc(struct ib_device *device,
+ u8 port_num,
+ u8 *lmc)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+ *lmc = device->cache.lmc_cache[port_num - start_port(device)];
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_lmc);
+
+static void ib_cache_update(struct ib_device *device,
+ u8 port)
+{
+ struct ib_port_attr *tprops = NULL;
+ struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
+ struct ib_gid_cache *gid_cache = NULL, *old_gid_cache;
+ int i;
+ int ret;
+
+ tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
+ if (!tprops)
+ return;
+
+ ret = ib_query_port(device, port, tprops);
+ if (ret) {
+ printk(KERN_WARNING "ib_query_port failed (%d) for %s\n",
+ ret, device->name);
+ goto err;
+ }
+
+ pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
+ sizeof *pkey_cache->table, GFP_KERNEL);
+ if (!pkey_cache)
+ goto err;
+
+ pkey_cache->table_len = tprops->pkey_tbl_len;
+
+ gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len *
+ sizeof *gid_cache->table, GFP_KERNEL);
+ if (!gid_cache)
+ goto err;
+
+ gid_cache->table_len = tprops->gid_tbl_len;
+
+ for (i = 0; i < pkey_cache->table_len; ++i) {
+ ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
+ if (ret) {
+ printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n",
+ ret, device->name, i);
+ goto err;
+ }
+ }
+
+ for (i = 0; i < gid_cache->table_len; ++i) {
+ ret = ib_query_gid(device, port, i, gid_cache->table + i);
+ if (ret) {
+ printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
+ ret, device->name, i);
+ goto err;
+ }
+ }
+
+ write_lock_irq(&device->cache.lock);
+
+ old_pkey_cache = device->cache.pkey_cache[port - start_port(device)];
+ old_gid_cache = device->cache.gid_cache [port - start_port(device)];
+
+ device->cache.pkey_cache[port - start_port(device)] = pkey_cache;
+ device->cache.gid_cache [port - start_port(device)] = gid_cache;
+
+ device->cache.lmc_cache[port - start_port(device)] = tprops->lmc;
+
+ write_unlock_irq(&device->cache.lock);
+
+ kfree(old_pkey_cache);
+ kfree(old_gid_cache);
+ kfree(tprops);
+ return;
+
+err:
+ kfree(pkey_cache);
+ kfree(gid_cache);
+ kfree(tprops);
+}
+
+static void ib_cache_task(struct work_struct *_work)
+{
+ struct ib_update_work *work =
+ container_of(_work, struct ib_update_work, work);
+
+ ib_cache_update(work->device, work->port_num);
+ kfree(work);
+}
+
+static void ib_cache_event(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ struct ib_update_work *work;
+
+ if (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_GID_CHANGE) {
+ work = kmalloc(sizeof *work, GFP_ATOMIC);
+ if (work) {
+ INIT_WORK(&work->work, ib_cache_task);
+ work->device = event->device;
+ work->port_num = event->element.port_num;
+ schedule_work(&work->work);
+ }
+ }
+}
+
+static void ib_cache_setup_one(struct ib_device *device)
+{
+ int p;
+
+ rwlock_init(&device->cache.lock);
+
+ device->cache.pkey_cache =
+ kmalloc(sizeof *device->cache.pkey_cache *
+ (end_port(device) - start_port(device) + 1), GFP_KERNEL);
+ device->cache.gid_cache =
+ kmalloc(sizeof *device->cache.gid_cache *
+ (end_port(device) - start_port(device) + 1), GFP_KERNEL);
+
+ device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
+ (end_port(device) -
+ start_port(device) + 1),
+ GFP_KERNEL);
+
+ if (!device->cache.pkey_cache || !device->cache.gid_cache ||
+ !device->cache.lmc_cache) {
+ printk(KERN_WARNING "Couldn't allocate cache "
+ "for %s\n", device->name);
+ goto err;
+ }
+
+ for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+ device->cache.pkey_cache[p] = NULL;
+ device->cache.gid_cache [p] = NULL;
+ ib_cache_update(device, p + start_port(device));
+ }
+
+ INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
+ device, ib_cache_event);
+ if (ib_register_event_handler(&device->cache.event_handler))
+ goto err_cache;
+
+ return;
+
+err_cache:
+ for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+ kfree(device->cache.pkey_cache[p]);
+ kfree(device->cache.gid_cache[p]);
+ }
+
+err:
+ kfree(device->cache.pkey_cache);
+ kfree(device->cache.gid_cache);
+ kfree(device->cache.lmc_cache);
+}
+
+static void ib_cache_cleanup_one(struct ib_device *device)
+{
+ int p;
+
+ ib_unregister_event_handler(&device->cache.event_handler);
+ flush_scheduled_work();
+
+ for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+ kfree(device->cache.pkey_cache[p]);
+ kfree(device->cache.gid_cache[p]);
+ }
+
+ kfree(device->cache.pkey_cache);
+ kfree(device->cache.gid_cache);
+ kfree(device->cache.lmc_cache);
+}
+
+static struct ib_client cache_client = {
+ .name = "cache",
+ .add = ib_cache_setup_one,
+ .remove = ib_cache_cleanup_one
+};
+
+int __init ib_cache_setup(void)
+{
+ return ib_register_client(&cache_client);
+}
+
+void __exit ib_cache_cleanup(void)
+{
+ ib_unregister_client(&cache_client);
+}
Property changes on: trunk/sys/ofed/drivers/infiniband/core/cache.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
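For context: cache.c snapshots each port's P_Key table, GID table, and LMC on port events (IB_EVENT_PORT_ACTIVE, IB_EVENT_PKEY_CHANGE, and so on) so that hot-path consumers read under device->cache.lock instead of calling into the driver via ib_query_pkey()/ib_query_gid(). A hedged sketch of a reader (dump_first_pkey is illustrative, not part of this commit):

static void dump_first_pkey(struct ib_device *device, u8 port_num)
{
	u16 pkey;

	/* Copies entry 0 from the cached table; no driver round trip. */
	if (!ib_get_cached_pkey(device, port_num, 0, &pkey))
		printk(KERN_INFO "%s port %u pkey[0]=0x%04x\n",
		       device->name, port_num, pkey);
}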
Added: trunk/sys/ofed/drivers/infiniband/core/cm.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/cm.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/cm.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,3897 @@
+/*
+ * Copyright (c) 2004-2007 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/random.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <linux/workqueue.h>
+#include <linux/kdev_t.h>
+#include <linux/string.h>
+
+#include <asm/atomic-long.h>
+
+#include <rdma/ib_cache.h>
+#include <rdma/ib_cm.h>
+#include "cm_msgs.h"
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("InfiniBand CM");
+MODULE_LICENSE("Dual BSD/GPL");
+
+#define PFX "ib_cm: "
+
+/*
+ * Limit CM message timeouts to something reasonable:
+ * 8 seconds per message, with up to 15 retries
+ */
+static int max_timeout = 21;
+module_param(max_timeout, int, 0644);
+MODULE_PARM_DESC(max_timeout, "Maximum IB CM per message timeout "
+ "(default=21, or ~8 seconds)");
+
+static void cm_add_one(struct ib_device *device);
+static void cm_remove_one(struct ib_device *device);
+
+static struct ib_client cm_client = {
+ .name = "cm",
+ .add = cm_add_one,
+ .remove = cm_remove_one
+};
+
+static struct ib_cm {
+ spinlock_t lock;
+ struct list_head device_list;
+ rwlock_t device_lock;
+ struct rb_root listen_service_table;
+ u64 listen_service_id;
+ /* struct rb_root peer_service_table; todo: fix peer to peer */
+ struct rb_root remote_qp_table;
+ struct rb_root remote_id_table;
+ struct rb_root remote_sidr_table;
+ struct idr local_id_table;
+ __be32 random_id_operand;
+ struct list_head timewait_list;
+ struct workqueue_struct *wq;
+} cm;
+
+/* Counter indexes ordered by attribute ID */
+enum {
+ CM_REQ_COUNTER,
+ CM_MRA_COUNTER,
+ CM_REJ_COUNTER,
+ CM_REP_COUNTER,
+ CM_RTU_COUNTER,
+ CM_DREQ_COUNTER,
+ CM_DREP_COUNTER,
+ CM_SIDR_REQ_COUNTER,
+ CM_SIDR_REP_COUNTER,
+ CM_LAP_COUNTER,
+ CM_APR_COUNTER,
+ CM_ATTR_COUNT,
+ CM_ATTR_ID_OFFSET = 0x0010,
+};
+
+enum {
+ CM_XMIT,
+ CM_XMIT_RETRIES,
+ CM_RECV,
+ CM_RECV_DUPLICATES,
+ CM_COUNTER_GROUPS
+};
+
+static char const counter_group_names[CM_COUNTER_GROUPS]
+ [sizeof("cm_rx_duplicates")] = {
+ "cm_tx_msgs", "cm_tx_retries",
+ "cm_rx_msgs", "cm_rx_duplicates"
+};
+
+struct cm_counter_group {
+ struct kobject obj;
+ atomic_long_t counter[CM_ATTR_COUNT];
+};
+
+struct cm_counter_attribute {
+ struct attribute attr;
+ int index;
+};
+
+#define CM_COUNTER_ATTR(_name, _index) \
+struct cm_counter_attribute cm_##_name##_counter_attr = { \
+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
+ .index = _index \
+}
+
+static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
+static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
+static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
+static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
+static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
+static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
+static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
+static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
+static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
+static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
+static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
+
+static struct attribute *cm_counter_default_attrs[] = {
+ &cm_req_counter_attr.attr,
+ &cm_mra_counter_attr.attr,
+ &cm_rej_counter_attr.attr,
+ &cm_rep_counter_attr.attr,
+ &cm_rtu_counter_attr.attr,
+ &cm_dreq_counter_attr.attr,
+ &cm_drep_counter_attr.attr,
+ &cm_sidr_req_counter_attr.attr,
+ &cm_sidr_rep_counter_attr.attr,
+ &cm_lap_counter_attr.attr,
+ &cm_apr_counter_attr.attr,
+ NULL
+};
+
+struct cm_port {
+ struct cm_device *cm_dev;
+ struct ib_mad_agent *mad_agent;
+ struct kobject port_obj;
+ u8 port_num;
+ struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
+};
+
+struct cm_device {
+ struct list_head list;
+ struct ib_device *ib_device;
+ struct device *device;
+ u8 ack_delay;
+ struct cm_port *port[0];
+};
+
+struct cm_av {
+ struct cm_port *port;
+ union ib_gid dgid;
+ struct ib_ah_attr ah_attr;
+ u16 pkey_index;
+ u8 timeout;
+};
+
+struct cm_work {
+ struct delayed_work work;
+ struct list_head list;
+ struct cm_port *port;
+ struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */
+ __be32 local_id; /* Established / timewait */
+ __be32 remote_id;
+ struct ib_cm_event cm_event;
+ struct ib_sa_path_rec path[0];
+};
+
+struct cm_timewait_info {
+ struct cm_work work; /* Must be first. */
+ struct list_head list;
+ struct rb_node remote_qp_node;
+ struct rb_node remote_id_node;
+ __be64 remote_ca_guid;
+ __be32 remote_qpn;
+ u8 inserted_remote_qp;
+ u8 inserted_remote_id;
+};
+
+struct cm_id_private {
+ struct ib_cm_id id;
+
+ struct rb_node service_node;
+ struct rb_node sidr_id_node;
+ spinlock_t lock; /* Do not acquire inside cm.lock */
+ struct completion comp;
+ atomic_t refcount;
+
+ struct ib_mad_send_buf *msg;
+ struct cm_timewait_info *timewait_info;
+ /* todo: use alternate port on send failure */
+ struct cm_av av;
+ struct cm_av alt_av;
+ struct ib_cm_compare_data *compare_data;
+
+ void *private_data;
+ __be64 tid;
+ __be32 local_qpn;
+ __be32 remote_qpn;
+ enum ib_qp_type qp_type;
+ __be32 sq_psn;
+ __be32 rq_psn;
+ int timeout_ms;
+ enum ib_mtu path_mtu;
+ __be16 pkey;
+ u8 private_data_len;
+ u8 max_cm_retries;
+ u8 peer_to_peer;
+ u8 responder_resources;
+ u8 initiator_depth;
+ u8 retry_count;
+ u8 rnr_retry_count;
+ u8 service_timeout;
+ u8 target_ack_delay;
+
+ struct list_head work_list;
+ atomic_t work_count;
+};
+
+static void cm_work_handler(struct work_struct *work);
+
+static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
+{
+ if (atomic_dec_and_test(&cm_id_priv->refcount))
+ complete(&cm_id_priv->comp);
+}
+
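+/*
+ * Allocate an address handle and MAD send buffer for an outgoing CM
+ * message.  A reference is taken on the cm_id and dropped when the
+ * message is freed by cm_free_msg().
+ */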
+static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
+ struct ib_mad_send_buf **msg)
+{
+ struct ib_mad_agent *mad_agent;
+ struct ib_mad_send_buf *m;
+ struct ib_ah *ah;
+
+ mad_agent = cm_id_priv->av.port->mad_agent;
+ ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
+ if (IS_ERR(ah))
+ return PTR_ERR(ah);
+
+ m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
+ cm_id_priv->av.pkey_index,
+ 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+ GFP_ATOMIC);
+ if (IS_ERR(m)) {
+ ib_destroy_ah(ah);
+ return PTR_ERR(m);
+ }
+
+ /* Timeout set by caller if response is expected. */
+ m->ah = ah;
+ m->retries = cm_id_priv->max_cm_retries;
+
+ atomic_inc(&cm_id_priv->refcount);
+ m->context[0] = cm_id_priv;
+ *msg = m;
+ return 0;
+}
+
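+/*
+ * Allocate a MAD send buffer for a reply, addressed using the work
+ * completion and GRH of the received MAD.  No cm_id reference is taken.
+ */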
+static int cm_alloc_response_msg(struct cm_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc,
+ struct ib_mad_send_buf **msg)
+{
+ struct ib_mad_send_buf *m;
+ struct ib_ah *ah;
+
+ ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
+ mad_recv_wc->recv_buf.grh, port->port_num);
+ if (IS_ERR(ah))
+ return PTR_ERR(ah);
+
+ m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
+ 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+ GFP_ATOMIC);
+ if (IS_ERR(m)) {
+ ib_destroy_ah(ah);
+ return PTR_ERR(m);
+ }
+ m->ah = ah;
+ *msg = m;
+ return 0;
+}
+
+static void cm_free_msg(struct ib_mad_send_buf *msg)
+{
+ ib_destroy_ah(msg->ah);
+ if (msg->context[0])
+ cm_deref_id(msg->context[0]);
+ ib_free_send_mad(msg);
+}
+
+static void * cm_copy_private_data(const void *private_data,
+ u8 private_data_len)
+{
+ void *data;
+
+ if (!private_data || !private_data_len)
+ return NULL;
+
+ data = kmemdup(private_data, private_data_len, GFP_KERNEL);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ return data;
+}
+
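+/* Replace any previously stored private data, freeing the old buffer. */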
+static void cm_set_private_data(struct cm_id_private *cm_id_priv,
+ void *private_data, u8 private_data_len)
+{
+ if (cm_id_priv->private_data && cm_id_priv->private_data_len)
+ kfree(cm_id_priv->private_data);
+
+ cm_id_priv->private_data = private_data;
+ cm_id_priv->private_data_len = private_data_len;
+}
+
+static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
+ struct ib_grh *grh, struct cm_av *av)
+{
+ av->port = port;
+ av->pkey_index = wc->pkey_index;
+ ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
+ grh, &av->ah_attr);
+}
+
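+/*
+ * Resolve the local port whose cached GID table contains the path's
+ * SGID, then look up the pkey index and initialize the address vector
+ * from the path record.
+ */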
+static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
+{
+ struct cm_device *cm_dev;
+ struct cm_port *port = NULL;
+ unsigned long flags;
+ int ret;
+ u8 p;
+
+ read_lock_irqsave(&cm.device_lock, flags);
+ list_for_each_entry(cm_dev, &cm.device_list, list) {
+ if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
+ &p, NULL)) {
+ port = cm_dev->port[p-1];
+ break;
+ }
+ }
+ read_unlock_irqrestore(&cm.device_lock, flags);
+
+ if (!port)
+ return -EINVAL;
+
+ ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
+ be16_to_cpu(path->pkey), &av->pkey_index);
+ if (ret)
+ return ret;
+
+ av->port = port;
+ ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
+ &av->ah_attr);
+ av->timeout = path->packet_life_time + 1;
+ return 0;
+}
+
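+/*
+ * Allocate a local communication ID from the idr; the index is XORed
+ * with a random operand so that local IDs on the wire are not simply
+ * sequential.
+ */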
+static int cm_alloc_id(struct cm_id_private *cm_id_priv)
+{
+ unsigned long flags;
+ int ret, id;
+ static int next_id;
+
+ do {
+ spin_lock_irqsave(&cm.lock, flags);
+ ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
+ next_id, &id);
+ if (!ret)
+ next_id = ((unsigned) id + 1) & MAX_ID_MASK;
+ spin_unlock_irqrestore(&cm.lock, flags);
+ } while ((ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL));
+
+ cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
+ return ret;
+}
+
+static void cm_free_id(__be32 local_id)
+{
+ spin_lock_irq(&cm.lock);
+ idr_remove(&cm.local_id_table,
+ (__force int) (local_id ^ cm.random_id_operand));
+ spin_unlock_irq(&cm.lock);
+}
+
+static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
+{
+ struct cm_id_private *cm_id_priv;
+
+ cm_id_priv = idr_find(&cm.local_id_table,
+ (__force int) (local_id ^ cm.random_id_operand));
+ if (cm_id_priv) {
+ if (cm_id_priv->id.remote_id == remote_id)
+ atomic_inc(&cm_id_priv->refcount);
+ else
+ cm_id_priv = NULL;
+ }
+
+ return cm_id_priv;
+}
+
+static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
+{
+ struct cm_id_private *cm_id_priv;
+
+ spin_lock_irq(&cm.lock);
+ cm_id_priv = cm_get_id(local_id, remote_id);
+ spin_unlock_irq(&cm.lock);
+
+ return cm_id_priv;
+}
+
+static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
+{
+ int i;
+
+ for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++)
+ ((unsigned long *) dst)[i] = ((unsigned long *) src)[i] &
+ ((unsigned long *) mask)[i];
+}
+
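+/*
+ * Compare listen private data after applying each side's mask to the
+ * other's data; a zero result means the two match.
+ */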
+static int cm_compare_data(struct ib_cm_compare_data *src_data,
+ struct ib_cm_compare_data *dst_data)
+{
+ u8 src[IB_CM_COMPARE_SIZE];
+ u8 dst[IB_CM_COMPARE_SIZE];
+
+ if (!src_data || !dst_data)
+ return 0;
+
+ cm_mask_copy(src, src_data->data, dst_data->mask);
+ cm_mask_copy(dst, dst_data->data, src_data->mask);
+ return memcmp(src, dst, IB_CM_COMPARE_SIZE);
+}
+
+static int cm_compare_private_data(u8 *private_data,
+ struct ib_cm_compare_data *dst_data)
+{
+ u8 src[IB_CM_COMPARE_SIZE];
+
+ if (!dst_data)
+ return 0;
+
+ cm_mask_copy(src, private_data, dst_data->mask);
+ return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
+}
+
+/*
+ * Trivial helpers to strip endian annotation and compare; the
+ * endianness doesn't actually matter since we just need a stable
+ * order for the RB tree.
+ */
+static int be32_lt(__be32 a, __be32 b)
+{
+ return (__force u32) a < (__force u32) b;
+}
+
+static int be32_gt(__be32 a, __be32 b)
+{
+ return (__force u32) a > (__force u32) b;
+}
+
+static int be64_lt(__be64 a, __be64 b)
+{
+ return (__force u64) a < (__force u64) b;
+}
+
+static int be64_gt(__be64 a, __be64 b)
+{
+ return (__force u64) a > (__force u64) b;
+}
+
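+/*
+ * Insert a listener into the RB tree, which is keyed by device,
+ * service ID, and private data compare info.  Returns an existing
+ * matching listener instead of inserting, or NULL on success.
+ */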
+static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
+{
+ struct rb_node **link = &cm.listen_service_table.rb_node;
+ struct rb_node *parent = NULL;
+ struct cm_id_private *cur_cm_id_priv;
+ __be64 service_id = cm_id_priv->id.service_id;
+ __be64 service_mask = cm_id_priv->id.service_mask;
+ int data_cmp;
+
+ while (*link) {
+ parent = *link;
+ cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
+ service_node);
+ data_cmp = cm_compare_data(cm_id_priv->compare_data,
+ cur_cm_id_priv->compare_data);
+ if ((cur_cm_id_priv->id.service_mask & service_id) ==
+ (service_mask & cur_cm_id_priv->id.service_id) &&
+ (cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
+ !data_cmp)
+ return cur_cm_id_priv;
+
+ if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
+ link = &(*link)->rb_left;
+ else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
+ link = &(*link)->rb_right;
+ else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
+ link = &(*link)->rb_left;
+ else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
+ link = &(*link)->rb_right;
+ else if (data_cmp < 0)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+ rb_link_node(&cm_id_priv->service_node, parent, link);
+ rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
+ return NULL;
+}
+
+static struct cm_id_private * cm_find_listen(struct ib_device *device,
+ __be64 service_id,
+ u8 *private_data)
+{
+ struct rb_node *node = cm.listen_service_table.rb_node;
+ struct cm_id_private *cm_id_priv;
+ int data_cmp;
+
+ while (node) {
+ cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
+ data_cmp = cm_compare_private_data(private_data,
+ cm_id_priv->compare_data);
+ if ((cm_id_priv->id.service_mask & service_id) ==
+ cm_id_priv->id.service_id &&
+ (cm_id_priv->id.device == device) && !data_cmp)
+ return cm_id_priv;
+
+ if (device < cm_id_priv->id.device)
+ node = node->rb_left;
+ else if (device > cm_id_priv->id.device)
+ node = node->rb_right;
+ else if (be64_lt(service_id, cm_id_priv->id.service_id))
+ node = node->rb_left;
+ else if (be64_gt(service_id, cm_id_priv->id.service_id))
+ node = node->rb_right;
+ else if (data_cmp < 0)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+ return NULL;
+}
+
+static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
+ *timewait_info)
+{
+ struct rb_node **link = &cm.remote_id_table.rb_node;
+ struct rb_node *parent = NULL;
+ struct cm_timewait_info *cur_timewait_info;
+ __be64 remote_ca_guid = timewait_info->remote_ca_guid;
+ __be32 remote_id = timewait_info->work.remote_id;
+
+ while (*link) {
+ parent = *link;
+ cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
+ remote_id_node);
+ if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
+ link = &(*link)->rb_left;
+ else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
+ link = &(*link)->rb_right;
+ else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
+ link = &(*link)->rb_left;
+ else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
+ link = &(*link)->rb_right;
+ else
+ return cur_timewait_info;
+ }
+ timewait_info->inserted_remote_id = 1;
+ rb_link_node(&timewait_info->remote_id_node, parent, link);
+ rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
+ return NULL;
+}
+
+static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
+ __be32 remote_id)
+{
+ struct rb_node *node = cm.remote_id_table.rb_node;
+ struct cm_timewait_info *timewait_info;
+
+ while (node) {
+ timewait_info = rb_entry(node, struct cm_timewait_info,
+ remote_id_node);
+ if (be32_lt(remote_id, timewait_info->work.remote_id))
+ node = node->rb_left;
+ else if (be32_gt(remote_id, timewait_info->work.remote_id))
+ node = node->rb_right;
+ else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
+ node = node->rb_left;
+ else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
+ node = node->rb_right;
+ else
+ return timewait_info;
+ }
+ return NULL;
+}
+
+static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
+ *timewait_info)
+{
+ struct rb_node **link = &cm.remote_qp_table.rb_node;
+ struct rb_node *parent = NULL;
+ struct cm_timewait_info *cur_timewait_info;
+ __be64 remote_ca_guid = timewait_info->remote_ca_guid;
+ __be32 remote_qpn = timewait_info->remote_qpn;
+
+ while (*link) {
+ parent = *link;
+ cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
+ remote_qp_node);
+ if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
+ link = &(*link)->rb_left;
+ else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
+ link = &(*link)->rb_right;
+ else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
+ link = &(*link)->rb_left;
+ else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
+ link = &(*link)->rb_right;
+ else
+ return cur_timewait_info;
+ }
+ timewait_info->inserted_remote_qp = 1;
+ rb_link_node(&timewait_info->remote_qp_node, parent, link);
+ rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
+ return NULL;
+}
+
+static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
+ *cm_id_priv)
+{
+ struct rb_node **link = &cm.remote_sidr_table.rb_node;
+ struct rb_node *parent = NULL;
+ struct cm_id_private *cur_cm_id_priv;
+ union ib_gid *port_gid = &cm_id_priv->av.dgid;
+ __be32 remote_id = cm_id_priv->id.remote_id;
+
+ while (*link) {
+ parent = *link;
+ cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
+ sidr_id_node);
+ if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
+ link = &(*link)->rb_left;
+ else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
+ link = &(*link)->rb_right;
+ else {
+ int cmp;
+ cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
+ sizeof *port_gid);
+ if (cmp < 0)
+ link = &(*link)->rb_left;
+ else if (cmp > 0)
+ link = &(*link)->rb_right;
+ else
+ return cur_cm_id_priv;
+ }
+ }
+ rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
+ rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
+ return NULL;
+}
+
+static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
+ enum ib_cm_sidr_status status)
+{
+ struct ib_cm_sidr_rep_param param;
+
+ memset(&param, 0, sizeof param);
+ param.status = status;
+ ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
+}
+
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+ ib_cm_handler cm_handler,
+ void *context)
+{
+ struct cm_id_private *cm_id_priv;
+ int ret;
+
+ cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
+ if (!cm_id_priv)
+ return ERR_PTR(-ENOMEM);
+
+ cm_id_priv->id.state = IB_CM_IDLE;
+ cm_id_priv->id.device = device;
+ cm_id_priv->id.cm_handler = cm_handler;
+ cm_id_priv->id.context = context;
+ cm_id_priv->id.remote_cm_qpn = 1;
+ ret = cm_alloc_id(cm_id_priv);
+ if (ret)
+ goto error;
+
+ spin_lock_init(&cm_id_priv->lock);
+ init_completion(&cm_id_priv->comp);
+ INIT_LIST_HEAD(&cm_id_priv->work_list);
+ atomic_set(&cm_id_priv->work_count, -1);
+ atomic_set(&cm_id_priv->refcount, 1);
+ return &cm_id_priv->id;
+
+error:
+ kfree(cm_id_priv);
+ return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL(ib_create_cm_id);
+
+static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
+{
+ struct cm_work *work;
+
+ if (list_empty(&cm_id_priv->work_list))
+ return NULL;
+
+ work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
+ list_del(&work->list);
+ return work;
+}
+
+static void cm_free_work(struct cm_work *work)
+{
+ if (work->mad_recv_wc)
+ ib_free_recv_mad(work->mad_recv_wc);
+ kfree(work);
+}
+
+static inline int cm_convert_to_ms(int iba_time)
+{
+ /* approximate conversion to ms from 4.096us x 2^iba_time */
+ return 1 << max(iba_time - 8, 0);
+}
+
+/*
+ * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
+ * Because of how ack_timeout is stored, adding one doubles the timeout.
+ * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
+ * increment it (round up) only if the other is within 50%.
+ */
+static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
+{
+ int ack_timeout = packet_life_time + 1;
+
+ if (ack_timeout >= ca_ack_delay)
+ ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
+ else
+ ack_timeout = ca_ack_delay +
+ (ack_timeout >= (ca_ack_delay - 1));
+
+ return min(31, ack_timeout);
+}
+
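+/* Remove the timewait entry from the remote ID and remote QPN tables. */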
+static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
+{
+ if (timewait_info->inserted_remote_id) {
+ rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
+ timewait_info->inserted_remote_id = 0;
+ }
+
+ if (timewait_info->inserted_remote_qp) {
+ rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
+ timewait_info->inserted_remote_qp = 0;
+ }
+}
+
+static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
+{
+ struct cm_timewait_info *timewait_info;
+
+ timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
+ if (!timewait_info)
+ return ERR_PTR(-ENOMEM);
+
+ timewait_info->work.local_id = local_id;
+ INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
+ timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
+ return timewait_info;
+}
+
+static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
+{
+ int wait_time;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm.lock, flags);
+ cm_cleanup_timewait(cm_id_priv->timewait_info);
+ list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
+ spin_unlock_irqrestore(&cm.lock, flags);
+
+ /*
+ * The cm_id could be destroyed by the user before we exit timewait.
+ * To protect against this, we search for the cm_id after exiting
+ * timewait before notifying the user that we've exited timewait.
+ */
+ cm_id_priv->id.state = IB_CM_TIMEWAIT;
+ wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
+ queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
+ msecs_to_jiffies(wait_time));
+ cm_id_priv->timewait_info = NULL;
+}
+
+static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
+{
+ unsigned long flags;
+
+ cm_id_priv->id.state = IB_CM_IDLE;
+ if (cm_id_priv->timewait_info) {
+ spin_lock_irqsave(&cm.lock, flags);
+ cm_cleanup_timewait(cm_id_priv->timewait_info);
+ spin_unlock_irqrestore(&cm.lock, flags);
+ kfree(cm_id_priv->timewait_info);
+ cm_id_priv->timewait_info = NULL;
+ }
+}
+
+static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
+{
+ struct cm_id_private *cm_id_priv;
+ struct cm_work *work;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+retest:
+ spin_lock_irq(&cm_id_priv->lock);
+ switch (cm_id->state) {
+ case IB_CM_LISTEN:
+ cm_id->state = IB_CM_IDLE;
+ spin_unlock_irq(&cm_id_priv->lock);
+ spin_lock_irq(&cm.lock);
+ rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
+ spin_unlock_irq(&cm.lock);
+ break;
+ case IB_CM_SIDR_REQ_SENT:
+ cm_id->state = IB_CM_IDLE;
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ spin_unlock_irq(&cm_id_priv->lock);
+ break;
+ case IB_CM_SIDR_REQ_RCVD:
+ spin_unlock_irq(&cm_id_priv->lock);
+ cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
+ break;
+ case IB_CM_REQ_SENT:
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ spin_unlock_irq(&cm_id_priv->lock);
+ ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
+ &cm_id_priv->id.device->node_guid,
+ sizeof cm_id_priv->id.device->node_guid,
+ NULL, 0);
+ break;
+ case IB_CM_REQ_RCVD:
+ if (err == -ENOMEM) {
+ /* Do not reject to allow future retries. */
+ cm_reset_to_idle(cm_id_priv);
+ spin_unlock_irq(&cm_id_priv->lock);
+ } else {
+ spin_unlock_irq(&cm_id_priv->lock);
+ ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+ NULL, 0, NULL, 0);
+ }
+ break;
+ case IB_CM_MRA_REQ_RCVD:
+ case IB_CM_REP_SENT:
+ case IB_CM_MRA_REP_RCVD:
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ /* Fall through */
+ case IB_CM_MRA_REQ_SENT:
+ case IB_CM_REP_RCVD:
+ case IB_CM_MRA_REP_SENT:
+ spin_unlock_irq(&cm_id_priv->lock);
+ ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+ NULL, 0, NULL, 0);
+ break;
+ case IB_CM_ESTABLISHED:
+ spin_unlock_irq(&cm_id_priv->lock);
+ ib_send_cm_dreq(cm_id, NULL, 0);
+ goto retest;
+ case IB_CM_DREQ_SENT:
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ cm_enter_timewait(cm_id_priv);
+ spin_unlock_irq(&cm_id_priv->lock);
+ break;
+ case IB_CM_DREQ_RCVD:
+ spin_unlock_irq(&cm_id_priv->lock);
+ ib_send_cm_drep(cm_id, NULL, 0);
+ break;
+ default:
+ spin_unlock_irq(&cm_id_priv->lock);
+ break;
+ }
+
+ cm_free_id(cm_id->local_id);
+ cm_deref_id(cm_id_priv);
+ wait_for_completion(&cm_id_priv->comp);
+ while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
+ cm_free_work(work);
+ kfree(cm_id_priv->compare_data);
+ kfree(cm_id_priv->private_data);
+ kfree(cm_id_priv);
+}
+
+void ib_destroy_cm_id(struct ib_cm_id *cm_id)
+{
+ cm_destroy_id(cm_id, 0);
+}
+EXPORT_SYMBOL(ib_destroy_cm_id);
+
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
+ struct ib_cm_compare_data *compare_data)
+{
+ struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
+ unsigned long flags;
+ int ret = 0;
+
+ service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
+ service_id &= service_mask;
+ if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
+ (service_id != IB_CM_ASSIGN_SERVICE_ID))
+ return -EINVAL;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ if (cm_id->state != IB_CM_IDLE)
+ return -EINVAL;
+
+ if (compare_data) {
+ cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
+ GFP_KERNEL);
+ if (!cm_id_priv->compare_data)
+ return -ENOMEM;
+ cm_mask_copy(cm_id_priv->compare_data->data,
+ compare_data->data, compare_data->mask);
+ memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
+ IB_CM_COMPARE_SIZE);
+ }
+
+ cm_id->state = IB_CM_LISTEN;
+
+ spin_lock_irqsave(&cm.lock, flags);
+ if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
+ cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
+ cm_id->service_mask = ~cpu_to_be64(0);
+ } else {
+ cm_id->service_id = service_id;
+ cm_id->service_mask = service_mask;
+ }
+ cur_cm_id_priv = cm_insert_listen(cm_id_priv);
+ spin_unlock_irqrestore(&cm.lock, flags);
+
+ if (cur_cm_id_priv) {
+ cm_id->state = IB_CM_IDLE;
+ kfree(cm_id_priv->compare_data);
+ cm_id_priv->compare_data = NULL;
+ ret = -EBUSY;
+ }
+ return ret;
+}
+EXPORT_SYMBOL(ib_cm_listen);
+
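+/*
+ * Build the 64-bit MAD transaction ID: the agent's hi_tid occupies the
+ * upper 32 bits, the local comm ID and message sequence the lower 32.
+ */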
+static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
+ enum cm_msg_sequence msg_seq)
+{
+ u64 hi_tid, low_tid;
+
+ hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
+ low_tid = (u64) ((__force u32)cm_id_priv->id.local_id |
+ (msg_seq << 30));
+ return cpu_to_be64(hi_tid | low_tid);
+}
+
+static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
+ __be16 attr_id, __be64 tid)
+{
+ hdr->base_version = IB_MGMT_BASE_VERSION;
+ hdr->mgmt_class = IB_MGMT_CLASS_CM;
+ hdr->class_version = IB_CM_CLASS_VERSION;
+ hdr->method = IB_MGMT_METHOD_SEND;
+ hdr->attr_id = attr_id;
+ hdr->tid = tid;
+}
+
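+/*
+ * Fill in a REQ MAD from the connection parameters, clamping the CM
+ * response timeouts to max_timeout and copying the path information.
+ */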
+static void cm_format_req(struct cm_req_msg *req_msg,
+ struct cm_id_private *cm_id_priv,
+ struct ib_cm_req_param *param)
+{
+ struct ib_sa_path_rec *pri_path = param->primary_path;
+ struct ib_sa_path_rec *alt_path = param->alternate_path;
+
+ cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
+ cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
+
+ req_msg->local_comm_id = cm_id_priv->id.local_id;
+ req_msg->service_id = param->service_id;
+ req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
+ cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
+ cm_req_set_resp_res(req_msg, param->responder_resources);
+ cm_req_set_init_depth(req_msg, param->initiator_depth);
+ cm_req_set_remote_resp_timeout(req_msg,
+ param->remote_cm_response_timeout);
+ if (param->remote_cm_response_timeout > (u8) max_timeout) {
+ printk(KERN_WARNING PFX "req remote_cm_response_timeout %d > "
+ "%d, decreasing\n", param->remote_cm_response_timeout,
+ max_timeout);
+ cm_req_set_remote_resp_timeout(req_msg, (u8) max_timeout);
+ }
+ cm_req_set_qp_type(req_msg, param->qp_type);
+ cm_req_set_flow_ctrl(req_msg, param->flow_control);
+ cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
+ cm_req_set_local_resp_timeout(req_msg,
+ param->local_cm_response_timeout);
+ if (param->local_cm_response_timeout > (u8) max_timeout) {
+ printk(KERN_WARNING PFX "req local_cm_response_timeout %d > "
+ "%d, decreasing\n", param->local_cm_response_timeout,
+ max_timeout);
+ cm_req_set_local_resp_timeout(req_msg, (u8) max_timeout);
+ }
+ cm_req_set_retry_count(req_msg, param->retry_count);
+ req_msg->pkey = param->primary_path->pkey;
+ cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
+ cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
+ cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
+ cm_req_set_srq(req_msg, param->srq);
+
+ if (pri_path->hop_limit <= 1) {
+ req_msg->primary_local_lid = pri_path->slid;
+ req_msg->primary_remote_lid = pri_path->dlid;
+ } else {
+ /* Work-around until there's a way to obtain remote LID info */
+ req_msg->primary_local_lid = IB_LID_PERMISSIVE;
+ req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
+ }
+ req_msg->primary_local_gid = pri_path->sgid;
+ req_msg->primary_remote_gid = pri_path->dgid;
+ cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
+ cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
+ req_msg->primary_traffic_class = pri_path->traffic_class;
+ req_msg->primary_hop_limit = pri_path->hop_limit;
+ cm_req_set_primary_sl(req_msg, pri_path->sl);
+ cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
+ cm_req_set_primary_local_ack_timeout(req_msg,
+ cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
+ pri_path->packet_life_time));
+
+ if (alt_path) {
+ if (alt_path->hop_limit <= 1) {
+ req_msg->alt_local_lid = alt_path->slid;
+ req_msg->alt_remote_lid = alt_path->dlid;
+ } else {
+ req_msg->alt_local_lid = IB_LID_PERMISSIVE;
+ req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
+ }
+ req_msg->alt_local_gid = alt_path->sgid;
+ req_msg->alt_remote_gid = alt_path->dgid;
+ cm_req_set_alt_flow_label(req_msg,
+ alt_path->flow_label);
+ cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
+ req_msg->alt_traffic_class = alt_path->traffic_class;
+ req_msg->alt_hop_limit = alt_path->hop_limit;
+ cm_req_set_alt_sl(req_msg, alt_path->sl);
+ cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
+ cm_req_set_alt_local_ack_timeout(req_msg,
+ cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
+ alt_path->packet_life_time));
+ }
+
+ if (param->private_data && param->private_data_len)
+ memcpy(req_msg->private_data, param->private_data,
+ param->private_data_len);
+}
+
+static int cm_validate_req_param(struct ib_cm_req_param *param)
+{
+ /* peer-to-peer not supported */
+ if (param->peer_to_peer)
+ return -EINVAL;
+
+ if (!param->primary_path)
+ return -EINVAL;
+
+ if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
+ return -EINVAL;
+
+ if (param->private_data &&
+ param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
+ return -EINVAL;
+
+ if (param->alternate_path &&
+ (param->alternate_path->pkey != param->primary_path->pkey ||
+ param->alternate_path->mtu != param->primary_path->mtu))
+ return -EINVAL;
+
+ return 0;
+}
+
+int ib_send_cm_req(struct ib_cm_id *cm_id,
+ struct ib_cm_req_param *param)
+{
+ struct cm_id_private *cm_id_priv;
+ struct cm_req_msg *req_msg;
+ unsigned long flags;
+ int ret;
+
+ ret = cm_validate_req_param(param);
+ if (ret)
+ return ret;
+
+ /* Verify that we're not in timewait. */
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id->state != IB_CM_IDLE) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ ret = -EINVAL;
+ goto out;
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
+ id.local_id);
+ if (IS_ERR(cm_id_priv->timewait_info)) {
+ ret = PTR_ERR(cm_id_priv->timewait_info);
+ goto out;
+ }
+
+ ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
+ if (ret)
+ goto error1;
+ if (param->alternate_path) {
+ ret = cm_init_av_by_path(param->alternate_path,
+ &cm_id_priv->alt_av);
+ if (ret)
+ goto error1;
+ }
+ cm_id->service_id = param->service_id;
+ cm_id->service_mask = ~cpu_to_be64(0);
+ cm_id_priv->timeout_ms = cm_convert_to_ms(
+ param->primary_path->packet_life_time) * 2 +
+ cm_convert_to_ms(
+ param->remote_cm_response_timeout);
+ if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
+ printk(KERN_WARNING PFX "req timeout_ms %d > %d, decreasing\n",
+ cm_id_priv->timeout_ms, cm_convert_to_ms(max_timeout));
+ cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
+ }
+ cm_id_priv->max_cm_retries = param->max_cm_retries;
+ cm_id_priv->initiator_depth = param->initiator_depth;
+ cm_id_priv->responder_resources = param->responder_resources;
+ cm_id_priv->retry_count = param->retry_count;
+ cm_id_priv->path_mtu = param->primary_path->mtu;
+ cm_id_priv->pkey = param->primary_path->pkey;
+ cm_id_priv->qp_type = param->qp_type;
+
+ ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
+ if (ret)
+ goto error1;
+
+ req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
+ cm_format_req(req_msg, cm_id_priv, param);
+ cm_id_priv->tid = req_msg->hdr.tid;
+ cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
+ cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
+
+ cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
+ cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = ib_post_send_mad(cm_id_priv->msg, NULL);
+ if (ret) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ goto error2;
+ }
+ BUG_ON(cm_id->state != IB_CM_IDLE);
+ cm_id->state = IB_CM_REQ_SENT;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return 0;
+
+error2: cm_free_msg(cm_id_priv->msg);
+error1: kfree(cm_id_priv->timewait_info);
+out: return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_req);
+
+static int cm_issue_rej(struct cm_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc,
+ enum ib_cm_rej_reason reason,
+ enum cm_msg_response msg_rejected,
+ void *ari, u8 ari_length)
+{
+ struct ib_mad_send_buf *msg = NULL;
+ struct cm_rej_msg *rej_msg, *rcv_msg;
+ int ret;
+
+ ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
+ if (ret)
+ return ret;
+
+ /* We just need common CM header information. Cast to any message. */
+ rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
+ rej_msg = (struct cm_rej_msg *) msg->mad;
+
+ cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
+ rej_msg->remote_comm_id = rcv_msg->local_comm_id;
+ rej_msg->local_comm_id = rcv_msg->remote_comm_id;
+ cm_rej_set_msg_rejected(rej_msg, msg_rejected);
+ rej_msg->reason = cpu_to_be16(reason);
+
+ if (ari && ari_length) {
+ cm_rej_set_reject_info_len(rej_msg, ari_length);
+ memcpy(rej_msg->ari, ari, ari_length);
+ }
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ cm_free_msg(msg);
+
+ return ret;
+}
+
+static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
+ __be32 local_qpn, __be32 remote_qpn)
+{
+ return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
+ ((local_ca_guid == remote_ca_guid) &&
+ (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
+}
+
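+/*
+ * Recover path records from a received REQ.  The REQ is written from
+ * the sender's point of view, so local/remote fields are swapped here.
+ */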
+static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
+ struct ib_sa_path_rec *primary_path,
+ struct ib_sa_path_rec *alt_path)
+{
+ memset(primary_path, 0, sizeof *primary_path);
+ primary_path->dgid = req_msg->primary_local_gid;
+ primary_path->sgid = req_msg->primary_remote_gid;
+ primary_path->dlid = req_msg->primary_local_lid;
+ primary_path->slid = req_msg->primary_remote_lid;
+ primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
+ primary_path->hop_limit = req_msg->primary_hop_limit;
+ primary_path->traffic_class = req_msg->primary_traffic_class;
+ primary_path->reversible = 1;
+ primary_path->pkey = req_msg->pkey;
+ primary_path->sl = cm_req_get_primary_sl(req_msg);
+ primary_path->mtu_selector = IB_SA_EQ;
+ primary_path->mtu = cm_req_get_path_mtu(req_msg);
+ primary_path->rate_selector = IB_SA_EQ;
+ primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
+ primary_path->packet_life_time_selector = IB_SA_EQ;
+ primary_path->packet_life_time =
+ cm_req_get_primary_local_ack_timeout(req_msg);
+ primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
+
+ if (req_msg->alt_local_lid) {
+ memset(alt_path, 0, sizeof *alt_path);
+ alt_path->dgid = req_msg->alt_local_gid;
+ alt_path->sgid = req_msg->alt_remote_gid;
+ alt_path->dlid = req_msg->alt_local_lid;
+ alt_path->slid = req_msg->alt_remote_lid;
+ alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
+ alt_path->hop_limit = req_msg->alt_hop_limit;
+ alt_path->traffic_class = req_msg->alt_traffic_class;
+ alt_path->reversible = 1;
+ alt_path->pkey = req_msg->pkey;
+ alt_path->sl = cm_req_get_alt_sl(req_msg);
+ alt_path->mtu_selector = IB_SA_EQ;
+ alt_path->mtu = cm_req_get_path_mtu(req_msg);
+ alt_path->rate_selector = IB_SA_EQ;
+ alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
+ alt_path->packet_life_time_selector = IB_SA_EQ;
+ alt_path->packet_life_time =
+ cm_req_get_alt_local_ack_timeout(req_msg);
+ alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
+ }
+}
+
+static void cm_format_req_event(struct cm_work *work,
+ struct cm_id_private *cm_id_priv,
+ struct ib_cm_id *listen_id)
+{
+ struct cm_req_msg *req_msg;
+ struct ib_cm_req_event_param *param;
+
+ req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
+ param = &work->cm_event.param.req_rcvd;
+ param->listen_id = listen_id;
+ param->port = cm_id_priv->av.port->port_num;
+ param->primary_path = &work->path[0];
+ if (req_msg->alt_local_lid)
+ param->alternate_path = &work->path[1];
+ else
+ param->alternate_path = NULL;
+ param->remote_ca_guid = req_msg->local_ca_guid;
+ param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
+ param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
+ param->qp_type = cm_req_get_qp_type(req_msg);
+ param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
+ param->responder_resources = cm_req_get_init_depth(req_msg);
+ param->initiator_depth = cm_req_get_resp_res(req_msg);
+ param->local_cm_response_timeout =
+ cm_req_get_remote_resp_timeout(req_msg);
+ param->flow_control = cm_req_get_flow_ctrl(req_msg);
+ param->remote_cm_response_timeout =
+ cm_req_get_local_resp_timeout(req_msg);
+ param->retry_count = cm_req_get_retry_count(req_msg);
+ param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
+ param->srq = cm_req_get_srq(req_msg);
+ work->cm_event.private_data = &req_msg->private_data;
+}
+
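+/*
+ * Report the current event to the consumer, then drain any queued
+ * work.  A nonzero handler return value destroys the cm_id.
+ */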
+static void cm_process_work(struct cm_id_private *cm_id_priv,
+ struct cm_work *work)
+{
+ int ret;
+
+ /* We will typically only have the current event to report. */
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
+ cm_free_work(work);
+
+ while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
+ spin_lock_irq(&cm_id_priv->lock);
+ work = cm_dequeue_work(cm_id_priv);
+ spin_unlock_irq(&cm_id_priv->lock);
+ BUG_ON(!work);
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
+ &work->cm_event);
+ cm_free_work(work);
+ }
+ cm_deref_id(cm_id_priv);
+ if (ret)
+ cm_destroy_id(&cm_id_priv->id, ret);
+}
+
+static void cm_format_mra(struct cm_mra_msg *mra_msg,
+ struct cm_id_private *cm_id_priv,
+ enum cm_msg_response msg_mraed, u8 service_timeout,
+ const void *private_data, u8 private_data_len)
+{
+ cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
+ cm_mra_set_msg_mraed(mra_msg, msg_mraed);
+ mra_msg->local_comm_id = cm_id_priv->id.local_id;
+ mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ cm_mra_set_service_timeout(mra_msg, service_timeout);
+
+ if (private_data && private_data_len)
+ memcpy(mra_msg->private_data, private_data, private_data_len);
+}
+
+static void cm_format_rej(struct cm_rej_msg *rej_msg,
+ struct cm_id_private *cm_id_priv,
+ enum ib_cm_rej_reason reason,
+ void *ari,
+ u8 ari_length,
+ const void *private_data,
+ u8 private_data_len)
+{
+ cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
+ rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
+
+ switch (cm_id_priv->id.state) {
+ case IB_CM_REQ_RCVD:
+ rej_msg->local_comm_id = 0;
+ cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+ break;
+ case IB_CM_MRA_REQ_SENT:
+ rej_msg->local_comm_id = cm_id_priv->id.local_id;
+ cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+ break;
+ case IB_CM_REP_RCVD:
+ case IB_CM_MRA_REP_SENT:
+ rej_msg->local_comm_id = cm_id_priv->id.local_id;
+ cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
+ break;
+ default:
+ rej_msg->local_comm_id = cm_id_priv->id.local_id;
+ cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
+ break;
+ }
+
+ rej_msg->reason = cpu_to_be16(reason);
+ if (ari && ari_length) {
+ cm_rej_set_reject_info_len(rej_msg, ari_length);
+ memcpy(rej_msg->ari, ari, ari_length);
+ }
+
+ if (private_data && private_data_len)
+ memcpy(rej_msg->private_data, private_data, private_data_len);
+}
+
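+/*
+ * Handle a duplicate REQ: re-send the outstanding MRA, or a stale
+ * connection REJ if the connection is in timewait.
+ */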
+static void cm_dup_req_handler(struct cm_work *work,
+ struct cm_id_private *cm_id_priv)
+{
+ struct ib_mad_send_buf *msg = NULL;
+ int ret;
+
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_REQ_COUNTER]);
+
+ /* Quick state check to discard duplicate REQs. */
+ if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
+ return;
+
+ ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
+ if (ret)
+ return;
+
+ spin_lock_irq(&cm_id_priv->lock);
+ switch (cm_id_priv->id.state) {
+ case IB_CM_MRA_REQ_SENT:
+ cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
+ CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
+ cm_id_priv->private_data,
+ cm_id_priv->private_data_len);
+ break;
+ case IB_CM_TIMEWAIT:
+ cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
+ IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
+ break;
+ default:
+ goto unlock;
+ }
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ goto free;
+ return;
+
+unlock: spin_unlock_irq(&cm_id_priv->lock);
+free: cm_free_msg(msg);
+}
+
+static struct cm_id_private * cm_match_req(struct cm_work *work,
+ struct cm_id_private *cm_id_priv)
+{
+ struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
+ struct cm_timewait_info *timewait_info;
+ struct cm_req_msg *req_msg;
+
+ req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
+
+ /* Check for possible duplicate REQ. */
+ spin_lock_irq(&cm.lock);
+ timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
+ if (timewait_info) {
+ cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+ spin_unlock_irq(&cm.lock);
+ if (cur_cm_id_priv) {
+ cm_dup_req_handler(work, cur_cm_id_priv);
+ cm_deref_id(cur_cm_id_priv);
+ }
+ return NULL;
+ }
+
+ /* Check for stale connections. */
+ timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
+ if (timewait_info) {
+ cm_cleanup_timewait(cm_id_priv->timewait_info);
+ spin_unlock_irq(&cm.lock);
+ cm_issue_rej(work->port, work->mad_recv_wc,
+ IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
+ NULL, 0);
+ return NULL;
+ }
+
+ /* Find matching listen request. */
+ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+ req_msg->service_id,
+ req_msg->private_data);
+ if (!listen_cm_id_priv) {
+ cm_cleanup_timewait(cm_id_priv->timewait_info);
+ spin_unlock_irq(&cm.lock);
+ cm_issue_rej(work->port, work->mad_recv_wc,
+ IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
+ NULL, 0);
+ goto out;
+ }
+ atomic_inc(&listen_cm_id_priv->refcount);
+ atomic_inc(&cm_id_priv->refcount);
+ cm_id_priv->id.state = IB_CM_REQ_RCVD;
+ atomic_inc(&cm_id_priv->work_count);
+ spin_unlock_irq(&cm.lock);
+out:
+ return listen_cm_id_priv;
+}
+
+/*
+ * Work-around for inter-subnet connections. If the LIDs are permissive,
+ * we need to override the LID/SL data in the REQ with the LID information
+ * in the work completion.
+ */
+static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
+{
+ if (!cm_req_get_primary_subnet_local(req_msg)) {
+ if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
+ req_msg->primary_local_lid = cpu_to_be16(wc->slid);
+ cm_req_set_primary_sl(req_msg, wc->sl);
+ }
+
+ if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
+ req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
+ }
+
+ if (!cm_req_get_alt_subnet_local(req_msg)) {
+ if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
+ req_msg->alt_local_lid = cpu_to_be16(wc->slid);
+ cm_req_set_alt_sl(req_msg, wc->sl);
+ }
+
+ if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
+ req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
+ }
+}
+
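+/*
+ * Handle a new REQ: create a cm_id, match it against a listener,
+ * resolve the address vectors from the embedded paths, and deliver
+ * the REQ event to the listener's handler.
+ */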
+static int cm_req_handler(struct cm_work *work)
+{
+ struct ib_cm_id *cm_id;
+ struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
+ struct cm_req_msg *req_msg;
+ int ret;
+
+ req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
+
+ cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
+ if (IS_ERR(cm_id))
+ return PTR_ERR(cm_id);
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ cm_id_priv->id.remote_id = req_msg->local_comm_id;
+ cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
+ id.local_id);
+ if (IS_ERR(cm_id_priv->timewait_info)) {
+ ret = PTR_ERR(cm_id_priv->timewait_info);
+ goto destroy;
+ }
+ cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
+ cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
+ cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
+
+ listen_cm_id_priv = cm_match_req(work, cm_id_priv);
+ if (!listen_cm_id_priv) {
+ ret = -EINVAL;
+ kfree(cm_id_priv->timewait_info);
+ goto destroy;
+ }
+
+ cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+ cm_id_priv->id.context = listen_cm_id_priv->id.context;
+ cm_id_priv->id.service_id = req_msg->service_id;
+ cm_id_priv->id.service_mask = ~cpu_to_be64(0);
+
+ cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
+ cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
+ ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+ if (ret) {
+ ib_get_cached_gid(work->port->cm_dev->ib_device,
+ work->port->port_num, 0, &work->path[0].sgid);
+ ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ &work->path[0].sgid, sizeof work->path[0].sgid,
+ NULL, 0);
+ goto rejected;
+ }
+ if (req_msg->alt_local_lid) {
+ ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
+ if (ret) {
+ ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+ &work->path[0].sgid,
+ sizeof work->path[0].sgid, NULL, 0);
+ goto rejected;
+ }
+ }
+ cm_id_priv->tid = req_msg->hdr.tid;
+ cm_id_priv->timeout_ms = cm_convert_to_ms(
+ cm_req_get_local_resp_timeout(req_msg));
+ if (cm_req_get_local_resp_timeout(req_msg) > (u8) max_timeout) {
+ printk(KERN_WARNING PFX "rcvd cm_local_resp_timeout %d > %d, "
+ "decreasing used timeout_ms\n",
+ cm_req_get_local_resp_timeout(req_msg), max_timeout);
+ cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
+ }
+
+ cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
+ cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
+ cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
+ cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
+ cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
+ cm_id_priv->pkey = req_msg->pkey;
+ cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
+ cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
+ cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
+ cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
+
+ cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
+ cm_process_work(cm_id_priv, work);
+ cm_deref_id(listen_cm_id_priv);
+ return 0;
+
+rejected:
+ atomic_dec(&cm_id_priv->refcount);
+ cm_deref_id(listen_cm_id_priv);
+destroy:
+ ib_destroy_cm_id(cm_id);
+ return ret;
+}
+
+static void cm_format_rep(struct cm_rep_msg *rep_msg,
+ struct cm_id_private *cm_id_priv,
+ struct ib_cm_rep_param *param)
+{
+ cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
+ rep_msg->local_comm_id = cm_id_priv->id.local_id;
+ rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
+ cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
+ rep_msg->resp_resources = param->responder_resources;
+ rep_msg->initiator_depth = param->initiator_depth;
+ cm_rep_set_target_ack_delay(rep_msg,
+ cm_id_priv->av.port->cm_dev->ack_delay);
+ cm_rep_set_failover(rep_msg, param->failover_accepted);
+ cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
+ cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
+ cm_rep_set_srq(rep_msg, param->srq);
+ rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
+
+ if (param->private_data && param->private_data_len)
+ memcpy(rep_msg->private_data, param->private_data,
+ param->private_data_len);
+}
+
+int ib_send_cm_rep(struct ib_cm_id *cm_id,
+ struct ib_cm_rep_param *param)
+{
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
+ struct cm_rep_msg *rep_msg;
+ unsigned long flags;
+ int ret;
+
+ if (param->private_data &&
+ param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
+ return -EINVAL;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id->state != IB_CM_REQ_RCVD &&
+ cm_id->state != IB_CM_MRA_REQ_SENT) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (ret)
+ goto out;
+
+ rep_msg = (struct cm_rep_msg *) msg->mad;
+ cm_format_rep(rep_msg, cm_id_priv, param);
+ msg->timeout_ms = cm_id_priv->timeout_ms;
+ msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_free_msg(msg);
+ return ret;
+ }
+
+ cm_id->state = IB_CM_REP_SENT;
+ cm_id_priv->msg = msg;
+ cm_id_priv->initiator_depth = param->initiator_depth;
+ cm_id_priv->responder_resources = param->responder_resources;
+ cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
+ cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);
+
+out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_rep);
+
+static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
+ struct cm_id_private *cm_id_priv,
+ const void *private_data,
+ u8 private_data_len)
+{
+ cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
+ rtu_msg->local_comm_id = cm_id_priv->id.local_id;
+ rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
+
+ if (private_data && private_data_len)
+ memcpy(rtu_msg->private_data, private_data, private_data_len);
+}
+
+int ib_send_cm_rtu(struct ib_cm_id *cm_id,
+ const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
+ unsigned long flags;
+ void *data;
+ int ret;
+
+ if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
+ return -EINVAL;
+
+ data = cm_copy_private_data(private_data, private_data_len);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id->state != IB_CM_REP_RCVD &&
+ cm_id->state != IB_CM_MRA_REP_SENT) {
+ ret = -EINVAL;
+ goto error;
+ }
+
+ ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (ret)
+ goto error;
+
+ cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
+ private_data, private_data_len);
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_free_msg(msg);
+ kfree(data);
+ return ret;
+ }
+
+ cm_id->state = IB_CM_ESTABLISHED;
+ cm_set_private_data(cm_id_priv, data, private_data_len);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return 0;
+
+error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ kfree(data);
+ return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_rtu);
+
+static void cm_format_rep_event(struct cm_work *work)
+{
+ struct cm_rep_msg *rep_msg;
+ struct ib_cm_rep_event_param *param;
+
+ rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
+ param = &work->cm_event.param.rep_rcvd;
+ param->remote_ca_guid = rep_msg->local_ca_guid;
+ param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
+ param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
+ param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
+ param->responder_resources = rep_msg->initiator_depth;
+ param->initiator_depth = rep_msg->resp_resources;
+ param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
+ param->failover_accepted = cm_rep_get_failover(rep_msg);
+ param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
+ param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
+ param->srq = cm_rep_get_srq(rep_msg);
+ work->cm_event.private_data = &rep_msg->private_data;
+}
+
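+/*
+ * Handle a duplicate REP by re-sending the RTU or MRA appropriate to
+ * the current connection state.
+ */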
+static void cm_dup_rep_handler(struct cm_work *work)
+{
+ struct cm_id_private *cm_id_priv;
+ struct cm_rep_msg *rep_msg;
+ struct ib_mad_send_buf *msg = NULL;
+ int ret;
+
+ rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
+ cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
+ rep_msg->local_comm_id);
+ if (!cm_id_priv)
+ return;
+
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_REP_COUNTER]);
+ ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
+ if (ret)
+ goto deref;
+
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
+ cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
+ cm_id_priv->private_data,
+ cm_id_priv->private_data_len);
+ else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
+ cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
+ CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
+ cm_id_priv->private_data,
+ cm_id_priv->private_data_len);
+ else
+ goto unlock;
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ goto free;
+ goto deref;
+
+unlock: spin_unlock_irq(&cm_id_priv->lock);
+free: cm_free_msg(msg);
+deref: cm_deref_id(cm_id_priv);
+}
+
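+/*
+ * Handle a REP: match it to the outstanding REQ, record the remote ID
+ * and QPN for timewait tracking, and reject stale connections.
+ */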
+static int cm_rep_handler(struct cm_work *work)
+{
+ struct cm_id_private *cm_id_priv;
+ struct cm_rep_msg *rep_msg;
+ int ret;
+
+ rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
+ cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
+ if (!cm_id_priv) {
+ cm_dup_rep_handler(work);
+ return -EINVAL;
+ }
+
+ cm_format_rep_event(work);
+
+ spin_lock_irq(&cm_id_priv->lock);
+ switch (cm_id_priv->id.state) {
+ case IB_CM_REQ_SENT:
+ case IB_CM_MRA_REQ_RCVD:
+ break;
+ default:
+ spin_unlock_irq(&cm_id_priv->lock);
+ ret = -EINVAL;
+ goto error;
+ }
+
+ cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
+ cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
+ cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+
+ spin_lock(&cm.lock);
+ /* Check for duplicate REP. */
+ if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
+ spin_unlock(&cm.lock);
+ spin_unlock_irq(&cm_id_priv->lock);
+ ret = -EINVAL;
+ goto error;
+ }
+ /* Check for a stale connection. */
+ if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
+ rb_erase(&cm_id_priv->timewait_info->remote_id_node,
+ &cm.remote_id_table);
+ cm_id_priv->timewait_info->inserted_remote_id = 0;
+ spin_unlock(&cm.lock);
+ spin_unlock_irq(&cm_id_priv->lock);
+ cm_issue_rej(work->port, work->mad_recv_wc,
+ IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
+ NULL, 0);
+ ret = -EINVAL;
+ goto error;
+ }
+ spin_unlock(&cm.lock);
+
+ cm_id_priv->id.state = IB_CM_REP_RCVD;
+ cm_id_priv->id.remote_id = rep_msg->local_comm_id;
+ cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+ cm_id_priv->initiator_depth = rep_msg->resp_resources;
+ cm_id_priv->responder_resources = rep_msg->initiator_depth;
+ cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
+ cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
+ cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
+ cm_id_priv->av.timeout =
+ cm_ack_timeout(cm_id_priv->target_ack_delay,
+ cm_id_priv->av.timeout - 1);
+ cm_id_priv->alt_av.timeout =
+ cm_ack_timeout(cm_id_priv->target_ack_delay,
+ cm_id_priv->alt_av.timeout - 1);
+
+ /* todo: handle peer_to_peer */
+
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ ret = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!ret)
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (ret)
+ cm_process_work(cm_id_priv, work);
+ else
+ cm_deref_id(cm_id_priv);
+ return 0;
+
+error:
+ cm_deref_id(cm_id_priv);
+ return ret;
+}
+
+static int cm_establish_handler(struct cm_work *work)
+{
+ struct cm_id_private *cm_id_priv;
+ int ret;
+
+ /* See comment in cm_establish about lookup. */
+ cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
+ if (!cm_id_priv)
+ return -EINVAL;
+
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ goto out;
+ }
+
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ ret = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!ret)
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (ret)
+ cm_process_work(cm_id_priv, work);
+ else
+ cm_deref_id(cm_id_priv);
+ return 0;
+out:
+ cm_deref_id(cm_id_priv);
+ return -EINVAL;
+}
+
+static int cm_rtu_handler(struct cm_work *work)
+{
+ struct cm_id_private *cm_id_priv;
+ struct cm_rtu_msg *rtu_msg;
+ int ret;
+
+ rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
+ cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
+ rtu_msg->local_comm_id);
+ if (!cm_id_priv)
+ return -EINVAL;
+
+ work->cm_event.private_data = &rtu_msg->private_data;
+
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->id.state != IB_CM_REP_SENT &&
+ cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_RTU_COUNTER]);
+ goto out;
+ }
+ cm_id_priv->id.state = IB_CM_ESTABLISHED;
+
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ ret = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!ret)
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (ret)
+ cm_process_work(cm_id_priv, work);
+ else
+ cm_deref_id(cm_id_priv);
+ return 0;
+out:
+ cm_deref_id(cm_id_priv);
+ return -EINVAL;
+}
+
+static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
+ struct cm_id_private *cm_id_priv,
+ const void *private_data,
+ u8 private_data_len)
+{
+ cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
+ cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
+ dreq_msg->local_comm_id = cm_id_priv->id.local_id;
+ dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
+
+ if (private_data && private_data_len)
+ memcpy(dreq_msg->private_data, private_data, private_data_len);
+}
+
+int ib_send_cm_dreq(struct ib_cm_id *cm_id,
+ const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
+ unsigned long flags;
+ int ret;
+
+ if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
+ return -EINVAL;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id->state != IB_CM_ESTABLISHED) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (ret) {
+ cm_enter_timewait(cm_id_priv);
+ goto out;
+ }
+
+ cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
+ private_data, private_data_len);
+ msg->timeout_ms = cm_id_priv->timeout_ms;
+ msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret) {
+ cm_enter_timewait(cm_id_priv);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_free_msg(msg);
+ return ret;
+ }
+
+ cm_id->state = IB_CM_DREQ_SENT;
+ cm_id_priv->msg = msg;
+out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_dreq);
+
+static void cm_format_drep(struct cm_drep_msg *drep_msg,
+ struct cm_id_private *cm_id_priv,
+ const void *private_data,
+ u8 private_data_len)
+{
+ cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
+ drep_msg->local_comm_id = cm_id_priv->id.local_id;
+ drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
+
+ if (private_data && private_data_len)
+ memcpy(drep_msg->private_data, private_data, private_data_len);
+}
+
+int ib_send_cm_drep(struct ib_cm_id *cm_id,
+ const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
+ unsigned long flags;
+ void *data;
+ int ret;
+
+ if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
+ return -EINVAL;
+
+ data = cm_copy_private_data(private_data, private_data_len);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id->state != IB_CM_DREQ_RCVD) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ kfree(data);
+ return -EINVAL;
+ }
+
+ cm_set_private_data(cm_id_priv, data, private_data_len);
+ cm_enter_timewait(cm_id_priv);
+
+ ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (ret)
+ goto out;
+
+ cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
+ private_data, private_data_len);
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_free_msg(msg);
+ return ret;
+ }
+
+out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_drep);
+
+static int cm_issue_drep(struct cm_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_mad_send_buf *msg = NULL;
+ struct cm_dreq_msg *dreq_msg;
+ struct cm_drep_msg *drep_msg;
+ int ret;
+
+ ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
+ if (ret)
+ return ret;
+
+ dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
+ drep_msg = (struct cm_drep_msg *) msg->mad;
+
+ cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
+ drep_msg->remote_comm_id = dreq_msg->local_comm_id;
+ drep_msg->local_comm_id = dreq_msg->remote_comm_id;
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ cm_free_msg(msg);
+
+ return ret;
+}
+
+static int cm_dreq_handler(struct cm_work *work)
+{
+ struct cm_id_private *cm_id_priv;
+ struct cm_dreq_msg *dreq_msg;
+ struct ib_mad_send_buf *msg = NULL;
+ int ret;
+
+ dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
+ cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
+ dreq_msg->local_comm_id);
+ if (!cm_id_priv) {
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_DREQ_COUNTER]);
+ cm_issue_drep(work->port, work->mad_recv_wc);
+ return -EINVAL;
+ }
+
+ work->cm_event.private_data = &dreq_msg->private_data;
+
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
+ goto unlock;
+
+ switch (cm_id_priv->id.state) {
+ case IB_CM_REP_SENT:
+ case IB_CM_DREQ_SENT:
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ break;
+ case IB_CM_ESTABLISHED:
+ case IB_CM_MRA_REP_RCVD:
+ break;
+ case IB_CM_TIMEWAIT:
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_DREQ_COUNTER]);
+ if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
+ goto unlock;
+
+ cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
+ cm_id_priv->private_data,
+ cm_id_priv->private_data_len);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (ib_post_send_mad(msg, NULL))
+ cm_free_msg(msg);
+ goto deref;
+ case IB_CM_DREQ_RCVD:
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_DREQ_COUNTER]);
+ goto unlock;
+ default:
+ goto unlock;
+ }
+ cm_id_priv->id.state = IB_CM_DREQ_RCVD;
+ cm_id_priv->tid = dreq_msg->hdr.tid;
+ ret = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!ret)
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (ret)
+ cm_process_work(cm_id_priv, work);
+ else
+ cm_deref_id(cm_id_priv);
+ return 0;
+
+unlock: spin_unlock_irq(&cm_id_priv->lock);
+deref: cm_deref_id(cm_id_priv);
+ return -EINVAL;
+}
+
+static int cm_drep_handler(struct cm_work *work)
+{
+ struct cm_id_private *cm_id_priv;
+ struct cm_drep_msg *drep_msg;
+ int ret;
+
+ drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
+ cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
+ drep_msg->local_comm_id);
+ if (!cm_id_priv)
+ return -EINVAL;
+
+ work->cm_event.private_data = &drep_msg->private_data;
+
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
+ cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ goto out;
+ }
+ cm_enter_timewait(cm_id_priv);
+
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ ret = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!ret)
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (ret)
+ cm_process_work(cm_id_priv, work);
+ else
+ cm_deref_id(cm_id_priv);
+ return 0;
+out:
+ cm_deref_id(cm_id_priv);
+ return -EINVAL;
+}
+
+int ib_send_cm_rej(struct ib_cm_id *cm_id,
+ enum ib_cm_rej_reason reason,
+ void *ari,
+ u8 ari_length,
+ const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
+ unsigned long flags;
+ int ret;
+
+ if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
+ (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
+ return -EINVAL;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ switch (cm_id->state) {
+ case IB_CM_REQ_SENT:
+ case IB_CM_MRA_REQ_RCVD:
+ case IB_CM_REQ_RCVD:
+ case IB_CM_MRA_REQ_SENT:
+ case IB_CM_REP_RCVD:
+ case IB_CM_MRA_REP_SENT:
+ ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (!ret)
+ cm_format_rej((struct cm_rej_msg *) msg->mad,
+ cm_id_priv, reason, ari, ari_length,
+ private_data, private_data_len);
+
+ cm_reset_to_idle(cm_id_priv);
+ break;
+ case IB_CM_REP_SENT:
+ case IB_CM_MRA_REP_RCVD:
+ ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (!ret)
+ cm_format_rej((struct cm_rej_msg *) msg->mad,
+ cm_id_priv, reason, ari, ari_length,
+ private_data, private_data_len);
+
+ cm_enter_timewait(cm_id_priv);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (ret)
+ goto out;
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ cm_free_msg(msg);
+
+out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_rej);
+
+static void cm_format_rej_event(struct cm_work *work)
+{
+ struct cm_rej_msg *rej_msg;
+ struct ib_cm_rej_event_param *param;
+
+ rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
+ param = &work->cm_event.param.rej_rcvd;
+ param->ari = rej_msg->ari;
+ param->ari_length = cm_rej_get_reject_info_len(rej_msg);
+ param->reason = __be16_to_cpu(rej_msg->reason);
+ work->cm_event.private_data = &rej_msg->private_data;
+}
+
+static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
+{
+ struct cm_timewait_info *timewait_info;
+ struct cm_id_private *cm_id_priv;
+ __be32 remote_id;
+
+ remote_id = rej_msg->local_comm_id;
+
+ if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
+ spin_lock_irq(&cm.lock);
+ timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
+ remote_id);
+ if (!timewait_info) {
+ spin_unlock_irq(&cm.lock);
+ return NULL;
+ }
+ cm_id_priv = idr_find(&cm.local_id_table, (__force int)
+ (timewait_info->work.local_id ^
+ cm.random_id_operand));
+ if (cm_id_priv) {
+ if (cm_id_priv->id.remote_id == remote_id)
+ atomic_inc(&cm_id_priv->refcount);
+ else
+ cm_id_priv = NULL;
+ }
+ spin_unlock_irq(&cm.lock);
+ } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
+ cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
+ else
+ cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
+
+ return cm_id_priv;
+}
+
+static int cm_rej_handler(struct cm_work *work)
+{
+ struct cm_id_private *cm_id_priv;
+ struct cm_rej_msg *rej_msg;
+ int ret;
+
+ rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
+ cm_id_priv = cm_acquire_rejected_id(rej_msg);
+ if (!cm_id_priv)
+ return -EINVAL;
+
+ cm_format_rej_event(work);
+
+ spin_lock_irq(&cm_id_priv->lock);
+ switch (cm_id_priv->id.state) {
+ case IB_CM_REQ_SENT:
+ case IB_CM_MRA_REQ_RCVD:
+ case IB_CM_REP_SENT:
+ case IB_CM_MRA_REP_RCVD:
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ /* fall through */
+ case IB_CM_REQ_RCVD:
+ case IB_CM_MRA_REQ_SENT:
+ if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
+ cm_enter_timewait(cm_id_priv);
+ else
+ cm_reset_to_idle(cm_id_priv);
+ break;
+ case IB_CM_DREQ_SENT:
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ /* fall through */
+ case IB_CM_REP_RCVD:
+ case IB_CM_MRA_REP_SENT:
+ case IB_CM_ESTABLISHED:
+ cm_enter_timewait(cm_id_priv);
+ break;
+ default:
+ spin_unlock_irq(&cm_id_priv->lock);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!ret)
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (ret)
+ cm_process_work(cm_id_priv, work);
+ else
+ cm_deref_id(cm_id_priv);
+ return 0;
+out:
+ cm_deref_id(cm_id_priv);
+ return -EINVAL;
+}
+
+int ib_send_cm_mra(struct ib_cm_id *cm_id,
+ u8 service_timeout,
+ const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
+ enum ib_cm_state cm_state;
+ enum ib_cm_lap_state lap_state;
+ enum cm_msg_response msg_response;
+ void *data;
+ unsigned long flags;
+ int ret;
+
+ if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
+ return -EINVAL;
+
+ data = cm_copy_private_data(private_data, private_data_len);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ switch(cm_id_priv->id.state) {
+ case IB_CM_REQ_RCVD:
+ cm_state = IB_CM_MRA_REQ_SENT;
+ lap_state = cm_id->lap_state;
+ msg_response = CM_MSG_RESPONSE_REQ;
+ break;
+ case IB_CM_REP_RCVD:
+ cm_state = IB_CM_MRA_REP_SENT;
+ lap_state = cm_id->lap_state;
+ msg_response = CM_MSG_RESPONSE_REP;
+ break;
+ case IB_CM_ESTABLISHED:
+ if (cm_id->lap_state == IB_CM_LAP_RCVD) {
+ cm_state = cm_id->state;
+ lap_state = IB_CM_MRA_LAP_SENT;
+ msg_response = CM_MSG_RESPONSE_OTHER;
+ break;
+ }
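+ /* fall through */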
+ default:
+ ret = -EINVAL;
+ goto error1;
+ }
+
+ if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
+ ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (ret)
+ goto error1;
+
+ cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
+ msg_response, service_timeout,
+ private_data, private_data_len);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ goto error2;
+ }
+
+ cm_id->state = cm_state;
+ cm_id->lap_state = lap_state;
+ cm_id_priv->service_timeout = service_timeout;
+ cm_set_private_data(cm_id_priv, data, private_data_len);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return 0;
+
+error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ kfree(data);
+ return ret;
+
+error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ kfree(data);
+ cm_free_msg(msg);
+ return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_mra);
+
+static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
+{
+ switch (cm_mra_get_msg_mraed(mra_msg)) {
+ case CM_MSG_RESPONSE_REQ:
+ return cm_acquire_id(mra_msg->remote_comm_id, 0);
+ case CM_MSG_RESPONSE_REP:
+ case CM_MSG_RESPONSE_OTHER:
+ return cm_acquire_id(mra_msg->remote_comm_id,
+ mra_msg->local_comm_id);
+ default:
+ return NULL;
+ }
+}
+
+static int cm_mra_handler(struct cm_work *work)
+{
+ struct cm_id_private *cm_id_priv;
+ struct cm_mra_msg *mra_msg;
+ int timeout, ret;
+
+ mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
+ cm_id_priv = cm_acquire_mraed_id(mra_msg);
+ if (!cm_id_priv)
+ return -EINVAL;
+
+ work->cm_event.private_data = &mra_msg->private_data;
+ work->cm_event.param.mra_rcvd.service_timeout =
+ cm_mra_get_service_timeout(mra_msg);
+ timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
+ cm_convert_to_ms(cm_id_priv->av.timeout);
+ if (timeout > cm_convert_to_ms(max_timeout)) {
+ printk(KERN_WARNING PFX "calculated mra timeout %d > %d, "
+ "decreasing used timeout_ms\n", timeout,
+ cm_convert_to_ms(max_timeout));
+ timeout = cm_convert_to_ms(max_timeout);
+ }
+
+ spin_lock_irq(&cm_id_priv->lock);
+ switch (cm_id_priv->id.state) {
+ case IB_CM_REQ_SENT:
+ if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
+ ib_modify_mad(cm_id_priv->av.port->mad_agent,
+ cm_id_priv->msg, timeout))
+ goto out;
+ cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
+ break;
+ case IB_CM_REP_SENT:
+ if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
+ ib_modify_mad(cm_id_priv->av.port->mad_agent,
+ cm_id_priv->msg, timeout))
+ goto out;
+ cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
+ break;
+ case IB_CM_ESTABLISHED:
+ if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
+ cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
+ ib_modify_mad(cm_id_priv->av.port->mad_agent,
+ cm_id_priv->msg, timeout)) {
+ if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+ atomic_long_inc(&work->port->
+ counter_group[CM_RECV_DUPLICATES].
+ counter[CM_MRA_COUNTER]);
+ goto out;
+ }
+ cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
+ break;
+ case IB_CM_MRA_REQ_RCVD:
+ case IB_CM_MRA_REP_RCVD:
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_MRA_COUNTER]);
+ /* fall through */
+ default:
+ goto out;
+ }
+
+ cm_id_priv->msg->context[1] = (void *) (unsigned long)
+ cm_id_priv->id.state;
+ ret = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!ret)
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (ret)
+ cm_process_work(cm_id_priv, work);
+ else
+ cm_deref_id(cm_id_priv);
+ return 0;
+out:
+ spin_unlock_irq(&cm_id_priv->lock);
+ cm_deref_id(cm_id_priv);
+ return -EINVAL;
+}
+
+static void cm_format_lap(struct cm_lap_msg *lap_msg,
+ struct cm_id_private *cm_id_priv,
+ struct ib_sa_path_rec *alternate_path,
+ const void *private_data,
+ u8 private_data_len)
+{
+ cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
+ cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
+ lap_msg->local_comm_id = cm_id_priv->id.local_id;
+ lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
+ /* todo: need remote CM response timeout */
+ cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
+ lap_msg->alt_local_lid = alternate_path->slid;
+ lap_msg->alt_remote_lid = alternate_path->dlid;
+ lap_msg->alt_local_gid = alternate_path->sgid;
+ lap_msg->alt_remote_gid = alternate_path->dgid;
+ cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
+ cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
+ lap_msg->alt_hop_limit = alternate_path->hop_limit;
+ cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
+ cm_lap_set_sl(lap_msg, alternate_path->sl);
+ cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
+ cm_lap_set_local_ack_timeout(lap_msg,
+ cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
+ alternate_path->packet_life_time));
+
+ if (private_data && private_data_len)
+ memcpy(lap_msg->private_data, private_data, private_data_len);
+}
+
+int ib_send_cm_lap(struct ib_cm_id *cm_id,
+ struct ib_sa_path_rec *alternate_path,
+ const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
+ unsigned long flags;
+ int ret;
+
+ if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
+ return -EINVAL;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id->state != IB_CM_ESTABLISHED ||
+ (cm_id->lap_state != IB_CM_LAP_UNINIT &&
+ cm_id->lap_state != IB_CM_LAP_IDLE)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
+ if (ret)
+ goto out;
+ cm_id_priv->alt_av.timeout =
+ cm_ack_timeout(cm_id_priv->target_ack_delay,
+ cm_id_priv->alt_av.timeout - 1);
+
+ ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (ret)
+ goto out;
+
+ cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
+ alternate_path, private_data, private_data_len);
+ msg->timeout_ms = cm_id_priv->timeout_ms;
+ msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_free_msg(msg);
+ return ret;
+ }
+
+ cm_id->lap_state = IB_CM_LAP_SENT;
+ cm_id_priv->msg = msg;
+
+out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_lap);
+
+static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
+ struct ib_sa_path_rec *path,
+ struct cm_lap_msg *lap_msg)
+{
+ memset(path, 0, sizeof *path);
+ path->dgid = lap_msg->alt_local_gid;
+ path->sgid = lap_msg->alt_remote_gid;
+ path->dlid = lap_msg->alt_local_lid;
+ path->slid = lap_msg->alt_remote_lid;
+ path->flow_label = cm_lap_get_flow_label(lap_msg);
+ path->hop_limit = lap_msg->alt_hop_limit;
+ path->traffic_class = cm_lap_get_traffic_class(lap_msg);
+ path->reversible = 1;
+ path->pkey = cm_id_priv->pkey;
+ path->sl = cm_lap_get_sl(lap_msg);
+ path->mtu_selector = IB_SA_EQ;
+ path->mtu = cm_id_priv->path_mtu;
+ path->rate_selector = IB_SA_EQ;
+ path->rate = cm_lap_get_packet_rate(lap_msg);
+ path->packet_life_time_selector = IB_SA_EQ;
+ path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
+ path->packet_life_time -= (path->packet_life_time > 0);
+}
+
+static int cm_lap_handler(struct cm_work *work)
+{
+ struct cm_id_private *cm_id_priv;
+ struct cm_lap_msg *lap_msg;
+ struct ib_cm_lap_event_param *param;
+ struct ib_mad_send_buf *msg = NULL;
+ int ret;
+
+ /* todo: verify LAP request and send reject APR if invalid. */
+ lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
+ cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
+ lap_msg->local_comm_id);
+ if (!cm_id_priv)
+ return -EINVAL;
+
+ param = &work->cm_event.param.lap_rcvd;
+ param->alternate_path = &work->path[0];
+ cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
+ work->cm_event.private_data = &lap_msg->private_data;
+
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
+ goto unlock;
+
+ switch (cm_id_priv->id.lap_state) {
+ case IB_CM_LAP_UNINIT:
+ case IB_CM_LAP_IDLE:
+ break;
+ case IB_CM_MRA_LAP_SENT:
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_LAP_COUNTER]);
+ if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
+ goto unlock;
+
+ cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
+ CM_MSG_RESPONSE_OTHER,
+ cm_id_priv->service_timeout,
+ cm_id_priv->private_data,
+ cm_id_priv->private_data_len);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (ib_post_send_mad(msg, NULL))
+ cm_free_msg(msg);
+ goto deref;
+ case IB_CM_LAP_RCVD:
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_LAP_COUNTER]);
+ goto unlock;
+ default:
+ goto unlock;
+ }
+
+ cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
+ cm_id_priv->tid = lap_msg->hdr.tid;
+ cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
+ ret = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!ret)
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (ret)
+ cm_process_work(cm_id_priv, work);
+ else
+ cm_deref_id(cm_id_priv);
+ return 0;
+
+unlock: spin_unlock_irq(&cm_id_priv->lock);
+deref: cm_deref_id(cm_id_priv);
+ return -EINVAL;
+}
+
+static void cm_format_apr(struct cm_apr_msg *apr_msg,
+ struct cm_id_private *cm_id_priv,
+ enum ib_cm_apr_status status,
+ void *info,
+ u8 info_length,
+ const void *private_data,
+ u8 private_data_len)
+{
+ cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
+ apr_msg->local_comm_id = cm_id_priv->id.local_id;
+ apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ apr_msg->ap_status = (u8) status;
+
+ if (info && info_length) {
+ apr_msg->info_length = info_length;
+ memcpy(apr_msg->info, info, info_length);
+ }
+
+ if (private_data && private_data_len)
+ memcpy(apr_msg->private_data, private_data, private_data_len);
+}
+
+int ib_send_cm_apr(struct ib_cm_id *cm_id,
+ enum ib_cm_apr_status status,
+ void *info,
+ u8 info_length,
+ const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
+ unsigned long flags;
+ int ret;
+
+ if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
+ (info && info_length > IB_CM_APR_INFO_LENGTH))
+ return -EINVAL;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id->state != IB_CM_ESTABLISHED ||
+ (cm_id->lap_state != IB_CM_LAP_RCVD &&
+ cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (ret)
+ goto out;
+
+ cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
+ info, info_length, private_data, private_data_len);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_free_msg(msg);
+ return ret;
+ }
+
+ cm_id->lap_state = IB_CM_LAP_IDLE;
+out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_apr);
+
+static int cm_apr_handler(struct cm_work *work)
+{
+ struct cm_id_private *cm_id_priv;
+ struct cm_apr_msg *apr_msg;
+ int ret;
+
+ apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
+ cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
+ apr_msg->local_comm_id);
+ if (!cm_id_priv)
+ return -EINVAL; /* Unmatched reply. */
+
+ work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
+ work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
+ work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
+ work->cm_event.private_data = &apr_msg->private_data;
+
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
+ (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
+ cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ goto out;
+ }
+ cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ cm_id_priv->msg = NULL;
+
+ ret = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!ret)
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (ret)
+ cm_process_work(cm_id_priv, work);
+ else
+ cm_deref_id(cm_id_priv);
+ return 0;
+out:
+ cm_deref_id(cm_id_priv);
+ return -EINVAL;
+}
+
+static int cm_timewait_handler(struct cm_work *work)
+{
+ struct cm_timewait_info *timewait_info;
+ struct cm_id_private *cm_id_priv;
+ int ret;
+
+ timewait_info = (struct cm_timewait_info *)work;
+ spin_lock_irq(&cm.lock);
+ list_del(&timewait_info->list);
+ spin_unlock_irq(&cm.lock);
+
+ cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+ if (!cm_id_priv)
+ return -EINVAL;
+
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
+ cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ goto out;
+ }
+ cm_id_priv->id.state = IB_CM_IDLE;
+ ret = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!ret)
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (ret)
+ cm_process_work(cm_id_priv, work);
+ else
+ cm_deref_id(cm_id_priv);
+ return 0;
+out:
+ cm_deref_id(cm_id_priv);
+ return -EINVAL;
+}
+
+static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
+ struct cm_id_private *cm_id_priv,
+ struct ib_cm_sidr_req_param *param)
+{
+ cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
+ cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
+ sidr_req_msg->request_id = cm_id_priv->id.local_id;
+ sidr_req_msg->pkey = param->path->pkey;
+ sidr_req_msg->service_id = param->service_id;
+
+ if (param->private_data && param->private_data_len)
+ memcpy(sidr_req_msg->private_data, param->private_data,
+ param->private_data_len);
+}
+
+int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
+ struct ib_cm_sidr_req_param *param)
+{
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
+ unsigned long flags;
+ int ret;
+
+ if (!param->path || (param->private_data &&
+ param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
+ return -EINVAL;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
+ if (ret)
+ goto out;
+
+ cm_id->service_id = param->service_id;
+ cm_id->service_mask = ~cpu_to_be64(0);
+ cm_id_priv->timeout_ms = param->timeout_ms;
+ if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
+ printk(KERN_WARNING PFX "sidr req timeout_ms %d > %d, "
+ "decreasing used timeout_ms\n", param->timeout_ms,
+ cm_convert_to_ms(max_timeout));
+ cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
+ }
+ cm_id_priv->max_cm_retries = param->max_cm_retries;
+ ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (ret)
+ goto out;
+
+ cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
+ param);
+ msg->timeout_ms = cm_id_priv->timeout_ms;
+ msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id->state == IB_CM_IDLE)
+ ret = ib_post_send_mad(msg, NULL);
+ else
+ ret = -EINVAL;
+
+ if (ret) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_free_msg(msg);
+ goto out;
+ }
+ cm_id->state = IB_CM_SIDR_REQ_SENT;
+ cm_id_priv->msg = msg;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+out:
+ return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_sidr_req);
+
+static void cm_format_sidr_req_event(struct cm_work *work,
+ struct ib_cm_id *listen_id)
+{
+ struct cm_sidr_req_msg *sidr_req_msg;
+ struct ib_cm_sidr_req_event_param *param;
+
+ sidr_req_msg = (struct cm_sidr_req_msg *)
+ work->mad_recv_wc->recv_buf.mad;
+ param = &work->cm_event.param.sidr_req_rcvd;
+ param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
+ param->listen_id = listen_id;
+ param->port = work->port->port_num;
+ work->cm_event.private_data = &sidr_req_msg->private_data;
+}
+
+static int cm_sidr_req_handler(struct cm_work *work)
+{
+ struct ib_cm_id *cm_id;
+ struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
+ struct cm_sidr_req_msg *sidr_req_msg;
+ struct ib_wc *wc;
+
+ cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
+ if (IS_ERR(cm_id))
+ return PTR_ERR(cm_id);
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+
+ /* Record SGID/SLID and request ID for lookup. */
+ sidr_req_msg = (struct cm_sidr_req_msg *)
+ work->mad_recv_wc->recv_buf.mad;
+ wc = work->mad_recv_wc->wc;
+ cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
+ cm_id_priv->av.dgid.global.interface_id = 0;
+ cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ cm_id_priv->id.remote_id = sidr_req_msg->request_id;
+ cm_id_priv->tid = sidr_req_msg->hdr.tid;
+ atomic_inc(&cm_id_priv->work_count);
+
+ spin_lock_irq(&cm.lock);
+ cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
+ if (cur_cm_id_priv) {
+ spin_unlock_irq(&cm.lock);
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_SIDR_REQ_COUNTER]);
+ goto out; /* Duplicate message. */
+ }
+ cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
+ cur_cm_id_priv = cm_find_listen(cm_id->device,
+ sidr_req_msg->service_id,
+ sidr_req_msg->private_data);
+ if (!cur_cm_id_priv) {
+ spin_unlock_irq(&cm.lock);
+ cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
+ goto out; /* No match. */
+ }
+ atomic_inc(&cur_cm_id_priv->refcount);
+ spin_unlock_irq(&cm.lock);
+
+ cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
+ cm_id_priv->id.context = cur_cm_id_priv->id.context;
+ cm_id_priv->id.service_id = sidr_req_msg->service_id;
+ cm_id_priv->id.service_mask = ~cpu_to_be64(0);
+
+ cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
+ cm_process_work(cm_id_priv, work);
+ cm_deref_id(cur_cm_id_priv);
+ return 0;
+out:
+ ib_destroy_cm_id(&cm_id_priv->id);
+ return -EINVAL;
+}
+
+static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
+ struct cm_id_private *cm_id_priv,
+ struct ib_cm_sidr_rep_param *param)
+{
+ cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
+ cm_id_priv->tid);
+ sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
+ sidr_rep_msg->status = param->status;
+ cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
+ sidr_rep_msg->service_id = cm_id_priv->id.service_id;
+ sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
+
+ if (param->info && param->info_length)
+ memcpy(sidr_rep_msg->info, param->info, param->info_length);
+
+ if (param->private_data && param->private_data_len)
+ memcpy(sidr_rep_msg->private_data, param->private_data,
+ param->private_data_len);
+}
+
+int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
+ struct ib_cm_sidr_rep_param *param)
+{
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
+ unsigned long flags;
+ int ret;
+
+ if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
+ (param->private_data &&
+ param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
+ return -EINVAL;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
+ ret = -EINVAL;
+ goto error;
+ }
+
+ ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (ret)
+ goto error;
+
+ cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
+ param);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_free_msg(msg);
+ return ret;
+ }
+ cm_id->state = IB_CM_IDLE;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ spin_lock_irqsave(&cm.lock, flags);
+ rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return 0;
+
+error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_sidr_rep);
+
+static void cm_format_sidr_rep_event(struct cm_work *work)
+{
+ struct cm_sidr_rep_msg *sidr_rep_msg;
+ struct ib_cm_sidr_rep_event_param *param;
+
+ sidr_rep_msg = (struct cm_sidr_rep_msg *)
+ work->mad_recv_wc->recv_buf.mad;
+ param = &work->cm_event.param.sidr_rep_rcvd;
+ param->status = sidr_rep_msg->status;
+ param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
+ param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
+ param->info = &sidr_rep_msg->info;
+ param->info_len = sidr_rep_msg->info_length;
+ work->cm_event.private_data = &sidr_rep_msg->private_data;
+}
+
+static int cm_sidr_rep_handler(struct cm_work *work)
+{
+ struct cm_sidr_rep_msg *sidr_rep_msg;
+ struct cm_id_private *cm_id_priv;
+
+ sidr_rep_msg = (struct cm_sidr_rep_msg *)
+ work->mad_recv_wc->recv_buf.mad;
+ cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
+ if (!cm_id_priv)
+ return -EINVAL; /* Unmatched reply. */
+
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ goto out;
+ }
+ cm_id_priv->id.state = IB_CM_IDLE;
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ cm_format_sidr_rep_event(work);
+ cm_process_work(cm_id_priv, work);
+ return 0;
+out:
+ cm_deref_id(cm_id_priv);
+ return -EINVAL;
+}
+
+static void cm_process_send_error(struct ib_mad_send_buf *msg,
+ enum ib_wc_status wc_status)
+{
+ struct cm_id_private *cm_id_priv;
+ struct ib_cm_event cm_event;
+ enum ib_cm_state state;
+ int ret;
+
+ memset(&cm_event, 0, sizeof cm_event);
+ cm_id_priv = msg->context[0];
+
+ /* Discard old sends or ones without a response. */
+ spin_lock_irq(&cm_id_priv->lock);
+ state = (enum ib_cm_state) (unsigned long) msg->context[1];
+ if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
+ goto discard;
+
+ switch (state) {
+ case IB_CM_REQ_SENT:
+ case IB_CM_MRA_REQ_RCVD:
+ cm_reset_to_idle(cm_id_priv);
+ cm_event.event = IB_CM_REQ_ERROR;
+ break;
+ case IB_CM_REP_SENT:
+ case IB_CM_MRA_REP_RCVD:
+ cm_reset_to_idle(cm_id_priv);
+ cm_event.event = IB_CM_REP_ERROR;
+ break;
+ case IB_CM_DREQ_SENT:
+ cm_enter_timewait(cm_id_priv);
+ cm_event.event = IB_CM_DREQ_ERROR;
+ break;
+ case IB_CM_SIDR_REQ_SENT:
+ cm_id_priv->id.state = IB_CM_IDLE;
+ cm_event.event = IB_CM_SIDR_REQ_ERROR;
+ break;
+ default:
+ goto discard;
+ }
+ spin_unlock_irq(&cm_id_priv->lock);
+ cm_event.param.send_status = wc_status;
+
+ /* No other events can occur on the cm_id at this point. */
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
+ cm_free_msg(msg);
+ if (ret)
+ ib_destroy_cm_id(&cm_id_priv->id);
+ return;
+discard:
+ spin_unlock_irq(&cm_id_priv->lock);
+ cm_free_msg(msg);
+}
+
+static void cm_send_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
+ struct cm_port *port;
+ u16 attr_index;
+
+ port = mad_agent->context;
+ attr_index = be16_to_cpu(((struct ib_mad_hdr *)
+ msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
+
+ /*
+ * If the send was in response to a received message (context[0] is not
+ * set to a cm_id), and is not a REJ, then it is a send that was
+ * manually retried.
+ */
+ if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
+ msg->retries = 1;
+
+ atomic_long_add(1 + msg->retries,
+ &port->counter_group[CM_XMIT].counter[attr_index]);
+ if (msg->retries)
+ atomic_long_add(msg->retries,
+ &port->counter_group[CM_XMIT_RETRIES].
+ counter[attr_index]);
+
+ switch (mad_send_wc->status) {
+ case IB_WC_SUCCESS:
+ case IB_WC_WR_FLUSH_ERR:
+ cm_free_msg(msg);
+ break;
+ default:
+ if (msg->context[0] && msg->context[1])
+ cm_process_send_error(msg, mad_send_wc->status);
+ else
+ cm_free_msg(msg);
+ break;
+ }
+}
+
+static void cm_work_handler(struct work_struct *_work)
+{
+ struct cm_work *work = container_of(_work, struct cm_work, work.work);
+ int ret;
+
+ switch (work->cm_event.event) {
+ case IB_CM_REQ_RECEIVED:
+ ret = cm_req_handler(work);
+ break;
+ case IB_CM_MRA_RECEIVED:
+ ret = cm_mra_handler(work);
+ break;
+ case IB_CM_REJ_RECEIVED:
+ ret = cm_rej_handler(work);
+ break;
+ case IB_CM_REP_RECEIVED:
+ ret = cm_rep_handler(work);
+ break;
+ case IB_CM_RTU_RECEIVED:
+ ret = cm_rtu_handler(work);
+ break;
+ case IB_CM_USER_ESTABLISHED:
+ ret = cm_establish_handler(work);
+ break;
+ case IB_CM_DREQ_RECEIVED:
+ ret = cm_dreq_handler(work);
+ break;
+ case IB_CM_DREP_RECEIVED:
+ ret = cm_drep_handler(work);
+ break;
+ case IB_CM_SIDR_REQ_RECEIVED:
+ ret = cm_sidr_req_handler(work);
+ break;
+ case IB_CM_SIDR_REP_RECEIVED:
+ ret = cm_sidr_rep_handler(work);
+ break;
+ case IB_CM_LAP_RECEIVED:
+ ret = cm_lap_handler(work);
+ break;
+ case IB_CM_APR_RECEIVED:
+ ret = cm_apr_handler(work);
+ break;
+ case IB_CM_TIMEWAIT_EXIT:
+ ret = cm_timewait_handler(work);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ if (ret)
+ cm_free_work(work);
+}
+
+static int cm_establish(struct ib_cm_id *cm_id)
+{
+ struct cm_id_private *cm_id_priv;
+ struct cm_work *work;
+ unsigned long flags;
+ int ret = 0;
+
+ work = kmalloc(sizeof *work, GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ switch (cm_id->state) {
+ case IB_CM_REP_SENT:
+ case IB_CM_MRA_REP_RCVD:
+ cm_id->state = IB_CM_ESTABLISHED;
+ break;
+ case IB_CM_ESTABLISHED:
+ ret = -EISCONN;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ if (ret) {
+ kfree(work);
+ goto out;
+ }
+
+ /*
+ * The CM worker thread may try to destroy the cm_id before it
+ * can execute this work item. To prevent potential deadlock,
+ * we need to find the cm_id once we're in the context of the
+ * worker thread, rather than holding a reference on it.
+ */
+ INIT_DELAYED_WORK(&work->work, cm_work_handler);
+ work->local_id = cm_id->local_id;
+ work->remote_id = cm_id->remote_id;
+ work->mad_recv_wc = NULL;
+ work->cm_event.event = IB_CM_USER_ESTABLISHED;
+ queue_delayed_work(cm.wq, &work->work, 0);
+out:
+ return ret;
+}
+
+static int cm_migrate(struct ib_cm_id *cm_id)
+{
+ struct cm_id_private *cm_id_priv;
+ unsigned long flags;
+ int ret = 0;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id->state == IB_CM_ESTABLISHED &&
+ (cm_id->lap_state == IB_CM_LAP_UNINIT ||
+ cm_id->lap_state == IB_CM_LAP_IDLE)) {
+ cm_id->lap_state = IB_CM_LAP_IDLE;
+ cm_id_priv->av = cm_id_priv->alt_av;
+ } else
+ ret = -EINVAL;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ return ret;
+}
+
+int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
+{
+ int ret;
+
+ switch (event) {
+ case IB_EVENT_COMM_EST:
+ ret = cm_establish(cm_id);
+ break;
+ case IB_EVENT_PATH_MIG:
+ ret = cm_migrate(cm_id);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ return ret;
+}
+EXPORT_SYMBOL(ib_cm_notify);
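
For context: consumers typically invoke ib_cm_notify() from their QP event handler when the hardware reports one of these events. A minimal sketch, assuming the consumer saved its cm_id as the QP context; the handler name and context layout are illustrative, not part of this commit:

static void example_qp_event_handler(struct ib_event *event, void *ctx)
{
	struct ib_cm_id *cm_id = ctx;	/* assumption: cm_id stored as QP context */

	/* Forward the two events the CM state machine understands. */
	if (event->event == IB_EVENT_COMM_EST ||
	    event->event == IB_EVENT_PATH_MIG)
		ib_cm_notify(cm_id, event->event);
}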
+
+static void cm_recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct cm_port *port = mad_agent->context;
+ struct cm_work *work;
+ enum ib_cm_event_type event;
+ u16 attr_id;
+ int paths = 0;
+
+ switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
+ case CM_REQ_ATTR_ID:
+ paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
+ alt_local_lid != 0);
+ event = IB_CM_REQ_RECEIVED;
+ break;
+ case CM_MRA_ATTR_ID:
+ event = IB_CM_MRA_RECEIVED;
+ break;
+ case CM_REJ_ATTR_ID:
+ event = IB_CM_REJ_RECEIVED;
+ break;
+ case CM_REP_ATTR_ID:
+ event = IB_CM_REP_RECEIVED;
+ break;
+ case CM_RTU_ATTR_ID:
+ event = IB_CM_RTU_RECEIVED;
+ break;
+ case CM_DREQ_ATTR_ID:
+ event = IB_CM_DREQ_RECEIVED;
+ break;
+ case CM_DREP_ATTR_ID:
+ event = IB_CM_DREP_RECEIVED;
+ break;
+ case CM_SIDR_REQ_ATTR_ID:
+ event = IB_CM_SIDR_REQ_RECEIVED;
+ break;
+ case CM_SIDR_REP_ATTR_ID:
+ event = IB_CM_SIDR_REP_RECEIVED;
+ break;
+ case CM_LAP_ATTR_ID:
+ paths = 1;
+ event = IB_CM_LAP_RECEIVED;
+ break;
+ case CM_APR_ATTR_ID:
+ event = IB_CM_APR_RECEIVED;
+ break;
+ default:
+ ib_free_recv_mad(mad_recv_wc);
+ return;
+ }
+
+ attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
+ atomic_long_inc(&port->counter_group[CM_RECV].
+ counter[attr_id - CM_ATTR_ID_OFFSET]);
+
+ work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
+ GFP_KERNEL);
+ if (!work) {
+ ib_free_recv_mad(mad_recv_wc);
+ return;
+ }
+
+ INIT_DELAYED_WORK(&work->work, cm_work_handler);
+ work->cm_event.event = event;
+ work->mad_recv_wc = mad_recv_wc;
+ work->port = port;
+ queue_delayed_work(cm.wq, &work->work, 0);
+}
+
+static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
+ struct ib_qp_attr *qp_attr,
+ int *qp_attr_mask)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ switch (cm_id_priv->id.state) {
+ case IB_CM_REQ_SENT:
+ case IB_CM_MRA_REQ_RCVD:
+ case IB_CM_REQ_RCVD:
+ case IB_CM_MRA_REQ_SENT:
+ case IB_CM_REP_RCVD:
+ case IB_CM_MRA_REP_SENT:
+ case IB_CM_REP_SENT:
+ case IB_CM_MRA_REP_RCVD:
+ case IB_CM_ESTABLISHED:
+ *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX | IB_QP_PORT;
+ qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
+ if (cm_id_priv->responder_resources)
+ qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_ATOMIC;
+ qp_attr->pkey_index = cm_id_priv->av.pkey_index;
+ qp_attr->port_num = cm_id_priv->av.port->port_num;
+ ret = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+
+static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
+ struct ib_qp_attr *qp_attr,
+ int *qp_attr_mask)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ switch (cm_id_priv->id.state) {
+ case IB_CM_REQ_RCVD:
+ case IB_CM_MRA_REQ_SENT:
+ case IB_CM_REP_RCVD:
+ case IB_CM_MRA_REP_SENT:
+ case IB_CM_REP_SENT:
+ case IB_CM_MRA_REP_RCVD:
+ case IB_CM_ESTABLISHED:
+ *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
+ IB_QP_DEST_QPN | IB_QP_RQ_PSN;
+ qp_attr->ah_attr = cm_id_priv->av.ah_attr;
+ qp_attr->path_mtu = cm_id_priv->path_mtu;
+ qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
+ qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
+ if (cm_id_priv->qp_type == IB_QPT_RC) {
+ *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_MIN_RNR_TIMER;
+ qp_attr->max_dest_rd_atomic =
+ cm_id_priv->responder_resources;
+ qp_attr->min_rnr_timer = 0;
+ }
+ if (cm_id_priv->alt_av.ah_attr.dlid) {
+ *qp_attr_mask |= IB_QP_ALT_PATH;
+ qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
+ qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
+ qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
+ qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
+ }
+ ret = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+
+static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
+ struct ib_qp_attr *qp_attr,
+ int *qp_attr_mask)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ switch (cm_id_priv->id.state) {
+ /* Allow transition to RTS before sending REP */
+ case IB_CM_REQ_RCVD:
+ case IB_CM_MRA_REQ_SENT:
+
+ case IB_CM_REP_RCVD:
+ case IB_CM_MRA_REP_SENT:
+ case IB_CM_REP_SENT:
+ case IB_CM_MRA_REP_RCVD:
+ case IB_CM_ESTABLISHED:
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
+ *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
+ qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
+ if (cm_id_priv->qp_type == IB_QPT_RC) {
+ *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_MAX_QP_RD_ATOMIC;
+ qp_attr->timeout = cm_id_priv->av.timeout;
+ qp_attr->retry_cnt = cm_id_priv->retry_count;
+ qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
+ qp_attr->max_rd_atomic =
+ cm_id_priv->initiator_depth;
+ }
+ if (cm_id_priv->alt_av.ah_attr.dlid) {
+ *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
+ qp_attr->path_mig_state = IB_MIG_REARM;
+ }
+ } else {
+ *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
+ qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
+ qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
+ qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
+ qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
+ qp_attr->path_mig_state = IB_MIG_REARM;
+ }
+ ret = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+
+int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
+ struct ib_qp_attr *qp_attr,
+ int *qp_attr_mask)
+{
+ struct cm_id_private *cm_id_priv;
+ int ret;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ switch (qp_attr->qp_state) {
+ case IB_QPS_INIT:
+ ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
+ break;
+ case IB_QPS_RTR:
+ ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
+ break;
+ case IB_QPS_RTS:
+ ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+EXPORT_SYMBOL(ib_cm_init_qp_attr);
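
ib_cm_init_qp_attr() only fills in the state-dependent attributes; the caller still applies them with ib_modify_qp(). A minimal sketch of the usual INIT/RTR/RTS sequence (error handling abbreviated; "qp" is assumed to be the consumer's queue pair and is not part of this commit):

static int example_qp_to_rts(struct ib_cm_id *cm_id, struct ib_qp *qp)
{
	static const enum ib_qp_state states[] =
		{ IB_QPS_INIT, IB_QPS_RTR, IB_QPS_RTS };
	struct ib_qp_attr attr;
	int i, mask, ret;

	for (i = 0; i < 3; i++) {
		/* Let the CM compute the attributes for this transition... */
		attr.qp_state = states[i];
		ret = ib_cm_init_qp_attr(cm_id, &attr, &mask);
		if (ret)
			return ret;
		/* ...then hand them to the verbs layer. */
		ret = ib_modify_qp(qp, &attr, mask);
		if (ret)
			return ret;
	}
	return 0;
}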
+
+static void cm_get_ack_delay(struct cm_device *cm_dev)
+{
+ struct ib_device_attr attr;
+
+ if (ib_query_device(cm_dev->ib_device, &attr))
+ cm_dev->ack_delay = 0; /* acks will rely on packet life time */
+ else
+ cm_dev->ack_delay = attr.local_ca_ack_delay;
+}
+
+static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
+ char *buf)
+{
+ struct cm_counter_group *group;
+ struct cm_counter_attribute *cm_attr;
+
+ group = container_of(obj, struct cm_counter_group, obj);
+ cm_attr = container_of(attr, struct cm_counter_attribute, attr);
+
+ return sprintf(buf, "%ld\n",
+ atomic_long_read(&group->counter[cm_attr->index]));
+}
+
+static struct sysfs_ops cm_counter_ops = {
+ .show = cm_show_counter
+};
+
+static struct kobj_type cm_counter_obj_type = {
+ .sysfs_ops = &cm_counter_ops,
+ .default_attrs = cm_counter_default_attrs
+};
+
+static void cm_release_port_obj(struct kobject *obj)
+{
+ struct cm_port *cm_port;
+
+ cm_port = container_of(obj, struct cm_port, port_obj);
+ kfree(cm_port);
+}
+
+static struct kobj_type cm_port_obj_type = {
+ .release = cm_release_port_obj
+};
+
+struct class cm_class = {
+ .name = "infiniband_cm",
+};
+EXPORT_SYMBOL(cm_class);
+
+static int cm_create_port_fs(struct cm_port *port)
+{
+ int i, ret;
+
+ ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
+ &port->cm_dev->device->kobj,
+ "%d", port->port_num);
+ if (ret) {
+ kfree(port);
+ return ret;
+ }
+
+ for (i = 0; i < CM_COUNTER_GROUPS; i++) {
+ ret = kobject_init_and_add(&port->counter_group[i].obj,
+ &cm_counter_obj_type,
+ &port->port_obj,
+ "%s", counter_group_names[i]);
+ if (ret)
+ goto error;
+ }
+
+ return 0;
+
+error:
+ while (i--)
+ kobject_put(&port->counter_group[i].obj);
+ kobject_put(&port->port_obj);
+ return ret;
+}
+
+static void cm_remove_port_fs(struct cm_port *port)
+{
+ int i;
+
+ for (i = 0; i < CM_COUNTER_GROUPS; i++)
+ kobject_put(&port->counter_group[i].obj);
+
+ kobject_put(&port->port_obj);
+}
+
+static void cm_add_one(struct ib_device *ib_device)
+{
+ struct cm_device *cm_dev;
+ struct cm_port *port;
+ struct ib_mad_reg_req reg_req = {
+ .mgmt_class = IB_MGMT_CLASS_CM,
+ .mgmt_class_version = IB_CM_CLASS_VERSION
+ };
+ struct ib_port_modify port_modify = {
+ .set_port_cap_mask = IB_PORT_CM_SUP
+ };
+ unsigned long flags;
+ int ret;
+ u8 i;
+
+ if (rdma_node_get_transport(ib_device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
+ cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
+ ib_device->phys_port_cnt, GFP_KERNEL);
+ if (!cm_dev)
+ return;
+
+ cm_dev->ib_device = ib_device;
+ cm_get_ack_delay(cm_dev);
+
+ cm_dev->device = device_create(&cm_class, &ib_device->dev,
+ MKDEV(0, 0), NULL,
+ "%s", ib_device->name);
+ if (!cm_dev->device) {
+ kfree(cm_dev);
+ return;
+ }
+
+ set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
+ for (i = 1; i <= ib_device->phys_port_cnt; i++) {
+ port = kzalloc(sizeof *port, GFP_KERNEL);
+ if (!port)
+ goto error1;
+
+ cm_dev->port[i-1] = port;
+ port->cm_dev = cm_dev;
+ port->port_num = i;
+
+ ret = cm_create_port_fs(port);
+ if (ret)
+ goto error1;
+
+ port->mad_agent = ib_register_mad_agent(ib_device, i,
+ IB_QPT_GSI,
+ &reg_req,
+ 0,
+ cm_send_handler,
+ cm_recv_handler,
+ port);
+ if (IS_ERR(port->mad_agent))
+ goto error2;
+
+ ret = ib_modify_port(ib_device, i, 0, &port_modify);
+ if (ret)
+ goto error3;
+ }
+ ib_set_client_data(ib_device, &cm_client, cm_dev);
+
+ write_lock_irqsave(&cm.device_lock, flags);
+ list_add_tail(&cm_dev->list, &cm.device_list);
+ write_unlock_irqrestore(&cm.device_lock, flags);
+ return;
+
+error3:
+ ib_unregister_mad_agent(port->mad_agent);
+error2:
+ cm_remove_port_fs(port);
+error1:
+ port_modify.set_port_cap_mask = 0;
+ port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
+ while (--i) {
+ port = cm_dev->port[i-1];
+ ib_modify_port(ib_device, port->port_num, 0, &port_modify);
+ ib_unregister_mad_agent(port->mad_agent);
+ cm_remove_port_fs(port);
+ }
+ device_unregister(cm_dev->device);
+ kfree(cm_dev);
+}
+
+static void cm_remove_one(struct ib_device *ib_device)
+{
+ struct cm_device *cm_dev;
+ struct cm_port *port;
+ struct ib_port_modify port_modify = {
+ .clr_port_cap_mask = IB_PORT_CM_SUP
+ };
+ unsigned long flags;
+ int i;
+
+ cm_dev = ib_get_client_data(ib_device, &cm_client);
+ if (!cm_dev)
+ return;
+
+ write_lock_irqsave(&cm.device_lock, flags);
+ list_del(&cm_dev->list);
+ write_unlock_irqrestore(&cm.device_lock, flags);
+
+ for (i = 1; i <= ib_device->phys_port_cnt; i++) {
+ port = cm_dev->port[i-1];
+ ib_modify_port(ib_device, port->port_num, 0, &port_modify);
+ ib_unregister_mad_agent(port->mad_agent);
+ flush_workqueue(cm.wq);
+ cm_remove_port_fs(port);
+ }
+ device_unregister(cm_dev->device);
+ kfree(cm_dev);
+}
+
+static int __init ib_cm_init(void)
+{
+ int ret;
+
+ memset(&cm, 0, sizeof cm);
+ INIT_LIST_HEAD(&cm.device_list);
+ rwlock_init(&cm.device_lock);
+ spin_lock_init(&cm.lock);
+ cm.listen_service_table = RB_ROOT;
+ cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
+ cm.remote_id_table = RB_ROOT;
+ cm.remote_qp_table = RB_ROOT;
+ cm.remote_sidr_table = RB_ROOT;
+ idr_init(&cm.local_id_table);
+ get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
+ idr_pre_get(&cm.local_id_table, GFP_KERNEL);
+ INIT_LIST_HEAD(&cm.timewait_list);
+
+ ret = class_register(&cm_class);
+ if (ret)
+ return ret;
+
+ cm.wq = create_workqueue("ib_cm");
+ if (!cm.wq) {
+ ret = -ENOMEM;
+ goto error1;
+ }
+
+ ret = ib_register_client(&cm_client);
+ if (ret)
+ goto error2;
+
+ return 0;
+error2:
+ destroy_workqueue(cm.wq);
+error1:
+ class_unregister(&cm_class);
+ return ret;
+}
+
+static void __exit ib_cm_cleanup(void)
+{
+ struct cm_timewait_info *timewait_info, *tmp;
+
+ spin_lock_irq(&cm.lock);
+ list_for_each_entry(timewait_info, &cm.timewait_list, list)
+ cancel_delayed_work(&timewait_info->work.work);
+ spin_unlock_irq(&cm.lock);
+
+ ib_unregister_client(&cm_client);
+ destroy_workqueue(cm.wq);
+
+ list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
+ list_del(&timewait_info->list);
+ kfree(timewait_info);
+ }
+
+ class_unregister(&cm_class);
+ idr_destroy(&cm.local_id_table);
+}
+
+module_init_order(ib_cm_init, SI_ORDER_SECOND);
+module_exit_order(ib_cm_cleanup, SI_ORDER_FIRST);
+
Property changes on: trunk/sys/ofed/drivers/infiniband/core/cm.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/cm_msgs.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/cm_msgs.h (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/cm_msgs.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,819 @@
+/*
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#if !defined(CM_MSGS_H)
+#define CM_MSGS_H
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_cm.h>
+
+/*
+ * Parameters to routines below should be in network-byte order, and values
+ * are returned in network-byte order.
+ */
+
+#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
+
+#define CM_REQ_ATTR_ID cpu_to_be16(0x0010)
+#define CM_MRA_ATTR_ID cpu_to_be16(0x0011)
+#define CM_REJ_ATTR_ID cpu_to_be16(0x0012)
+#define CM_REP_ATTR_ID cpu_to_be16(0x0013)
+#define CM_RTU_ATTR_ID cpu_to_be16(0x0014)
+#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015)
+#define CM_DREP_ATTR_ID cpu_to_be16(0x0016)
+#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017)
+#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018)
+#define CM_LAP_ATTR_ID cpu_to_be16(0x0019)
+#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
+
+enum cm_msg_sequence {
+ CM_MSG_SEQUENCE_REQ,
+ CM_MSG_SEQUENCE_LAP,
+ CM_MSG_SEQUENCE_DREQ,
+ CM_MSG_SEQUENCE_SIDR
+};
+
+struct cm_req_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 rsvd4;
+ __be64 service_id;
+ __be64 local_ca_guid;
+ __be32 rsvd24;
+ __be32 local_qkey;
+ /* local QPN:24, responder resources:8 */
+ __be32 offset32;
+ /* local EECN:24, initiator depth:8 */
+ __be32 offset36;
+ /*
+ * remote EECN:24, remote CM response timeout:5,
+ * transport service type:2, end-to-end flow control:1
+ */
+ __be32 offset40;
+ /* starting PSN:24, local CM response timeout:5, retry count:3 */
+ __be32 offset44;
+ __be16 pkey;
+ /* path MTU:4, RDC exists:1, RNR retry count:3. */
+ u8 offset50;
+ /* max CM Retries:4, SRQ:1, rsvd:3 */
+ u8 offset51;
+
+ __be16 primary_local_lid;
+ __be16 primary_remote_lid;
+ union ib_gid primary_local_gid;
+ union ib_gid primary_remote_gid;
+ /* flow label:20, rsvd:6, packet rate:6 */
+ __be32 primary_offset88;
+ u8 primary_traffic_class;
+ u8 primary_hop_limit;
+ /* SL:4, subnet local:1, rsvd:3 */
+ u8 primary_offset94;
+ /* local ACK timeout:5, rsvd:3 */
+ u8 primary_offset95;
+
+ __be16 alt_local_lid;
+ __be16 alt_remote_lid;
+ union ib_gid alt_local_gid;
+ union ib_gid alt_remote_gid;
+ /* flow label:20, rsvd:6, packet rate:6 */
+ __be32 alt_offset132;
+ u8 alt_traffic_class;
+ u8 alt_hop_limit;
+ /* SL:4, subnet local:1, rsvd:3 */
+ u8 alt_offset138;
+ /* local ACK timeout:5, rsvd:3 */
+ u8 alt_offset139;
+
+ u8 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE];
+
+} __attribute__ ((packed));
+
+static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg)
+{
+ return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8);
+}
+
+static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn)
+{
+ req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
+ (be32_to_cpu(req_msg->offset32) &
+ 0x000000FF));
+}
+
+static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg)
+{
+ return (u8) be32_to_cpu(req_msg->offset32);
+}
+
+static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res)
+{
+ req_msg->offset32 = cpu_to_be32(resp_res |
+ (be32_to_cpu(req_msg->offset32) &
+ 0xFFFFFF00));
+}
+
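The two accessors above share offset32: the local QPN lives in the upper 24 bits and responder resources in the low byte, so the setters can be applied in either order without clobbering each other. An illustrative round trip (values arbitrary, not from this commit):

	struct cm_req_msg req;

	memset(&req, 0, sizeof req);
	cm_req_set_local_qpn(&req, cpu_to_be32(0x123456));	/* upper 24 bits */
	cm_req_set_resp_res(&req, 4);				/* low byte */
	/* cm_req_get_local_qpn(&req) now returns cpu_to_be32(0x123456), */
	/* and cm_req_get_resp_res(&req) returns 4. */
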
+static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg)
+{
+ return (u8) be32_to_cpu(req_msg->offset36);
+}
+
+static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg,
+ u8 init_depth)
+{
+ req_msg->offset36 = cpu_to_be32(init_depth |
+ (be32_to_cpu(req_msg->offset36) &
+ 0xFFFFFF00));
+}
+
+static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg)
+{
+ return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3);
+}
+
+static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg,
+ u8 resp_timeout)
+{
+ req_msg->offset40 = cpu_to_be32((resp_timeout << 3) |
+ (be32_to_cpu(req_msg->offset40) &
+ 0xFFFFFF07));
+}
+
+static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
+{
+ u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1;
+ switch(transport_type) {
+ case 0: return IB_QPT_RC;
+ case 1: return IB_QPT_UC;
+ default: return 0;
+ }
+}
+
+static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
+ enum ib_qp_type qp_type)
+{
+ switch(qp_type) {
+ case IB_QPT_UC:
+ req_msg->offset40 = cpu_to_be32((be32_to_cpu(
+ req_msg->offset40) &
+ 0xFFFFFFF9) | 0x2);
+ break;
+ default:
+ req_msg->offset40 = cpu_to_be32(be32_to_cpu(
+ req_msg->offset40) &
+ 0xFFFFFFF9);
+ }
+}
+
+static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg)
+{
+ return be32_to_cpu(req_msg->offset40) & 0x1;
+}
+
+static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg,
+ u8 flow_ctrl)
+{
+ req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) |
+ (be32_to_cpu(req_msg->offset40) &
+ 0xFFFFFFFE));
+}
+
+static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg)
+{
+ return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8);
+}
+
+static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg,
+ __be32 starting_psn)
+{
+ req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
+ (be32_to_cpu(req_msg->offset44) & 0x000000FF));
+}
+
+static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg)
+{
+ return (u8) ((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3);
+}
+
+static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg,
+ u8 resp_timeout)
+{
+ req_msg->offset44 = cpu_to_be32((resp_timeout << 3) |
+ (be32_to_cpu(req_msg->offset44) & 0xFFFFFF07));
+}
+
+static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg)
+{
+ return (u8) (be32_to_cpu(req_msg->offset44) & 0x7);
+}
+
+static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg,
+ u8 retry_count)
+{
+ req_msg->offset44 = cpu_to_be32((retry_count & 0x7) |
+ (be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8));
+}
+
+static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg)
+{
+ return req_msg->offset50 >> 4;
+}
+
+static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu)
+{
+ req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4));
+}
+
+static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg)
+{
+ return req_msg->offset50 & 0x7;
+}
+
+static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg,
+ u8 rnr_retry_count)
+{
+ req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) |
+ (rnr_retry_count & 0x7));
+}
+
+static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg)
+{
+ return req_msg->offset51 >> 4;
+}
+
+static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg,
+ u8 retries)
+{
+ req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4));
+}
+
+static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg)
+{
+ return (req_msg->offset51 & 0x8) >> 3;
+}
+
+static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq)
+{
+ req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) |
+ ((srq & 0x1) << 3));
+}
+
+static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg)
+{
+ return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12);
+}
+
+static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg,
+ __be32 flow_label)
+{
+ req_msg->primary_offset88 = cpu_to_be32(
+ (be32_to_cpu(req_msg->primary_offset88) &
+ 0x00000FFF) |
+ (be32_to_cpu(flow_label) << 12));
+}
+
+static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg)
+{
+ return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F);
+}
+
+static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg,
+ u8 rate)
+{
+ req_msg->primary_offset88 = cpu_to_be32(
+ (be32_to_cpu(req_msg->primary_offset88) &
+ 0xFFFFFFC0) | (rate & 0x3F));
+}
+
+static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg)
+{
+ return (u8) (req_msg->primary_offset94 >> 4);
+}
+
+static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl)
+{
+ req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) |
+ (sl << 4));
+}
+
+static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg)
+{
+ return (u8) ((req_msg->primary_offset94 & 0x08) >> 3);
+}
+
+static inline void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg,
+ u8 subnet_local)
+{
+ req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) |
+ ((subnet_local & 0x1) << 3));
+}
+
+static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg)
+{
+ return (u8) (req_msg->primary_offset95 >> 3);
+}
+
+static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg,
+ u8 local_ack_timeout)
+{
+ req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) |
+ (local_ack_timeout << 3));
+}
+
+static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg)
+{
+ return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12);
+}
+
+static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg,
+ __be32 flow_label)
+{
+ req_msg->alt_offset132 = cpu_to_be32(
+ (be32_to_cpu(req_msg->alt_offset132) &
+ 0x00000FFF) |
+ (be32_to_cpu(flow_label) << 12));
+}
+
+static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg)
+{
+ return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F);
+}
+
+static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg,
+ u8 rate)
+{
+ req_msg->alt_offset132 = cpu_to_be32(
+ (be32_to_cpu(req_msg->alt_offset132) &
+ 0xFFFFFFC0) | (rate & 0x3F));
+}
+
+static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg)
+{
+ return (u8) (req_msg->alt_offset138 >> 4);
+}
+
+static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl)
+{
+ req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) |
+ (sl << 4));
+}
+
+static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg)
+{
+ return (u8) ((req_msg->alt_offset138 & 0x08) >> 3);
+}
+
+static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg,
+ u8 subnet_local)
+{
+ req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) |
+ ((subnet_local & 0x1) << 3));
+}
+
+static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg)
+{
+ return (u8) (req_msg->alt_offset139 >> 3);
+}
+
+static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg,
+ u8 local_ack_timeout)
+{
+ req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) |
+ (local_ack_timeout << 3));
+}
+
+/* Message REJected or MRAed */
+enum cm_msg_response {
+ CM_MSG_RESPONSE_REQ = 0x0,
+ CM_MSG_RESPONSE_REP = 0x1,
+ CM_MSG_RESPONSE_OTHER = 0x2
+};
+
+struct cm_mra_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 remote_comm_id;
+ /* message MRAed:2, rsvd:6 */
+ u8 offset8;
+ /* service timeout:5, rsvd:3 */
+ u8 offset9;
+
+ u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE];
+
+} __attribute__ ((packed));
+
+static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg)
+{
+ return (u8) (mra_msg->offset8 >> 6);
+}
+
+static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg)
+{
+ mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6));
+}
+
+static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg)
+{
+ return (u8) (mra_msg->offset9 >> 3);
+}
+
+static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg,
+ u8 service_timeout)
+{
+ mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) |
+ (service_timeout << 3));
+}
+
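The 5-bit service timeout stored by cm_mra_set_service_timeout() is an exponent rather than a duration: under the InfiniBand CM timeout encoding the represented interval is

    4.096 us * 2^timeout      (e.g. timeout = 21 gives roughly 8.6 s)

so each increment doubles the grace period the MRA requests.
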
+struct cm_rej_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 remote_comm_id;
+ /* message REJected:2, rsvd:6 */
+ u8 offset8;
+ /* reject info length:7, rsvd:1. */
+ u8 offset9;
+ __be16 reason;
+ u8 ari[IB_CM_REJ_ARI_LENGTH];
+
+ u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE];
+
+} __attribute__ ((packed));
+
+static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg)
+{
+ return (u8) (rej_msg->offset8 >> 6);
+}
+
+static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg)
+{
+ rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6));
+}
+
+static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg)
+{
+ return (u8) (rej_msg->offset9 >> 1);
+}
+
+static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg,
+ u8 len)
+{
+ rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1));
+}
+
+struct cm_rep_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 remote_comm_id;
+ __be32 local_qkey;
+ /* local QPN:24, rsvd:8 */
+ __be32 offset12;
+ /* local EECN:24, rsvd:8 */
+ __be32 offset16;
+	/* starting PSN:24, rsvd:8 */
+ __be32 offset20;
+ u8 resp_resources;
+ u8 initiator_depth;
+ /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */
+ u8 offset26;
+ /* RNR retry count:3, SRQ:1, rsvd:5 */
+ u8 offset27;
+ __be64 local_ca_guid;
+
+ u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE];
+
+} __attribute__ ((packed));
+
+static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg)
+{
+ return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8);
+}
+
+static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
+{
+ rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
+ (be32_to_cpu(rep_msg->offset12) & 0x000000FF));
+}
+
+static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
+{
+ return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
+}
+
+static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg,
+ __be32 starting_psn)
+{
+ rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
+ (be32_to_cpu(rep_msg->offset20) & 0x000000FF));
+}
+
+static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg)
+{
+ return (u8) (rep_msg->offset26 >> 3);
+}
+
+static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg,
+ u8 target_ack_delay)
+{
+ rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) |
+ (target_ack_delay << 3));
+}
+
+static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg)
+{
+ return (u8) ((rep_msg->offset26 & 0x06) >> 1);
+}
+
+static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover)
+{
+ rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) |
+ ((failover & 0x3) << 1));
+}
+
+static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg)
+{
+ return (u8) (rep_msg->offset26 & 0x01);
+}
+
+static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg,
+ u8 flow_ctrl)
+{
+ rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) |
+ (flow_ctrl & 0x1));
+}
+
+static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg)
+{
+ return (u8) (rep_msg->offset27 >> 5);
+}
+
+static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg,
+ u8 rnr_retry_count)
+{
+ rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) |
+ (rnr_retry_count << 5));
+}
+
+static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg)
+{
+ return (u8) ((rep_msg->offset27 >> 4) & 0x1);
+}
+
+static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq)
+{
+ rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) |
+ ((srq & 0x1) << 4));
+}
+
+struct cm_rtu_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 remote_comm_id;
+
+ u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE];
+
+} __attribute__ ((packed));
+
+struct cm_dreq_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 remote_comm_id;
+ /* remote QPN/EECN:24, rsvd:8 */
+ __be32 offset8;
+
+ u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE];
+
+} __attribute__ ((packed));
+
+static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg)
+{
+ return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8);
+}
+
+static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn)
+{
+ dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
+ (be32_to_cpu(dreq_msg->offset8) & 0x000000FF));
+}
+
+struct cm_drep_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 remote_comm_id;
+
+ u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE];
+
+} __attribute__ ((packed));
+
+struct cm_lap_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 remote_comm_id;
+
+ __be32 rsvd8;
+ /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */
+ __be32 offset12;
+ __be32 rsvd16;
+
+ __be16 alt_local_lid;
+ __be16 alt_remote_lid;
+ union ib_gid alt_local_gid;
+ union ib_gid alt_remote_gid;
+ /* flow label:20, rsvd:4, traffic class:8 */
+ __be32 offset56;
+ u8 alt_hop_limit;
+ /* rsvd:2, packet rate:6 */
+ u8 offset61;
+ /* SL:4, subnet local:1, rsvd:3 */
+ u8 offset62;
+ /* local ACK timeout:5, rsvd:3 */
+ u8 offset63;
+
+ u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE];
+} __attribute__ ((packed));
+
+static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg)
+{
+ return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8);
+}
+
+static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn)
+{
+ lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
+ (be32_to_cpu(lap_msg->offset12) &
+ 0x000000FF));
+}
+
+static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg)
+{
+ return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3);
+}
+
+static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg,
+ u8 resp_timeout)
+{
+ lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) |
+ (be32_to_cpu(lap_msg->offset12) &
+ 0xFFFFFF07));
+}
+
+static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg)
+{
+ return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12);
+}
+
+static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg,
+ __be32 flow_label)
+{
+ lap_msg->offset56 = cpu_to_be32(
+ (be32_to_cpu(lap_msg->offset56) & 0x00000FFF) |
+ (be32_to_cpu(flow_label) << 12));
+}
+
+static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg)
+{
+ return (u8) be32_to_cpu(lap_msg->offset56);
+}
+
+static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg,
+ u8 traffic_class)
+{
+ lap_msg->offset56 = cpu_to_be32(traffic_class |
+ (be32_to_cpu(lap_msg->offset56) &
+ 0xFFFFFF00));
+}
+
+static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg)
+{
+ return lap_msg->offset61 & 0x3F;
+}
+
+static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg,
+ u8 packet_rate)
+{
+ lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0);
+}
+
+static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg)
+{
+ return lap_msg->offset62 >> 4;
+}
+
+static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl)
+{
+ lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F);
+}
+
+static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg)
+{
+ return (lap_msg->offset62 >> 3) & 0x1;
+}
+
+static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg,
+ u8 subnet_local)
+{
+ lap_msg->offset62 = ((subnet_local & 0x1) << 3) |
+			    (lap_msg->offset62 & 0xF7);
+}
+
+static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg)
+{
+ return lap_msg->offset63 >> 3;
+}
+
+static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg,
+ u8 local_ack_timeout)
+{
+ lap_msg->offset63 = (local_ack_timeout << 3) |
+ (lap_msg->offset63 & 0x07);
+}
+
+struct cm_apr_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 remote_comm_id;
+
+ u8 info_length;
+ u8 ap_status;
+ u8 info[IB_CM_APR_INFO_LENGTH];
+
+ u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];
+} __attribute__ ((packed));
+
+struct cm_sidr_req_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 request_id;
+ __be16 pkey;
+ __be16 rsvd;
+ __be64 service_id;
+
+ u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE];
+} __attribute__ ((packed));
+
+struct cm_sidr_rep_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 request_id;
+ u8 status;
+ u8 info_length;
+ __be16 rsvd;
+ /* QPN:24, rsvd:8 */
+ __be32 offset8;
+ __be64 service_id;
+ __be32 qkey;
+ u8 info[IB_CM_SIDR_REP_INFO_LENGTH];
+
+ u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE];
+} __attribute__ ((packed));
+
+static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg)
+{
+ return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8);
+}
+
+static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg,
+ __be32 qpn)
+{
+ sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
+ (be32_to_cpu(sidr_rep_msg->offset8) &
+ 0x000000FF));
+}
+
+#endif /* CM_MSGS_H */
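Because every message above is declared packed so it can overlay a fixed-size MAD buffer, a compile-time size assertion is a cheap guard against stray padding. A hypothetical guard, not part of this change, assuming the kernel's BUILD_BUG_ON macro and the usual 256-byte CM MAD (24-byte ib_mad_hdr plus payload and private data):

    /* hypothetical compile-time checks; sizes follow the layouts above */
    BUILD_BUG_ON(sizeof(struct cm_req_msg) != 256);
    BUILD_BUG_ON(sizeof(struct cm_rep_msg) != 256);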
Property changes on: trunk/sys/ofed/drivers/infiniband/core/cm_msgs.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/cma.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/cma.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/cma.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,3420 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/mutex.h>
+#include <linux/random.h>
+#include <linux/idr.h>
+#include <linux/inetdevice.h>
+
+#include <net/tcp.h>
+#include <net/ipv6.h>
+
+#include <rdma/rdma_cm.h>
+#include <rdma/rdma_cm_ib.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_cm.h>
+#include <rdma/ib_sa.h>
+#include <rdma/iw_cm.h>
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("Generic RDMA CM Agent");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static int tavor_quirk = 0;
+module_param_named(tavor_quirk, tavor_quirk, int, 0644);
+MODULE_PARM_DESC(tavor_quirk, "Tavor performance quirk: limit MTU to 1K if > 0");
+
+int unify_tcp_port_space = 1;
+module_param(unify_tcp_port_space, int, 0644);
+MODULE_PARM_DESC(unify_tcp_port_space, "Unify the host TCP and RDMA port "
+ "space allocation (default=1)");
+
+#define CMA_CM_RESPONSE_TIMEOUT 20
+#define CMA_MAX_CM_RETRIES 15
+#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
+#define IBOE_PACKET_LIFETIME 18
+
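CMA_CM_RESPONSE_TIMEOUT above is an exponent in the IB CM's 4.096 us * 2^n timeout encoding, not a millisecond count: a value of 20 corresponds to

    4.096 us * 2^20  ~=  4.3 s

per attempt, and CMA_MAX_CM_RETRIES bounds how many such attempts are made.
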
+static int cma_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
+module_param_named(cma_response_timeout, cma_response_timeout, int, 0644);
+MODULE_PARM_DESC(cma_response_timeout, "CMA_CM_RESPONSE_TIMEOUT default=20");
+
+static int def_prec2sl = 3;
+module_param_named(def_prec2sl, def_prec2sl, int, 0644);
+MODULE_PARM_DESC(def_prec2sl, "Default value for SL priority with RoCE. Valid values: 0-7");
+
+static void cma_add_one(struct ib_device *device);
+static void cma_remove_one(struct ib_device *device);
+
+static struct ib_client cma_client = {
+ .name = "cma",
+ .add = cma_add_one,
+ .remove = cma_remove_one
+};
+
+static struct ib_sa_client sa_client;
+static struct rdma_addr_client addr_client;
+static LIST_HEAD(dev_list);
+static LIST_HEAD(listen_any_list);
+static DEFINE_MUTEX(lock);
+static struct workqueue_struct *cma_wq;
+static DEFINE_IDR(sdp_ps);
+static DEFINE_IDR(tcp_ps);
+static DEFINE_IDR(udp_ps);
+static DEFINE_IDR(ipoib_ps);
+#if defined(INET)
+static int next_port;
+#endif
+
+struct cma_device {
+ struct list_head list;
+ struct ib_device *device;
+ struct completion comp;
+ atomic_t refcount;
+ struct list_head id_list;
+};
+
+enum cma_state {
+ CMA_IDLE,
+ CMA_ADDR_QUERY,
+ CMA_ADDR_RESOLVED,
+ CMA_ROUTE_QUERY,
+ CMA_ROUTE_RESOLVED,
+ CMA_CONNECT,
+ CMA_DISCONNECT,
+ CMA_ADDR_BOUND,
+ CMA_LISTEN,
+ CMA_DEVICE_REMOVAL,
+ CMA_DESTROYING
+};
+
+struct rdma_bind_list {
+ struct idr *ps;
+ struct hlist_head owners;
+ unsigned short port;
+};
+
+/*
+ * Device removal can occur at any time, so we need extra handling to
+ * serialize notifying the user of device removal with other callbacks.
+ * We do this by disabling removal notification while a callback is in process,
+ * and reporting it after the callback completes.
+ */
+struct rdma_id_private {
+ struct rdma_cm_id id;
+
+ struct rdma_bind_list *bind_list;
+ struct socket *sock;
+ struct hlist_node node;
+ struct list_head list; /* listen_any_list or cma_device.list */
+ struct list_head listen_list; /* per device listens */
+ struct cma_device *cma_dev;
+ struct list_head mc_list;
+
+ int internal_id;
+ enum cma_state state;
+ spinlock_t lock;
+ struct mutex qp_mutex;
+
+ struct completion comp;
+ atomic_t refcount;
+ struct mutex handler_mutex;
+
+ int backlog;
+ int timeout_ms;
+ struct ib_sa_query *query;
+ int query_id;
+ union {
+ struct ib_cm_id *ib;
+ struct iw_cm_id *iw;
+ } cm_id;
+
+ u32 seq_num;
+ u32 qkey;
+ u32 qp_num;
+ u8 srq;
+ u8 tos;
+};
+
+struct cma_multicast {
+ struct rdma_id_private *id_priv;
+ union {
+ struct ib_sa_multicast *ib;
+ } multicast;
+ struct list_head list;
+ void *context;
+ struct sockaddr_storage addr;
+ struct kref mcref;
+};
+
+struct cma_work {
+ struct work_struct work;
+ struct rdma_id_private *id;
+ enum cma_state old_state;
+ enum cma_state new_state;
+ struct rdma_cm_event event;
+};
+
+struct cma_ndev_work {
+ struct work_struct work;
+ struct rdma_id_private *id;
+ struct rdma_cm_event event;
+};
+
+struct iboe_mcast_work {
+ struct work_struct work;
+ struct rdma_id_private *id;
+ struct cma_multicast *mc;
+};
+
+union cma_ip_addr {
+ struct in6_addr ip6;
+ struct {
+ __be32 pad[3];
+ __be32 addr;
+ } ip4;
+};
+
+struct cma_hdr {
+ u8 cma_version;
+ u8 ip_version; /* IP version: 7:4 */
+ __be16 port;
+ union cma_ip_addr src_addr;
+ union cma_ip_addr dst_addr;
+};
+
+struct sdp_hh {
+ u8 bsdh[16];
+ u8 sdp_version; /* Major version: 7:4 */
+ u8 ip_version; /* IP version: 7:4 */
+ u8 sdp_specific1[10];
+ __be16 port;
+ __be16 sdp_specific2;
+ union cma_ip_addr src_addr;
+ union cma_ip_addr dst_addr;
+};
+
+struct sdp_hah {
+ u8 bsdh[16];
+ u8 sdp_version;
+};
+
+#define CMA_VERSION 0x00
+#define SDP_MAJ_VERSION 0x2
+
+static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&id_priv->lock, flags);
+ ret = (id_priv->state == comp);
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ return ret;
+}
+
+static int cma_comp_exch(struct rdma_id_private *id_priv,
+ enum cma_state comp, enum cma_state exch)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&id_priv->lock, flags);
+ if ((ret = (id_priv->state == comp)))
+ id_priv->state = exch;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ return ret;
+}
+
+static enum cma_state cma_exch(struct rdma_id_private *id_priv,
+ enum cma_state exch)
+{
+ unsigned long flags;
+ enum cma_state old;
+
+ spin_lock_irqsave(&id_priv->lock, flags);
+ old = id_priv->state;
+ id_priv->state = exch;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ return old;
+}
+
+static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
+{
+ return hdr->ip_version >> 4;
+}
+
+static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+{
+ hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
+}
+
+static inline u8 sdp_get_majv(u8 sdp_version)
+{
+ return sdp_version >> 4;
+}
+
+static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
+{
+ return hh->ip_version >> 4;
+}
+
+static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
+{
+ hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
+}
+
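Both the CMA and SDP headers keep their version fields in the high nibble of a shared byte, with the setters preserving whatever occupies the low nibble. A tiny stand-alone illustration in user-space C:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint8_t ip_version = 0x0A;              /* low nibble already occupied */

            /* equivalent of cma_set_ip_ver(hdr, 4) */
            ip_version = (4 << 4) | (ip_version & 0xF);
            assert(ip_version >> 4 == 4);           /* cma_get_ip_ver() view */
            assert((ip_version & 0xF) == 0x0A);     /* low nibble kept */
            return 0;
    }
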
+static inline int cma_is_ud_ps(enum rdma_port_space ps)
+{
+ return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
+}
+
+static void cma_attach_to_dev(struct rdma_id_private *id_priv,
+ struct cma_device *cma_dev)
+{
+ atomic_inc(&cma_dev->refcount);
+ id_priv->cma_dev = cma_dev;
+ id_priv->id.device = cma_dev->device;
+ id_priv->id.route.addr.dev_addr.transport =
+ rdma_node_get_transport(cma_dev->device->node_type);
+ list_add_tail(&id_priv->list, &cma_dev->id_list);
+}
+
+static inline void cma_deref_dev(struct cma_device *cma_dev)
+{
+ if (atomic_dec_and_test(&cma_dev->refcount))
+ complete(&cma_dev->comp);
+}
+
+static inline void release_mc(struct kref *kref)
+{
+ struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
+
+ kfree(mc->multicast.ib);
+ kfree(mc);
+}
+
+static void cma_detach_from_dev(struct rdma_id_private *id_priv)
+{
+ list_del(&id_priv->list);
+ cma_deref_dev(id_priv->cma_dev);
+ id_priv->cma_dev = NULL;
+}
+
+static int cma_set_qkey(struct rdma_id_private *id_priv)
+{
+ struct ib_sa_mcmember_rec rec;
+ int ret = 0;
+
+ if (id_priv->qkey)
+ return 0;
+
+ switch (id_priv->id.ps) {
+ case RDMA_PS_UDP:
+ id_priv->qkey = RDMA_UDP_QKEY;
+ break;
+ case RDMA_PS_IPOIB:
+ ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
+ ret = ib_sa_get_mcmember_rec(id_priv->id.device,
+ id_priv->id.port_num, &rec.mgid,
+ &rec);
+ if (!ret)
+ id_priv->qkey = be32_to_cpu(rec.qkey);
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static int cma_acquire_dev(struct rdma_id_private *id_priv)
+{
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ struct cma_device *cma_dev;
+ union ib_gid gid;
+ int ret = -ENODEV;
+
+ if (dev_addr->dev_type != ARPHRD_INFINIBAND) {
+ iboe_addr_get_sgid(dev_addr, &gid);
+ list_for_each_entry(cma_dev, &dev_list, list) {
+ ret = ib_find_cached_gid(cma_dev->device, &gid,
+ &id_priv->id.port_num, NULL);
+ if (!ret)
+ goto out;
+ }
+ }
+
+ memcpy(&gid, dev_addr->src_dev_addr +
+ rdma_addr_gid_offset(dev_addr), sizeof gid);
+ list_for_each_entry(cma_dev, &dev_list, list) {
+ ret = ib_find_cached_gid(cma_dev->device, &gid,
+ &id_priv->id.port_num, NULL);
+ if (!ret)
+ break;
+ }
+
+out:
+ if (!ret)
+ cma_attach_to_dev(id_priv, cma_dev);
+
+ return ret;
+}
+
+static void cma_deref_id(struct rdma_id_private *id_priv)
+{
+ if (atomic_dec_and_test(&id_priv->refcount))
+ complete(&id_priv->comp);
+}
+
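+/*
+ * Returns 0 with handler_mutex held when the id is still in the expected
+ * state; the caller must drop the mutex once its callback work is done.
+ */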
+static int cma_disable_callback(struct rdma_id_private *id_priv,
+ enum cma_state state)
+{
+ mutex_lock(&id_priv->handler_mutex);
+ if (id_priv->state != state) {
+ mutex_unlock(&id_priv->handler_mutex);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int cma_has_cm_dev(struct rdma_id_private *id_priv)
+{
+ return (id_priv->id.device && id_priv->cm_id.ib);
+}
+
+struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
+ void *context, enum rdma_port_space ps)
+{
+ struct rdma_id_private *id_priv;
+
+ id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
+ if (!id_priv)
+ return ERR_PTR(-ENOMEM);
+
+ id_priv->state = CMA_IDLE;
+ id_priv->id.context = context;
+ id_priv->id.event_handler = event_handler;
+ id_priv->id.ps = ps;
+ spin_lock_init(&id_priv->lock);
+ mutex_init(&id_priv->qp_mutex);
+ init_completion(&id_priv->comp);
+ atomic_set(&id_priv->refcount, 1);
+ mutex_init(&id_priv->handler_mutex);
+ INIT_LIST_HEAD(&id_priv->listen_list);
+ INIT_LIST_HEAD(&id_priv->mc_list);
+ get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
+
+ return &id_priv->id;
+}
+EXPORT_SYMBOL(rdma_create_id);
+
+static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{
+ struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
+
+ qp_attr.qp_state = IB_QPS_INIT;
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
+ if (ret)
+ return ret;
+
+ ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+ if (ret)
+ return ret;
+
+ qp_attr.qp_state = IB_QPS_RTR;
+ ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+ if (ret)
+ return ret;
+
+ qp_attr.qp_state = IB_QPS_RTS;
+ qp_attr.sq_psn = 0;
+ ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
+
+ return ret;
+}
+
+static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{
+ struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
+
+ qp_attr.qp_state = IB_QPS_INIT;
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
+ if (ret)
+ return ret;
+
+ return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+}
+
+int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct rdma_id_private *id_priv;
+ struct ib_qp *qp;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (id->device != pd->device)
+ return -EINVAL;
+
+ qp = ib_create_qp(pd, qp_init_attr);
+ if (IS_ERR(qp))
+ return PTR_ERR(qp);
+
+ if (cma_is_ud_ps(id_priv->id.ps))
+ ret = cma_init_ud_qp(id_priv, qp);
+ else
+ ret = cma_init_conn_qp(id_priv, qp);
+ if (ret)
+ goto err;
+
+ id->qp = qp;
+ id_priv->qp_num = qp->qp_num;
+ id_priv->srq = (qp->srq != NULL);
+ return 0;
+err:
+ ib_destroy_qp(qp);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_create_qp);
+
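As a usage sketch (hypothetical caller, assuming a previously created completion queue cq, with error handling elided), a connection-oriented consumer would typically allocate a PD on the resolved device and hand both to rdma_create_qp():

    struct ib_qp_init_attr attr = {
            .cap         = { .max_send_wr = 16, .max_recv_wr = 16,
                             .max_send_sge = 1, .max_recv_sge = 1 },
            .qp_type     = IB_QPT_RC,
            .send_cq     = cq,              /* assumed pre-created CQ */
            .recv_cq     = cq,
            .sq_sig_type = IB_SIGNAL_REQ_WR,
    };
    struct ib_pd *pd = ib_alloc_pd(cm_id->device);
    int err;

    if (!IS_ERR(pd))
            err = rdma_create_qp(cm_id, pd, &attr); /* id->qp now usable */
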
+void rdma_destroy_qp(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
+ ib_destroy_qp(id_priv->id.qp);
+ id_priv->id.qp = NULL;
+ mutex_unlock(&id_priv->qp_mutex);
+}
+EXPORT_SYMBOL(rdma_destroy_qp);
+
+static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
+ struct rdma_conn_param *conn_param)
+{
+ struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
+
+ mutex_lock(&id_priv->qp_mutex);
+ if (!id_priv->id.qp) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Need to update QP attributes from default values. */
+ qp_attr.qp_state = IB_QPS_INIT;
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
+ if (ret)
+ goto out;
+
+ ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+ if (ret)
+ goto out;
+
+ qp_attr.qp_state = IB_QPS_RTR;
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
+ if (ret)
+ goto out;
+
+ if (conn_param)
+ qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
+ ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+ mutex_unlock(&id_priv->qp_mutex);
+ return ret;
+}
+
+static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
+ struct rdma_conn_param *conn_param)
+{
+ struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
+
+ mutex_lock(&id_priv->qp_mutex);
+ if (!id_priv->id.qp) {
+ ret = 0;
+ goto out;
+ }
+
+ qp_attr.qp_state = IB_QPS_RTS;
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
+ if (ret)
+ goto out;
+
+ if (conn_param)
+ qp_attr.max_rd_atomic = conn_param->initiator_depth;
+ ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+ mutex_unlock(&id_priv->qp_mutex);
+ return ret;
+}
+
+static int cma_modify_qp_err(struct rdma_id_private *id_priv)
+{
+ struct ib_qp_attr qp_attr;
+ int ret;
+
+ mutex_lock(&id_priv->qp_mutex);
+ if (!id_priv->id.qp) {
+ ret = 0;
+ goto out;
+ }
+
+ qp_attr.qp_state = IB_QPS_ERR;
+ ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
+out:
+ mutex_unlock(&id_priv->qp_mutex);
+ return ret;
+}
+
+static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+{
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ int ret;
+ u16 pkey;
+
+ if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) ==
+ IB_LINK_LAYER_INFINIBAND)
+ pkey = ib_addr_get_pkey(dev_addr);
+ else
+ pkey = 0xffff;
+
+ ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
+ pkey, &qp_attr->pkey_index);
+ if (ret)
+ return ret;
+
+ qp_attr->port_num = id_priv->id.port_num;
+ *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
+
+ if (cma_is_ud_ps(id_priv->id.ps)) {
+ ret = cma_set_qkey(id_priv);
+ if (ret)
+ return ret;
+
+ qp_attr->qkey = id_priv->qkey;
+ *qp_attr_mask |= IB_QP_QKEY;
+ } else {
+ qp_attr->qp_access_flags = 0;
+ *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
+ }
+ return 0;
+}
+
+int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
+ int *qp_attr_mask)
+{
+ struct rdma_id_private *id_priv;
+ int ret = 0;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
+ ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
+ else
+ ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
+ qp_attr_mask);
+ if (qp_attr->qp_state == IB_QPS_RTR)
+ qp_attr->rq_psn = id_priv->seq_num;
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ if (!id_priv->cm_id.iw) {
+ qp_attr->qp_access_flags = 0;
+ *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+ } else
+ ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
+ qp_attr_mask);
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(rdma_init_qp_attr);
+
+static inline int cma_zero_addr(struct sockaddr *addr)
+{
+ struct in6_addr *ip6;
+
+ if (addr->sa_family == AF_INET)
+ return ipv4_is_zeronet(
+ ((struct sockaddr_in *)addr)->sin_addr.s_addr);
+ else {
+ ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
+ return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
+ ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
+ }
+}
+
+static inline int cma_loopback_addr(struct sockaddr *addr)
+{
+ if (addr->sa_family == AF_INET)
+ return ipv4_is_loopback(
+ ((struct sockaddr_in *) addr)->sin_addr.s_addr);
+ else
+ return ipv6_addr_loopback(
+ &((struct sockaddr_in6 *) addr)->sin6_addr);
+}
+
+static inline int cma_any_addr(struct sockaddr *addr)
+{
+ return cma_zero_addr(addr) || cma_loopback_addr(addr);
+}
+
+static inline __be16 cma_port(struct sockaddr *addr)
+{
+ if (addr->sa_family == AF_INET)
+ return ((struct sockaddr_in *) addr)->sin_port;
+ else
+ return ((struct sockaddr_in6 *) addr)->sin6_port;
+}
+
+static inline int cma_any_port(struct sockaddr *addr)
+{
+ return !cma_port(addr);
+}
+
+static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
+ u8 *ip_ver, __be16 *port,
+ union cma_ip_addr **src, union cma_ip_addr **dst)
+{
+ switch (ps) {
+ case RDMA_PS_SDP:
+ if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
+ SDP_MAJ_VERSION)
+ return -EINVAL;
+
+ *ip_ver = sdp_get_ip_ver(hdr);
+ *port = ((struct sdp_hh *) hdr)->port;
+ *src = &((struct sdp_hh *) hdr)->src_addr;
+ *dst = &((struct sdp_hh *) hdr)->dst_addr;
+ break;
+ default:
+ if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
+ return -EINVAL;
+
+ *ip_ver = cma_get_ip_ver(hdr);
+ *port = ((struct cma_hdr *) hdr)->port;
+ *src = &((struct cma_hdr *) hdr)->src_addr;
+ *dst = &((struct cma_hdr *) hdr)->dst_addr;
+ break;
+ }
+
+ if (*ip_ver != 4 && *ip_ver != 6)
+ return -EINVAL;
+ return 0;
+}
+
+static void cma_save_net_info(struct rdma_addr *addr,
+ struct rdma_addr *listen_addr,
+ u8 ip_ver, __be16 port,
+ union cma_ip_addr *src, union cma_ip_addr *dst)
+{
+ struct sockaddr_in *listen4, *ip4;
+ struct sockaddr_in6 *listen6, *ip6;
+
+ switch (ip_ver) {
+ case 4:
+ listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
+ ip4 = (struct sockaddr_in *) &addr->src_addr;
+ ip4->sin_family = listen4->sin_family;
+ ip4->sin_addr.s_addr = dst->ip4.addr;
+ ip4->sin_port = listen4->sin_port;
+
+ ip4 = (struct sockaddr_in *) &addr->dst_addr;
+ ip4->sin_family = listen4->sin_family;
+ ip4->sin_addr.s_addr = src->ip4.addr;
+ ip4->sin_port = port;
+ break;
+ case 6:
+ listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
+ ip6 = (struct sockaddr_in6 *) &addr->src_addr;
+ ip6->sin6_family = listen6->sin6_family;
+ ip6->sin6_addr = dst->ip6;
+ ip6->sin6_port = listen6->sin6_port;
+
+ ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
+ ip6->sin6_family = listen6->sin6_family;
+ ip6->sin6_addr = src->ip6;
+ ip6->sin6_port = port;
+ break;
+ default:
+ break;
+ }
+}
+
+static inline int cma_user_data_offset(enum rdma_port_space ps)
+{
+ switch (ps) {
+ case RDMA_PS_SDP:
+ return 0;
+ default:
+ return sizeof(struct cma_hdr);
+ }
+}
+
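With the layouts above, sizeof(struct cma_hdr) comes to 36 bytes (two 1-byte version fields, a 2-byte port, and two 16-byte addresses), so non-SDP consumers give up the first 36 bytes of CM REQ private data to the CMA header; SDP returns 0 because its hello header is itself the private-data payload.
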
+static void cma_cancel_route(struct rdma_id_private *id_priv)
+{
+ switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ if (id_priv->query)
+ ib_sa_cancel_query(id_priv->query_id, id_priv->query);
+ break;
+ default:
+ break;
+ }
+}
+
+static void cma_cancel_listens(struct rdma_id_private *id_priv)
+{
+ struct rdma_id_private *dev_id_priv;
+
+ /*
+ * Remove from listen_any_list to prevent added devices from spawning
+ * additional listen requests.
+ */
+ mutex_lock(&lock);
+ list_del(&id_priv->list);
+
+ while (!list_empty(&id_priv->listen_list)) {
+ dev_id_priv = list_entry(id_priv->listen_list.next,
+ struct rdma_id_private, listen_list);
+ /* sync with device removal to avoid duplicate destruction */
+ list_del_init(&dev_id_priv->list);
+ list_del(&dev_id_priv->listen_list);
+ mutex_unlock(&lock);
+
+ rdma_destroy_id(&dev_id_priv->id);
+ mutex_lock(&lock);
+ }
+ mutex_unlock(&lock);
+}
+
+static void cma_cancel_operation(struct rdma_id_private *id_priv,
+ enum cma_state state)
+{
+ switch (state) {
+ case CMA_ADDR_QUERY:
+ rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
+ break;
+ case CMA_ROUTE_QUERY:
+ cma_cancel_route(id_priv);
+ break;
+ case CMA_LISTEN:
+ if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
+ && !id_priv->cma_dev)
+ cma_cancel_listens(id_priv);
+ break;
+ default:
+ break;
+ }
+}
+
+static void cma_release_port(struct rdma_id_private *id_priv)
+{
+ struct rdma_bind_list *bind_list = id_priv->bind_list;
+
+ if (!bind_list)
+ return;
+
+ mutex_lock(&lock);
+ hlist_del(&id_priv->node);
+ if (hlist_empty(&bind_list->owners)) {
+ idr_remove(bind_list->ps, bind_list->port);
+ kfree(bind_list);
+ }
+ mutex_unlock(&lock);
+ if (id_priv->sock)
+ sock_release(id_priv->sock);
+}
+
+static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
+{
+ struct cma_multicast *mc;
+
+ while (!list_empty(&id_priv->mc_list)) {
+ mc = container_of(id_priv->mc_list.next,
+ struct cma_multicast, list);
+ list_del(&mc->list);
+ switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ ib_sa_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ break;
+ case IB_LINK_LAYER_ETHERNET:
+ kref_put(&mc->mcref, release_mc);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+void rdma_destroy_id(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv;
+ enum cma_state state;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ state = cma_exch(id_priv, CMA_DESTROYING);
+ cma_cancel_operation(id_priv, state);
+
+ mutex_lock(&lock);
+ if (id_priv->cma_dev) {
+ mutex_unlock(&lock);
+ switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+ ib_destroy_cm_id(id_priv->cm_id.ib);
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+ iw_destroy_cm_id(id_priv->cm_id.iw);
+ break;
+ default:
+ break;
+ }
+ cma_leave_mc_groups(id_priv);
+ mutex_lock(&lock);
+ cma_detach_from_dev(id_priv);
+ }
+ mutex_unlock(&lock);
+
+ cma_release_port(id_priv);
+ cma_deref_id(id_priv);
+ wait_for_completion(&id_priv->comp);
+
+ if (id_priv->internal_id)
+ cma_deref_id(id_priv->id.context);
+
+ kfree(id_priv->id.route.path_rec);
+ kfree(id_priv);
+}
+EXPORT_SYMBOL(rdma_destroy_id);
+
+static int cma_rep_recv(struct rdma_id_private *id_priv)
+{
+ int ret;
+
+ ret = cma_modify_qp_rtr(id_priv, NULL);
+ if (ret)
+ goto reject;
+
+ ret = cma_modify_qp_rts(id_priv, NULL);
+ if (ret)
+ goto reject;
+
+ ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
+ if (ret)
+ goto reject;
+
+ return 0;
+reject:
+ cma_modify_qp_err(id_priv);
+ ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
+ NULL, 0, NULL, 0);
+ return ret;
+}
+
+static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
+{
+ if (id_priv->id.ps == RDMA_PS_SDP &&
+ sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
+ SDP_MAJ_VERSION)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void cma_set_rep_event_data(struct rdma_cm_event *event,
+ struct ib_cm_rep_event_param *rep_data,
+ void *private_data)
+{
+ event->param.conn.private_data = private_data;
+ event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
+ event->param.conn.responder_resources = rep_data->responder_resources;
+ event->param.conn.initiator_depth = rep_data->initiator_depth;
+ event->param.conn.flow_control = rep_data->flow_control;
+ event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
+ event->param.conn.srq = rep_data->srq;
+ event->param.conn.qp_num = rep_data->remote_qpn;
+}
+
+static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
+{
+ struct rdma_id_private *id_priv = cm_id->context;
+ struct rdma_cm_event event;
+ int ret = 0;
+
+ if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
+ cma_disable_callback(id_priv, CMA_CONNECT)) ||
+ (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
+ cma_disable_callback(id_priv, CMA_DISCONNECT)))
+ return 0;
+
+ memset(&event, 0, sizeof event);
+ switch (ib_event->event) {
+ case IB_CM_REQ_ERROR:
+ case IB_CM_REP_ERROR:
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ event.status = -ETIMEDOUT;
+ break;
+ case IB_CM_REP_RECEIVED:
+ event.status = cma_verify_rep(id_priv, ib_event->private_data);
+ if (event.status)
+ event.event = RDMA_CM_EVENT_CONNECT_ERROR;
+ else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
+ event.status = cma_rep_recv(id_priv);
+ event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
+ RDMA_CM_EVENT_ESTABLISHED;
+ } else
+ event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
+ cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
+ ib_event->private_data);
+ break;
+ case IB_CM_RTU_RECEIVED:
+ case IB_CM_USER_ESTABLISHED:
+ event.event = RDMA_CM_EVENT_ESTABLISHED;
+ break;
+ case IB_CM_DREQ_ERROR:
+ event.status = -ETIMEDOUT; /* fall through */
+ case IB_CM_DREQ_RECEIVED:
+ case IB_CM_DREP_RECEIVED:
+ if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
+ goto out;
+ event.event = RDMA_CM_EVENT_DISCONNECTED;
+ break;
+ case IB_CM_TIMEWAIT_EXIT:
+ event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
+ break;
+ case IB_CM_MRA_RECEIVED:
+ /* ignore event */
+ goto out;
+ case IB_CM_REJ_RECEIVED:
+ cma_modify_qp_err(id_priv);
+ event.status = ib_event->param.rej_rcvd.reason;
+ event.event = RDMA_CM_EVENT_REJECTED;
+ event.param.conn.private_data = ib_event->private_data;
+ event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
+ break;
+ default:
+ printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
+ ib_event->event);
+ goto out;
+ }
+
+ ret = id_priv->id.event_handler(&id_priv->id, &event);
+ if (ret) {
+ /* Destroy the CM ID by returning a non-zero value. */
+ id_priv->cm_id.ib = NULL;
+ cma_exch(id_priv, CMA_DESTROYING);
+ mutex_unlock(&id_priv->handler_mutex);
+ rdma_destroy_id(&id_priv->id);
+ return ret;
+ }
+out:
+ mutex_unlock(&id_priv->handler_mutex);
+ return ret;
+}
+
+static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
+ struct ib_cm_event *ib_event)
+{
+ struct rdma_id_private *id_priv;
+ struct rdma_cm_id *id;
+ struct rdma_route *rt;
+ union cma_ip_addr *src, *dst;
+ __be16 port;
+ u8 ip_ver;
+ int ret;
+
+ if (cma_get_net_info(ib_event->private_data, listen_id->ps,
+ &ip_ver, &port, &src, &dst))
+ goto err;
+
+ id = rdma_create_id(listen_id->event_handler, listen_id->context,
+ listen_id->ps);
+ if (IS_ERR(id))
+ goto err;
+
+ cma_save_net_info(&id->route.addr, &listen_id->route.addr,
+ ip_ver, port, src, dst);
+
+ rt = &id->route;
+ rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
+ rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
+ GFP_KERNEL);
+ if (!rt->path_rec)
+ goto destroy_id;
+
+ rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
+ if (rt->num_paths == 2)
+ rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
+
+ if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
+ rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
+ rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
+ ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
+ } else {
+ ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
+ &rt->addr.dev_addr);
+ if (ret)
+ goto destroy_id;
+ }
+ rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ id_priv->state = CMA_CONNECT;
+ return id_priv;
+
+destroy_id:
+ rdma_destroy_id(id);
+err:
+ return NULL;
+}
+
+static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
+ struct ib_cm_event *ib_event)
+{
+ struct rdma_id_private *id_priv;
+ struct rdma_cm_id *id;
+ union cma_ip_addr *src, *dst;
+ __be16 port;
+ u8 ip_ver;
+ int ret;
+
+ id = rdma_create_id(listen_id->event_handler, listen_id->context,
+ listen_id->ps);
+ if (IS_ERR(id))
+ return NULL;
+
+ if (cma_get_net_info(ib_event->private_data, listen_id->ps,
+ &ip_ver, &port, &src, &dst))
+ goto err;
+
+ cma_save_net_info(&id->route.addr, &listen_id->route.addr,
+ ip_ver, port, src, dst);
+
+ if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
+ ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
+ &id->route.addr.dev_addr);
+ if (ret)
+ goto err;
+ }
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ id_priv->state = CMA_CONNECT;
+ return id_priv;
+err:
+ rdma_destroy_id(id);
+ return NULL;
+}
+
+static void cma_set_req_event_data(struct rdma_cm_event *event,
+ struct ib_cm_req_event_param *req_data,
+ void *private_data, int offset)
+{
+ event->param.conn.private_data = private_data + offset;
+ event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
+ event->param.conn.responder_resources = req_data->responder_resources;
+ event->param.conn.initiator_depth = req_data->initiator_depth;
+ event->param.conn.flow_control = req_data->flow_control;
+ event->param.conn.retry_count = req_data->retry_count;
+ event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
+ event->param.conn.srq = req_data->srq;
+ event->param.conn.qp_num = req_data->remote_qpn;
+}
+
+static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
+{
+ struct rdma_id_private *listen_id, *conn_id;
+ struct rdma_cm_event event;
+ int offset, ret;
+
+ listen_id = cm_id->context;
+ if (cma_disable_callback(listen_id, CMA_LISTEN))
+ return -ECONNABORTED;
+
+ memset(&event, 0, sizeof event);
+ offset = cma_user_data_offset(listen_id->id.ps);
+ event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
+ if (cma_is_ud_ps(listen_id->id.ps)) {
+ conn_id = cma_new_udp_id(&listen_id->id, ib_event);
+ event.param.ud.private_data = ib_event->private_data + offset;
+ event.param.ud.private_data_len =
+ IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
+ } else {
+ conn_id = cma_new_conn_id(&listen_id->id, ib_event);
+ cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
+ ib_event->private_data, offset);
+ }
+ if (!conn_id) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
+ mutex_lock(&lock);
+ ret = cma_acquire_dev(conn_id);
+ mutex_unlock(&lock);
+ if (ret)
+ goto release_conn_id;
+
+ conn_id->cm_id.ib = cm_id;
+ cm_id->context = conn_id;
+ cm_id->cm_handler = cma_ib_handler;
+
+ ret = conn_id->id.event_handler(&conn_id->id, &event);
+ if (!ret) {
+ /*
+ * Acquire mutex to prevent user executing rdma_destroy_id()
+ * while we're accessing the cm_id.
+ */
+ mutex_lock(&lock);
+ if (cma_comp(conn_id, CMA_CONNECT) &&
+ !cma_is_ud_ps(conn_id->id.ps))
+ ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ mutex_unlock(&lock);
+ mutex_unlock(&conn_id->handler_mutex);
+ goto out;
+ }
+
+ /* Destroy the CM ID by returning a non-zero value. */
+ conn_id->cm_id.ib = NULL;
+
+release_conn_id:
+ cma_exch(conn_id, CMA_DESTROYING);
+ mutex_unlock(&conn_id->handler_mutex);
+ rdma_destroy_id(&conn_id->id);
+
+out:
+ mutex_unlock(&listen_id->handler_mutex);
+ return ret;
+}
+
+static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
+{
+ return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
+}
+
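cma_get_service_id() simply concatenates the port space and the TCP/UDP port into the 64-bit IB service ID. Assuming the conventional port-space constant RDMA_PS_TCP = 0x0106, a listener on TCP port 5000 would register:

    service_id = (0x0106 << 16) + 5000 = 0x01061388
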
+static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
+ struct ib_cm_compare_data *compare)
+{
+ struct cma_hdr *cma_data, *cma_mask;
+ struct sdp_hh *sdp_data, *sdp_mask;
+ __be32 ip4_addr;
+#ifdef INET6
+ struct in6_addr ip6_addr;
+#endif
+
+ memset(compare, 0, sizeof *compare);
+ cma_data = (void *) compare->data;
+ cma_mask = (void *) compare->mask;
+ sdp_data = (void *) compare->data;
+ sdp_mask = (void *) compare->mask;
+
+ switch (addr->sa_family) {
+ case AF_INET:
+ ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+ if (ps == RDMA_PS_SDP) {
+ sdp_set_ip_ver(sdp_data, 4);
+ sdp_set_ip_ver(sdp_mask, 0xF);
+ sdp_data->dst_addr.ip4.addr = ip4_addr;
+ sdp_mask->dst_addr.ip4.addr = htonl(~0);
+ } else {
+ cma_set_ip_ver(cma_data, 4);
+ cma_set_ip_ver(cma_mask, 0xF);
+ cma_data->dst_addr.ip4.addr = ip4_addr;
+ cma_mask->dst_addr.ip4.addr = htonl(~0);
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
+ if (ps == RDMA_PS_SDP) {
+ sdp_set_ip_ver(sdp_data, 6);
+ sdp_set_ip_ver(sdp_mask, 0xF);
+ sdp_data->dst_addr.ip6 = ip6_addr;
+ memset(&sdp_mask->dst_addr.ip6, 0xFF,
+ sizeof sdp_mask->dst_addr.ip6);
+ } else {
+ cma_set_ip_ver(cma_data, 6);
+ cma_set_ip_ver(cma_mask, 0xF);
+ cma_data->dst_addr.ip6 = ip6_addr;
+ memset(&cma_mask->dst_addr.ip6, 0xFF,
+ sizeof cma_mask->dst_addr.ip6);
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+}
+
+static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
+{
+ struct rdma_id_private *id_priv = iw_id->context;
+ struct rdma_cm_event event;
+ struct sockaddr_in *sin;
+ int ret = 0;
+
+ if (cma_disable_callback(id_priv, CMA_CONNECT))
+ return 0;
+
+ memset(&event, 0, sizeof event);
+ switch (iw_event->event) {
+ case IW_CM_EVENT_CLOSE:
+ event.event = RDMA_CM_EVENT_DISCONNECTED;
+ break;
+ case IW_CM_EVENT_CONNECT_REPLY:
+ sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+ *sin = iw_event->local_addr;
+ sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
+ *sin = iw_event->remote_addr;
+ switch ((int)iw_event->status) {
+ case 0:
+ event.event = RDMA_CM_EVENT_ESTABLISHED;
+ break;
+ case -ECONNRESET:
+ case -ECONNREFUSED:
+ event.event = RDMA_CM_EVENT_REJECTED;
+ break;
+ case -ETIMEDOUT:
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ break;
+ default:
+ event.event = RDMA_CM_EVENT_CONNECT_ERROR;
+ break;
+ }
+ break;
+ case IW_CM_EVENT_ESTABLISHED:
+ event.event = RDMA_CM_EVENT_ESTABLISHED;
+ break;
+ default:
+ BUG_ON(1);
+ }
+
+ event.status = iw_event->status;
+ event.param.conn.private_data = iw_event->private_data;
+ event.param.conn.private_data_len = iw_event->private_data_len;
+ ret = id_priv->id.event_handler(&id_priv->id, &event);
+ if (ret) {
+ /* Destroy the CM ID by returning a non-zero value. */
+ id_priv->cm_id.iw = NULL;
+ cma_exch(id_priv, CMA_DESTROYING);
+ mutex_unlock(&id_priv->handler_mutex);
+ rdma_destroy_id(&id_priv->id);
+ return ret;
+ }
+
+ mutex_unlock(&id_priv->handler_mutex);
+ return ret;
+}
+
+static int iw_conn_req_handler(struct iw_cm_id *cm_id,
+ struct iw_cm_event *iw_event)
+{
+ struct rdma_cm_id *new_cm_id;
+ struct rdma_id_private *listen_id, *conn_id;
+ struct sockaddr_in *sin;
+ struct net_device *dev = NULL;
+ struct rdma_cm_event event;
+ int ret;
+ struct ib_device_attr attr;
+
+ listen_id = cm_id->context;
+ if (cma_disable_callback(listen_id, CMA_LISTEN))
+ return -ECONNABORTED;
+
+ /* Create a new RDMA id for the new IW CM ID */
+ new_cm_id = rdma_create_id(listen_id->id.event_handler,
+ listen_id->id.context,
+ RDMA_PS_TCP);
+ if (IS_ERR(new_cm_id)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ conn_id = container_of(new_cm_id, struct rdma_id_private, id);
+ mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
+ conn_id->state = CMA_CONNECT;
+
+ dev = ip_dev_find(NULL, iw_event->local_addr.sin_addr.s_addr);
+ if (!dev) {
+ ret = -EADDRNOTAVAIL;
+ mutex_unlock(&conn_id->handler_mutex);
+ rdma_destroy_id(new_cm_id);
+ goto out;
+ }
+ ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
+ if (ret) {
+ mutex_unlock(&conn_id->handler_mutex);
+ rdma_destroy_id(new_cm_id);
+ goto out;
+ }
+
+ mutex_lock(&lock);
+ ret = cma_acquire_dev(conn_id);
+ mutex_unlock(&lock);
+ if (ret) {
+ mutex_unlock(&conn_id->handler_mutex);
+ rdma_destroy_id(new_cm_id);
+ goto out;
+ }
+
+ conn_id->cm_id.iw = cm_id;
+ cm_id->context = conn_id;
+ cm_id->cm_handler = cma_iw_handler;
+
+ sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
+ *sin = iw_event->local_addr;
+ sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
+ *sin = iw_event->remote_addr;
+
+ ret = ib_query_device(conn_id->id.device, &attr);
+ if (ret) {
+ mutex_unlock(&conn_id->handler_mutex);
+ rdma_destroy_id(new_cm_id);
+ goto out;
+ }
+
+ memset(&event, 0, sizeof event);
+ event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
+ event.param.conn.private_data = iw_event->private_data;
+ event.param.conn.private_data_len = iw_event->private_data_len;
+ event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
+ event.param.conn.responder_resources = attr.max_qp_rd_atom;
+ ret = conn_id->id.event_handler(&conn_id->id, &event);
+ if (ret) {
+ /* User wants to destroy the CM ID */
+ conn_id->cm_id.iw = NULL;
+ cma_exch(conn_id, CMA_DESTROYING);
+ mutex_unlock(&conn_id->handler_mutex);
+ rdma_destroy_id(&conn_id->id);
+ goto out;
+ }
+
+ mutex_unlock(&conn_id->handler_mutex);
+
+out:
+ if (dev)
+ dev_put(dev);
+ mutex_unlock(&listen_id->handler_mutex);
+ return ret;
+}
+
+static int cma_ib_listen(struct rdma_id_private *id_priv)
+{
+ struct ib_cm_compare_data compare_data;
+ struct sockaddr *addr;
+ __be64 svc_id;
+ int ret;
+
+ id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
+ id_priv);
+ if (IS_ERR(id_priv->cm_id.ib))
+ return PTR_ERR(id_priv->cm_id.ib);
+
+ addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+ svc_id = cma_get_service_id(id_priv->id.ps, addr);
+ if (cma_any_addr(addr))
+ ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
+ else {
+ cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
+ ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
+ }
+
+ if (ret) {
+ ib_destroy_cm_id(id_priv->cm_id.ib);
+ id_priv->cm_id.ib = NULL;
+ }
+
+ return ret;
+}
+
+static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
+{
+ int ret;
+ struct sockaddr_in *sin;
+
+ id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
+ id_priv->sock,
+ iw_conn_req_handler,
+ id_priv);
+ if (IS_ERR(id_priv->cm_id.iw))
+ return PTR_ERR(id_priv->cm_id.iw);
+
+ sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+ id_priv->cm_id.iw->local_addr = *sin;
+
+ ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
+
+ if (ret) {
+ iw_destroy_cm_id(id_priv->cm_id.iw);
+ id_priv->cm_id.iw = NULL;
+ }
+
+ return ret;
+}
+
+static int cma_listen_handler(struct rdma_cm_id *id,
+ struct rdma_cm_event *event)
+{
+ struct rdma_id_private *id_priv = id->context;
+
+ id->context = id_priv->id.context;
+ id->event_handler = id_priv->id.event_handler;
+ return id_priv->id.event_handler(id, event);
+}
+
+static void cma_listen_on_dev(struct rdma_id_private *id_priv,
+ struct cma_device *cma_dev)
+{
+ struct rdma_id_private *dev_id_priv;
+ struct rdma_cm_id *id;
+ int ret;
+
+ id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
+ if (IS_ERR(id))
+ return;
+
+ dev_id_priv = container_of(id, struct rdma_id_private, id);
+
+ dev_id_priv->state = CMA_ADDR_BOUND;
+ memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
+ ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
+
+ cma_attach_to_dev(dev_id_priv, cma_dev);
+ list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
+ atomic_inc(&id_priv->refcount);
+ dev_id_priv->internal_id = 1;
+
+ ret = rdma_listen(id, id_priv->backlog);
+ if (ret)
+ printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
+ "listening on device %s\n", ret, cma_dev->device->name);
+}
+
+static void cma_listen_on_all(struct rdma_id_private *id_priv)
+{
+ struct cma_device *cma_dev;
+
+ mutex_lock(&lock);
+ list_add_tail(&id_priv->list, &listen_any_list);
+ list_for_each_entry(cma_dev, &dev_list, list)
+ cma_listen_on_dev(id_priv, cma_dev);
+ mutex_unlock(&lock);
+}
+
+int rdma_listen(struct rdma_cm_id *id, int backlog)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (id_priv->state == CMA_IDLE) {
+ ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
+ ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
+ if (ret)
+ return ret;
+ }
+
+ if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
+ return -EINVAL;
+
+ id_priv->backlog = backlog;
+ if (id->device) {
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ ret = cma_ib_listen(id_priv);
+ if (ret)
+ goto err;
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ ret = cma_iw_listen(id_priv, backlog);
+ if (ret)
+ goto err;
+ break;
+ default:
+ ret = -ENOSYS;
+ goto err;
+ }
+ } else
+ cma_listen_on_all(id_priv);
+
+ return 0;
+err:
+ id_priv->backlog = 0;
+ cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_listen);
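+/*
+ * Listener setup sketch; my_handler, my_ctx and the port number below are
+ * illustrative only, not defined in this file:
+ *
+ *	struct sockaddr_in sin = { .sin_family = AF_INET,
+ *				   .sin_port = htons(7471) };
+ *	struct rdma_cm_id *id;
+ *
+ *	id = rdma_create_id(my_handler, my_ctx, RDMA_PS_TCP);
+ *	if (!IS_ERR(id) &&
+ *	    !rdma_bind_addr(id, (struct sockaddr *) &sin))
+ *		rdma_listen(id, 10);
+ *	// connect requests are delivered to my_handler as
+ *	// RDMA_CM_EVENT_CONNECT_REQUEST events
+ */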
+
+void rdma_set_service_type(struct rdma_cm_id *id, int tos)
+{
+ struct rdma_id_private *id_priv;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ id_priv->tos = (u8) tos;
+}
+EXPORT_SYMBOL(rdma_set_service_type);
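+/*
+ * Hypothetical caller sketch: a ULP can tag a CM ID with a type of
+ * service before resolving its route, e.g.
+ *
+ *	rdma_set_service_type(id, 0x20);
+ *
+ * For IPv4 IB paths the value becomes the path record QoS class in
+ * cma_query_ib_route(); for IBoE it feeds tos_to_sl() below.
+ */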
+
+static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
+ void *context)
+{
+ struct cma_work *work = context;
+ struct rdma_route *route;
+
+ route = &work->id->id.route;
+
+ if (!status) {
+ route->num_paths = 1;
+ *route->path_rec = *path_rec;
+ } else {
+ work->old_state = CMA_ROUTE_QUERY;
+ work->new_state = CMA_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
+ work->event.status = status;
+ }
+
+ queue_work(cma_wq, &work->work);
+}
+
+static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
+ struct cma_work *work)
+{
+ struct rdma_addr *addr = &id_priv->id.route.addr;
+ struct ib_sa_path_rec path_rec;
+ ib_sa_comp_mask comp_mask;
+ struct sockaddr_in6 *sin6;
+
+ memset(&path_rec, 0, sizeof path_rec);
+ rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
+ rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
+ path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
+ path_rec.numb_path = 1;
+ path_rec.reversible = 1;
+ path_rec.service_id = cma_get_service_id(id_priv->id.ps,
+ (struct sockaddr *) &addr->dst_addr);
+
+ comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
+ IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
+ IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
+
+ if (addr->src_addr.ss_family == AF_INET) {
+ path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
+ comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
+ } else {
+ sin6 = (struct sockaddr_in6 *) &addr->src_addr;
+ path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
+ comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
+ }
+
+ if (tavor_quirk) {
+ path_rec.mtu_selector = IB_SA_LT;
+ path_rec.mtu = IB_MTU_2048;
+ }
+
+ id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
+ id_priv->id.port_num, &path_rec,
+ comp_mask, timeout_ms,
+ GFP_KERNEL, cma_query_handler,
+ work, &id_priv->query);
+
+ return (id_priv->query_id < 0) ? id_priv->query_id : 0;
+}
+
+static void cma_work_handler(struct work_struct *_work)
+{
+ struct cma_work *work = container_of(_work, struct cma_work, work);
+ struct rdma_id_private *id_priv = work->id;
+ int destroy = 0;
+
+ mutex_lock(&id_priv->handler_mutex);
+ if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+ goto out;
+
+ if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+ cma_exch(id_priv, CMA_DESTROYING);
+ destroy = 1;
+ }
+out:
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_deref_id(id_priv);
+ if (destroy)
+ rdma_destroy_id(&id_priv->id);
+ kfree(work);
+}
+
+static void cma_ndev_work_handler(struct work_struct *_work)
+{
+ struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
+ struct rdma_id_private *id_priv = work->id;
+ int destroy = 0;
+
+ mutex_lock(&id_priv->handler_mutex);
+ if (id_priv->state == CMA_DESTROYING ||
+ id_priv->state == CMA_DEVICE_REMOVAL)
+ goto out;
+
+ if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+ cma_exch(id_priv, CMA_DESTROYING);
+ destroy = 1;
+ }
+
+out:
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_deref_id(id_priv);
+ if (destroy)
+ rdma_destroy_id(&id_priv->id);
+ kfree(work);
+}
+
+static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
+{
+ struct rdma_route *route = &id_priv->id.route;
+ struct cma_work *work;
+ int ret;
+
+ work = kzalloc(sizeof *work, GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = CMA_ROUTE_QUERY;
+ work->new_state = CMA_ROUTE_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+
+ route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
+ if (!route->path_rec) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ ret = cma_query_ib_route(id_priv, timeout_ms, work);
+ if (ret)
+ goto err2;
+
+ return 0;
+err2:
+ kfree(route->path_rec);
+ route->path_rec = NULL;
+err1:
+ kfree(work);
+ return ret;
+}
+
+int rdma_set_ib_paths(struct rdma_cm_id *id,
+ struct ib_sa_path_rec *path_rec, int num_paths)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
+ return -EINVAL;
+
+ id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
+ if (!id->route.path_rec) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
+ return 0;
+err:
+ cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_set_ib_paths);
+
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
+{
+ struct cma_work *work;
+
+ work = kzalloc(sizeof *work, GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = CMA_ROUTE_QUERY;
+ work->new_state = CMA_ROUTE_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ queue_work(cma_wq, &work->work);
+ return 0;
+}
+
+static u8 tos_to_sl(u8 tos)
+{
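+	/*
+	 * Note: the tos argument is currently ignored; the SL is always
+	 * taken from def_prec2sl, masked to the low three bits.
+	 */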
+ return def_prec2sl & 7;
+}
+
+static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
+{
+ struct rdma_route *route = &id_priv->id.route;
+ struct rdma_addr *addr = &route->addr;
+ struct cma_work *work;
+ int ret;
+ struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
+ struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
+ struct net_device *ndev = NULL;
+ u16 vid;
+
+ if (src_addr->sin_family != dst_addr->sin_family)
+ return -EINVAL;
+
+ work = kzalloc(sizeof *work, GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+
+ route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
+ if (!route->path_rec) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ route->num_paths = 1;
+
+ if (addr->dev_addr.bound_dev_if)
+ ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+ if (!ndev) {
+ ret = -ENODEV;
+ goto err2;
+ }
+
+ vid = rdma_vlan_dev_vlan_id(ndev);
+
+ iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
+ iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
+
+ route->path_rec->hop_limit = 1;
+ route->path_rec->reversible = 1;
+ route->path_rec->pkey = cpu_to_be16(0xffff);
+ route->path_rec->mtu_selector = IB_SA_EQ;
+ route->path_rec->sl = tos_to_sl(id_priv->tos);
+
+#ifdef __linux__
+ route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
+#else
+ route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu);
+#endif
+ route->path_rec->rate_selector = IB_SA_EQ;
+ route->path_rec->rate = iboe_get_rate(ndev);
+ dev_put(ndev);
+ route->path_rec->packet_life_time_selector = IB_SA_EQ;
+ route->path_rec->packet_life_time = IBOE_PACKET_LIFETIME;
+ if (!route->path_rec->mtu) {
+ ret = -EINVAL;
+ goto err2;
+ }
+
+ work->old_state = CMA_ROUTE_QUERY;
+ work->new_state = CMA_ROUTE_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ work->event.status = 0;
+
+ queue_work(cma_wq, &work->work);
+
+ return 0;
+
+err2:
+ kfree(route->path_rec);
+ route->path_rec = NULL;
+err1:
+ kfree(work);
+ return ret;
+}
+
+int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
+ return -EINVAL;
+
+ atomic_inc(&id_priv->refcount);
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ switch (rdma_port_get_link_layer(id->device, id->port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ ret = cma_resolve_ib_route(id_priv, timeout_ms);
+ break;
+ case IB_LINK_LAYER_ETHERNET:
+ ret = cma_resolve_iboe_route(id_priv);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ ret = cma_resolve_iw_route(id_priv, timeout_ms);
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
+ cma_deref_id(id_priv);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_route);
+
+static int cma_bind_loopback(struct rdma_id_private *id_priv)
+{
+ struct cma_device *cma_dev;
+ struct ib_port_attr port_attr;
+ union ib_gid gid;
+ u16 pkey;
+ int ret;
+ u8 p;
+
+ mutex_lock(&lock);
+ if (list_empty(&dev_list)) {
+ ret = -ENODEV;
+ goto out;
+ }
+ list_for_each_entry(cma_dev, &dev_list, list)
+ for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
+ if (!ib_query_port(cma_dev->device, p, &port_attr) &&
+ port_attr.state == IB_PORT_ACTIVE)
+ goto port_found;
+
+ p = 1;
+ cma_dev = list_entry(dev_list.next, struct cma_device, list);
+
+port_found:
+ ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
+ if (ret)
+ goto out;
+
+ ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
+ if (ret)
+ goto out;
+
+ id_priv->id.route.addr.dev_addr.dev_type =
+ (rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ?
+ ARPHRD_INFINIBAND : ARPHRD_ETHER;
+
+ rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+ ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
+ id_priv->id.port_num = p;
+ cma_attach_to_dev(id_priv, cma_dev);
+out:
+ mutex_unlock(&lock);
+ return ret;
+}
+
+static void addr_handler(int status, struct sockaddr *src_addr,
+ struct rdma_dev_addr *dev_addr, void *context)
+{
+ struct rdma_id_private *id_priv = context;
+ struct rdma_cm_event event;
+
+ memset(&event, 0, sizeof event);
+ mutex_lock(&id_priv->handler_mutex);
+
+ /*
+ * Grab mutex to block rdma_destroy_id() from removing the device while
+ * we're trying to acquire it.
+ */
+ mutex_lock(&lock);
+ if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
+ mutex_unlock(&lock);
+ goto out;
+ }
+
+ if (!status && !id_priv->cma_dev)
+ status = cma_acquire_dev(id_priv);
+ mutex_unlock(&lock);
+
+ if (status) {
+ if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
+ goto out;
+ event.event = RDMA_CM_EVENT_ADDR_ERROR;
+ event.status = status;
+ } else {
+ memcpy(&id_priv->id.route.addr.src_addr, src_addr,
+ ip_addr_size(src_addr));
+ event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+ }
+
+ if (id_priv->id.event_handler(&id_priv->id, &event)) {
+ cma_exch(id_priv, CMA_DESTROYING);
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_deref_id(id_priv);
+ rdma_destroy_id(&id_priv->id);
+ return;
+ }
+out:
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_deref_id(id_priv);
+}
+
+static int cma_resolve_loopback(struct rdma_id_private *id_priv)
+{
+ struct cma_work *work;
+ struct sockaddr *src, *dst;
+ union ib_gid gid;
+ int ret;
+
+ work = kzalloc(sizeof *work, GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ if (!id_priv->cma_dev) {
+ ret = cma_bind_loopback(id_priv);
+ if (ret)
+ goto err;
+ }
+
+ rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+ rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
+
+ src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+ if (cma_zero_addr(src)) {
+ dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
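+		/* Note: assignment intended; adopt the destination's
+		 * address family, then branch on it. */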
+ if ((src->sa_family = dst->sa_family) == AF_INET) {
+ ((struct sockaddr_in *) src)->sin_addr.s_addr =
+ ((struct sockaddr_in *) dst)->sin_addr.s_addr;
+ } else {
+ ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
+ &((struct sockaddr_in6 *) dst)->sin6_addr);
+ }
+ }
+
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = CMA_ADDR_QUERY;
+ work->new_state = CMA_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+ queue_work(cma_wq, &work->work);
+ return 0;
+err:
+ kfree(work);
+ return ret;
+}
+
+static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ struct sockaddr *dst_addr)
+{
+ if (!src_addr || !src_addr->sa_family) {
+ src_addr = (struct sockaddr *) &id->route.addr.src_addr;
+ if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
+ ((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
+ ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
+ }
+ }
+ if (!cma_any_addr(src_addr))
+ return rdma_bind_addr(id, src_addr);
+ else {
+ struct sockaddr_in addr_in;
+
+ memset(&addr_in, 0, sizeof addr_in);
+ addr_in.sin_family = dst_addr->sa_family;
+ addr_in.sin_len = sizeof addr_in;
+ return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
+ }
+}
+
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ struct sockaddr *dst_addr, int timeout_ms)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (id_priv->state == CMA_IDLE) {
+ ret = cma_bind_addr(id, src_addr, dst_addr);
+ if (ret)
+ return ret;
+ }
+
+ if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
+ return -EINVAL;
+
+ atomic_inc(&id_priv->refcount);
+ memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
+ if (cma_any_addr(dst_addr))
+ ret = cma_resolve_loopback(id_priv);
+ else
+ ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr,
+ dst_addr, &id->route.addr.dev_addr,
+ timeout_ms, addr_handler, id_priv);
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
+ cma_deref_id(id_priv);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_addr);
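+/*
+ * Active-side resolution sketch, assuming a hypothetical handler
+ * my_handler and a filled-in destination dst:
+ *
+ *	id = rdma_create_id(my_handler, my_ctx, RDMA_PS_TCP);
+ *	rdma_resolve_addr(id, NULL, (struct sockaddr *) &dst, 2000);
+ *	// on RDMA_CM_EVENT_ADDR_RESOLVED:
+ *	rdma_resolve_route(id, 2000);
+ *	// on RDMA_CM_EVENT_ROUTE_RESOLVED the id is ready for rdma_connect()
+ */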
+
+static void cma_bind_port(struct rdma_bind_list *bind_list,
+ struct rdma_id_private *id_priv)
+{
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+ sin->sin_port = htons(bind_list->port);
+ id_priv->bind_list = bind_list;
+ hlist_add_head(&id_priv->node, &bind_list->owners);
+}
+
+static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
+ unsigned short snum)
+{
+ struct rdma_bind_list *bind_list;
+ int port, ret;
+
+ bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
+ if (!bind_list)
+ return -ENOMEM;
+
+ do {
+ ret = idr_get_new_above(ps, bind_list, snum, &port);
+ } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
+
+ if (ret)
+ goto err1;
+
+ if (port != snum) {
+ ret = -EADDRNOTAVAIL;
+ goto err2;
+ }
+
+ bind_list->ps = ps;
+ bind_list->port = (unsigned short) port;
+ cma_bind_port(bind_list, id_priv);
+ return 0;
+err2:
+ idr_remove(ps, port);
+err1:
+ kfree(bind_list);
+ return ret;
+}
+
+static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
+{
+#if defined(INET)
+ struct rdma_bind_list *bind_list;
+ int port, ret, low, high;
+
+ bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
+ if (!bind_list)
+ return -ENOMEM;
+
+retry:
+	/* FIXME: add proper port randomization, as in inet_csk_get_port() */
+ do {
+ ret = idr_get_new_above(ps, bind_list, next_port, &port);
+ } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
+
+ if (ret)
+ goto err1;
+
+ inet_get_local_port_range(&low, &high);
+ if (port > high) {
+ if (next_port != low) {
+ idr_remove(ps, port);
+ next_port = low;
+ goto retry;
+ }
+ ret = -EADDRNOTAVAIL;
+ goto err2;
+ }
+
+ if (port == high)
+ next_port = low;
+ else
+ next_port = port + 1;
+
+ bind_list->ps = ps;
+ bind_list->port = (unsigned short) port;
+ cma_bind_port(bind_list, id_priv);
+ return 0;
+err2:
+ idr_remove(ps, port);
+err1:
+ kfree(bind_list);
+ return ret;
+#else
+ return -ENOSPC;
+#endif
+}
+
+static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
+{
+ struct rdma_id_private *cur_id;
+ struct sockaddr_in *sin, *cur_sin;
+ struct rdma_bind_list *bind_list;
+ struct hlist_node *node;
+ unsigned short snum;
+
+ sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+ snum = ntohs(sin->sin_port);
+#ifdef __linux__
+ if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ return -EACCES;
+#endif
+
+ bind_list = idr_find(ps, snum);
+ if (!bind_list)
+ return cma_alloc_port(ps, id_priv, snum);
+
+ /*
+	 * We don't support binding to the wildcard (any) address if anyone
+	 * is already bound to a specific address on the same port.
+ */
+ if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr))
+ return -EADDRNOTAVAIL;
+
+ hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
+ if (cma_any_addr((struct sockaddr *) &cur_id->id.route.addr.src_addr))
+ return -EADDRNOTAVAIL;
+
+ cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
+ if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
+ return -EADDRINUSE;
+ }
+
+ cma_bind_port(bind_list, id_priv);
+ return 0;
+}
+
+static int cma_get_tcp_port(struct rdma_id_private *id_priv)
+{
+ int ret;
+ int size;
+ struct socket *sock;
+
+ ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+ if (ret)
+ return ret;
+#ifdef __linux__
+ ret = sock->ops->bind(sock,
+ (struct sockaddr *) &id_priv->id.route.addr.src_addr,
+ ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
+#else
+ ret = -sobind(sock,
+ (struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ curthread);
+#endif
+ if (ret) {
+ sock_release(sock);
+ return ret;
+ }
+
+ size = ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr);
+ ret = sock_getname(sock,
+ (struct sockaddr *) &id_priv->id.route.addr.src_addr,
+ &size, 0);
+ if (ret) {
+ sock_release(sock);
+ return ret;
+ }
+
+ id_priv->sock = sock;
+ return 0;
+}
+
+static int cma_get_port(struct rdma_id_private *id_priv)
+{
+ struct idr *ps;
+ int ret;
+
+ switch (id_priv->id.ps) {
+ case RDMA_PS_SDP:
+ ps = &sdp_ps;
+ break;
+ case RDMA_PS_TCP:
+ ps = &tcp_ps;
+ if (unify_tcp_port_space) {
+ ret = cma_get_tcp_port(id_priv);
+ if (ret)
+ goto out;
+ }
+ break;
+ case RDMA_PS_UDP:
+ ps = &udp_ps;
+ break;
+ case RDMA_PS_IPOIB:
+ ps = &ipoib_ps;
+ break;
+ default:
+ return -EPROTONOSUPPORT;
+ }
+
+ mutex_lock(&lock);
+ if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr))
+ ret = cma_alloc_any_port(ps, id_priv);
+ else
+ ret = cma_use_port(ps, id_priv);
+ mutex_unlock(&lock);
+out:
+ return ret;
+}
+
+static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
+ struct sockaddr *addr)
+{
+#if defined(INET6)
+ struct sockaddr_in6 *sin6;
+
+ if (addr->sa_family != AF_INET6)
+ return 0;
+
+ sin6 = (struct sockaddr_in6 *) addr;
+#ifdef __linux__
+ if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
+#else
+ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) &&
+#endif
+ !sin6->sin6_scope_id)
+ return -EINVAL;
+
+ dev_addr->bound_dev_if = sin6->sin6_scope_id;
+#endif
+ return 0;
+}
+
+int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
+ return -EAFNOSUPPORT;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
+ return -EINVAL;
+
+ ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
+ if (ret)
+ goto err1;
+
+ if (!cma_any_addr(addr)) {
+ ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
+ if (ret)
+ goto err1;
+
+ mutex_lock(&lock);
+ ret = cma_acquire_dev(id_priv);
+ mutex_unlock(&lock);
+ if (ret)
+ goto err1;
+ }
+
+ memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
+ ret = cma_get_port(id_priv);
+ if (ret)
+ goto err2;
+
+ return 0;
+err2:
+ if (id_priv->cma_dev) {
+ mutex_lock(&lock);
+ cma_detach_from_dev(id_priv);
+ mutex_unlock(&lock);
+ }
+err1:
+ cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_bind_addr);
+
+static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
+ struct rdma_route *route)
+{
+ struct cma_hdr *cma_hdr;
+ struct sdp_hh *sdp_hdr;
+
+ if (route->addr.src_addr.ss_family == AF_INET) {
+ struct sockaddr_in *src4, *dst4;
+
+ src4 = (struct sockaddr_in *) &route->addr.src_addr;
+ dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
+
+ switch (ps) {
+ case RDMA_PS_SDP:
+ sdp_hdr = hdr;
+ if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+ return -EINVAL;
+ sdp_set_ip_ver(sdp_hdr, 4);
+ sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+ sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+ sdp_hdr->port = src4->sin_port;
+ break;
+ default:
+ cma_hdr = hdr;
+ cma_hdr->cma_version = CMA_VERSION;
+ cma_set_ip_ver(cma_hdr, 4);
+ cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+ cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+ cma_hdr->port = src4->sin_port;
+ break;
+ }
+ } else {
+ struct sockaddr_in6 *src6, *dst6;
+
+ src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
+ dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
+
+ switch (ps) {
+ case RDMA_PS_SDP:
+ sdp_hdr = hdr;
+ if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+ return -EINVAL;
+ sdp_set_ip_ver(sdp_hdr, 6);
+ sdp_hdr->src_addr.ip6 = src6->sin6_addr;
+ sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
+ sdp_hdr->port = src6->sin6_port;
+ break;
+ default:
+ cma_hdr = hdr;
+ cma_hdr->cma_version = CMA_VERSION;
+ cma_set_ip_ver(cma_hdr, 6);
+ cma_hdr->src_addr.ip6 = src6->sin6_addr;
+ cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
+ cma_hdr->port = src6->sin6_port;
+ break;
+ }
+ }
+ return 0;
+}
+
+static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
+ struct ib_cm_event *ib_event)
+{
+ struct rdma_id_private *id_priv = cm_id->context;
+ struct rdma_cm_event event;
+ struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
+ int ret = 0;
+
+ if (cma_disable_callback(id_priv, CMA_CONNECT))
+ return 0;
+
+ memset(&event, 0, sizeof event);
+ switch (ib_event->event) {
+ case IB_CM_SIDR_REQ_ERROR:
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ event.status = -ETIMEDOUT;
+ break;
+ case IB_CM_SIDR_REP_RECEIVED:
+ event.param.ud.private_data = ib_event->private_data;
+ event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
+ if (rep->status != IB_SIDR_SUCCESS) {
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ event.status = ib_event->param.sidr_rep_rcvd.status;
+ break;
+ }
+ ret = cma_set_qkey(id_priv);
+ if (ret) {
+ event.event = RDMA_CM_EVENT_ADDR_ERROR;
+ event.status = -EINVAL;
+ break;
+ }
+ if (id_priv->qkey != rep->qkey) {
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ event.status = -EINVAL;
+ break;
+ }
+ ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
+ id_priv->id.route.path_rec,
+ &event.param.ud.ah_attr);
+ event.param.ud.qp_num = rep->qpn;
+ event.param.ud.qkey = rep->qkey;
+ event.event = RDMA_CM_EVENT_ESTABLISHED;
+ event.status = 0;
+ break;
+ default:
+ printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
+ ib_event->event);
+ goto out;
+ }
+
+ ret = id_priv->id.event_handler(&id_priv->id, &event);
+ if (ret) {
+		/* Returning a non-zero value tells the IB CM to destroy the CM ID. */
+ id_priv->cm_id.ib = NULL;
+ cma_exch(id_priv, CMA_DESTROYING);
+ mutex_unlock(&id_priv->handler_mutex);
+ rdma_destroy_id(&id_priv->id);
+ return ret;
+ }
+out:
+ mutex_unlock(&id_priv->handler_mutex);
+ return ret;
+}
+
+static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
+ struct rdma_conn_param *conn_param)
+{
+ struct ib_cm_sidr_req_param req;
+ struct rdma_route *route;
+ int ret;
+
+ req.private_data_len = sizeof(struct cma_hdr) +
+ conn_param->private_data_len;
+ req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
+ if (!req.private_data)
+ return -ENOMEM;
+
+ if (conn_param->private_data && conn_param->private_data_len)
+ memcpy((void *) req.private_data + sizeof(struct cma_hdr),
+ conn_param->private_data, conn_param->private_data_len);
+
+ route = &id_priv->id.route;
+ ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
+ if (ret)
+ goto out;
+
+ id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
+ cma_sidr_rep_handler, id_priv);
+ if (IS_ERR(id_priv->cm_id.ib)) {
+ ret = PTR_ERR(id_priv->cm_id.ib);
+ goto out;
+ }
+
+ req.path = route->path_rec;
+ req.service_id = cma_get_service_id(id_priv->id.ps,
+ (struct sockaddr *) &route->addr.dst_addr);
+ req.timeout_ms = 1 << (cma_response_timeout - 8);
+ req.max_cm_retries = CMA_MAX_CM_RETRIES;
+
+ ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
+ if (ret) {
+ ib_destroy_cm_id(id_priv->cm_id.ib);
+ id_priv->cm_id.ib = NULL;
+ }
+out:
+ kfree(req.private_data);
+ return ret;
+}
+
+static int cma_connect_ib(struct rdma_id_private *id_priv,
+ struct rdma_conn_param *conn_param)
+{
+ struct ib_cm_req_param req;
+ struct rdma_route *route;
+ void *private_data;
+ int offset, ret;
+
+ memset(&req, 0, sizeof req);
+ offset = cma_user_data_offset(id_priv->id.ps);
+ req.private_data_len = offset + conn_param->private_data_len;
+ private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
+ if (!private_data)
+ return -ENOMEM;
+
+ if (conn_param->private_data && conn_param->private_data_len)
+ memcpy(private_data + offset, conn_param->private_data,
+ conn_param->private_data_len);
+
+ id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
+ id_priv);
+ if (IS_ERR(id_priv->cm_id.ib)) {
+ ret = PTR_ERR(id_priv->cm_id.ib);
+ goto out;
+ }
+
+ route = &id_priv->id.route;
+ ret = cma_format_hdr(private_data, id_priv->id.ps, route);
+ if (ret)
+ goto out;
+ req.private_data = private_data;
+
+ req.primary_path = &route->path_rec[0];
+ if (route->num_paths == 2)
+ req.alternate_path = &route->path_rec[1];
+
+ req.service_id = cma_get_service_id(id_priv->id.ps,
+ (struct sockaddr *) &route->addr.dst_addr);
+ req.qp_num = id_priv->qp_num;
+ req.qp_type = IB_QPT_RC;
+ req.starting_psn = id_priv->seq_num;
+ req.responder_resources = conn_param->responder_resources;
+ req.initiator_depth = conn_param->initiator_depth;
+ req.flow_control = conn_param->flow_control;
+ req.retry_count = conn_param->retry_count;
+ req.rnr_retry_count = conn_param->rnr_retry_count;
+ req.remote_cm_response_timeout = cma_response_timeout;
+ req.local_cm_response_timeout = cma_response_timeout;
+ req.max_cm_retries = CMA_MAX_CM_RETRIES;
+ req.srq = id_priv->srq ? 1 : 0;
+
+ ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
+out:
+ if (ret && !IS_ERR(id_priv->cm_id.ib)) {
+ ib_destroy_cm_id(id_priv->cm_id.ib);
+ id_priv->cm_id.ib = NULL;
+ }
+
+ kfree(private_data);
+ return ret;
+}
+
+static int cma_connect_iw(struct rdma_id_private *id_priv,
+ struct rdma_conn_param *conn_param)
+{
+ struct iw_cm_id *cm_id;
+ struct sockaddr_in* sin;
+ int ret;
+ struct iw_cm_conn_param iw_param;
+
+ cm_id = iw_create_cm_id(id_priv->id.device, id_priv->sock,
+ cma_iw_handler, id_priv);
+ if (IS_ERR(cm_id)) {
+ ret = PTR_ERR(cm_id);
+ goto out;
+ }
+
+ id_priv->cm_id.iw = cm_id;
+
+ sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
+ cm_id->local_addr = *sin;
+
+ sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
+ cm_id->remote_addr = *sin;
+
+ ret = cma_modify_qp_rtr(id_priv, conn_param);
+ if (ret)
+ goto out;
+
+ iw_param.ord = conn_param->initiator_depth;
+ iw_param.ird = conn_param->responder_resources;
+ iw_param.private_data = conn_param->private_data;
+ iw_param.private_data_len = conn_param->private_data_len;
+ if (id_priv->id.qp)
+ iw_param.qpn = id_priv->qp_num;
+ else
+ iw_param.qpn = conn_param->qp_num;
+ ret = iw_cm_connect(cm_id, &iw_param);
+out:
+ if (ret && !IS_ERR(cm_id)) {
+ iw_destroy_cm_id(cm_id);
+ id_priv->cm_id.iw = NULL;
+ }
+ return ret;
+}
+
+int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
+ return -EINVAL;
+
+ if (!id->qp) {
+ id_priv->qp_num = conn_param->qp_num;
+ id_priv->srq = conn_param->srq;
+ }
+
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ if (cma_is_ud_ps(id->ps))
+ ret = cma_resolve_ib_udp(id_priv, conn_param);
+ else
+ ret = cma_connect_ib(id_priv, conn_param);
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ ret = cma_connect_iw(id_priv, conn_param);
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_connect);
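+/*
+ * Connection sketch (field values are examples only): once the route is
+ * resolved and a QP has been created on the id, the caller connects with:
+ *
+ *	struct rdma_conn_param param;
+ *
+ *	memset(&param, 0, sizeof param);
+ *	param.responder_resources = 1;
+ *	param.initiator_depth = 1;
+ *	param.retry_count = 7;
+ *	ret = rdma_connect(id, &param);
+ *	// success is reported as RDMA_CM_EVENT_ESTABLISHED; a reject or
+ *	// error event arrives otherwise
+ */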
+
+static int cma_accept_ib(struct rdma_id_private *id_priv,
+ struct rdma_conn_param *conn_param)
+{
+ struct ib_cm_rep_param rep;
+ int ret;
+
+ ret = cma_modify_qp_rtr(id_priv, conn_param);
+ if (ret)
+ goto out;
+
+ ret = cma_modify_qp_rts(id_priv, conn_param);
+ if (ret)
+ goto out;
+
+ memset(&rep, 0, sizeof rep);
+ rep.qp_num = id_priv->qp_num;
+ rep.starting_psn = id_priv->seq_num;
+ rep.private_data = conn_param->private_data;
+ rep.private_data_len = conn_param->private_data_len;
+ rep.responder_resources = conn_param->responder_resources;
+ rep.initiator_depth = conn_param->initiator_depth;
+ rep.failover_accepted = 0;
+ rep.flow_control = conn_param->flow_control;
+ rep.rnr_retry_count = conn_param->rnr_retry_count;
+ rep.srq = id_priv->srq ? 1 : 0;
+
+ ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
+out:
+ return ret;
+}
+
+static int cma_accept_iw(struct rdma_id_private *id_priv,
+ struct rdma_conn_param *conn_param)
+{
+ struct iw_cm_conn_param iw_param;
+ int ret;
+
+ ret = cma_modify_qp_rtr(id_priv, conn_param);
+ if (ret)
+ return ret;
+
+ iw_param.ord = conn_param->initiator_depth;
+ iw_param.ird = conn_param->responder_resources;
+ iw_param.private_data = conn_param->private_data;
+ iw_param.private_data_len = conn_param->private_data_len;
+	if (id_priv->id.qp)
+		iw_param.qpn = id_priv->qp_num;
+	else
+		iw_param.qpn = conn_param->qp_num;
+
+ return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
+}
+
+static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
+ enum ib_cm_sidr_status status,
+ const void *private_data, int private_data_len)
+{
+ struct ib_cm_sidr_rep_param rep;
+ int ret;
+
+ memset(&rep, 0, sizeof rep);
+ rep.status = status;
+ if (status == IB_SIDR_SUCCESS) {
+ ret = cma_set_qkey(id_priv);
+ if (ret)
+ return ret;
+ rep.qp_num = id_priv->qp_num;
+ rep.qkey = id_priv->qkey;
+ }
+ rep.private_data = private_data;
+ rep.private_data_len = private_data_len;
+
+ return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
+}
+
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_comp(id_priv, CMA_CONNECT))
+ return -EINVAL;
+
+ if (!id->qp && conn_param) {
+ id_priv->qp_num = conn_param->qp_num;
+ id_priv->srq = conn_param->srq;
+ }
+
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+		if (cma_is_ud_ps(id->ps)) {
+			if (conn_param)
+				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
+							conn_param->private_data,
+							conn_param->private_data_len);
+			else
+				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
+							NULL, 0);
+		} else if (conn_param)
+ ret = cma_accept_ib(id_priv, conn_param);
+ else
+ ret = cma_rep_recv(id_priv);
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ ret = cma_accept_iw(id_priv, conn_param);
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ if (ret)
+ goto reject;
+
+ return 0;
+reject:
+ cma_modify_qp_err(id_priv);
+ rdma_reject(id, NULL, 0);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_accept);
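+/*
+ * Passive-side sketch: inside the listener's event handler, on an
+ * RDMA_CM_EVENT_CONNECT_REQUEST for the new child id (names illustrative):
+ *
+ *	struct rdma_conn_param param;
+ *
+ *	memset(&param, 0, sizeof param);
+ *	param.responder_resources = event->param.conn.responder_resources;
+ *	param.initiator_depth = event->param.conn.initiator_depth;
+ *	ret = rdma_accept(new_id, &param);
+ *	// or rdma_reject(new_id, NULL, 0) to decline the request
+ */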
+
+int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_has_cm_dev(id_priv))
+ return -EINVAL;
+
+ switch (id->device->node_type) {
+ case RDMA_NODE_IB_CA:
+ ret = ib_cm_notify(id_priv->cm_id.ib, event);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rdma_notify);
+
+int rdma_reject(struct rdma_cm_id *id, const void *private_data,
+ u8 private_data_len)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_has_cm_dev(id_priv))
+ return -EINVAL;
+
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ if (cma_is_ud_ps(id->ps))
+ ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
+ private_data, private_data_len);
+ else
+ ret = ib_send_cm_rej(id_priv->cm_id.ib,
+ IB_CM_REJ_CONSUMER_DEFINED, NULL,
+ 0, private_data, private_data_len);
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ ret = iw_cm_reject(id_priv->cm_id.iw,
+ private_data, private_data_len);
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rdma_reject);
+
+int rdma_disconnect(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_has_cm_dev(id_priv))
+ return -EINVAL;
+
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ ret = cma_modify_qp_err(id_priv);
+ if (ret)
+ goto out;
+ /* Initiate or respond to a disconnect. */
+ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
+ ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+out:
+ return ret;
+}
+EXPORT_SYMBOL(rdma_disconnect);
+
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc = multicast->context;
+ struct rdma_cm_event event;
+ int ret;
+
+ id_priv = mc->id_priv;
+ if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) &&
+ cma_disable_callback(id_priv, CMA_ADDR_RESOLVED))
+ return 0;
+
+ mutex_lock(&id_priv->qp_mutex);
+ if (!status && id_priv->id.qp)
+ status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
+ multicast->rec.mlid);
+ mutex_unlock(&id_priv->qp_mutex);
+
+ memset(&event, 0, sizeof event);
+ event.status = status;
+ event.param.ud.private_data = mc->context;
+ if (!status) {
+ event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
+ ib_init_ah_from_mcmember(id_priv->id.device,
+ id_priv->id.port_num, &multicast->rec,
+ &event.param.ud.ah_attr);
+ event.param.ud.qp_num = 0xFFFFFF;
+ event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+ } else
+ event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+
+ ret = id_priv->id.event_handler(&id_priv->id, &event);
+ if (ret) {
+ cma_exch(id_priv, CMA_DESTROYING);
+ mutex_unlock(&id_priv->handler_mutex);
+ rdma_destroy_id(&id_priv->id);
+ return 0;
+ }
+
+ mutex_unlock(&id_priv->handler_mutex);
+ return 0;
+}
+
+static void cma_set_mgid(struct rdma_id_private *id_priv,
+ struct sockaddr *addr, union ib_gid *mgid)
+{
+#if defined(INET) || defined(INET6)
+ unsigned char mc_map[MAX_ADDR_LEN];
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+#endif
+#ifdef INET
+ struct sockaddr_in *sin = (struct sockaddr_in *) addr;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
+#endif
+
+ if (cma_any_addr(addr)) {
+ memset(mgid, 0, sizeof *mgid);
+#ifdef INET6
+ } else if ((addr->sa_family == AF_INET6) &&
+ ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
+ 0xFF10A01B)) {
+		/* The IPv6 address is an SA-assigned MGID. */
+ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+ } else if (addr->sa_family == AF_INET6) {
+ ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
+ if (id_priv->id.ps == RDMA_PS_UDP)
+ mc_map[7] = 0x01; /* Use RDMA CM signature */
+ *mgid = *(union ib_gid *) (mc_map + 4);
+#endif
+#ifdef INET
+ } else {
+ ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
+ if (id_priv->id.ps == RDMA_PS_UDP)
+ mc_map[7] = 0x01; /* Use RDMA CM signature */
+ *mgid = *(union ib_gid *) (mc_map + 4);
+#endif
+ }
+}
+
+static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
+{
+ struct ib_sa_mcmember_rec rec;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ ib_sa_comp_mask comp_mask;
+ int ret;
+
+ ib_addr_get_mgid(dev_addr, &rec.mgid);
+ ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
+ &rec.mgid, &rec);
+ if (ret)
+ return ret;
+
+ cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
+ if (id_priv->id.ps == RDMA_PS_UDP)
+ rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+ rdma_addr_get_sgid(dev_addr, &rec.port_gid);
+ rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
+ rec.join_state = 1;
+
+ comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
+ IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
+ IB_SA_MCMEMBER_REC_FLOW_LABEL |
+ IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+
+ if (id_priv->id.ps == RDMA_PS_IPOIB)
+ comp_mask |= IB_SA_MCMEMBER_REC_RATE |
+ IB_SA_MCMEMBER_REC_RATE_SELECTOR;
+
+ mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+ id_priv->id.port_num, &rec,
+ comp_mask, GFP_KERNEL,
+ cma_ib_mc_handler, mc);
+ if (IS_ERR(mc->multicast.ib))
+ return PTR_ERR(mc->multicast.ib);
+
+ return 0;
+}
+
+static void iboe_mcast_work_handler(struct work_struct *work)
+{
+ struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
+ struct cma_multicast *mc = mw->mc;
+ struct ib_sa_multicast *m = mc->multicast.ib;
+
+ mc->multicast.ib->context = mc;
+ cma_ib_mc_handler(0, m);
+ kref_put(&mc->mcref, release_mc);
+ kfree(mw);
+}
+
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in *)addr;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
+
+ if (cma_any_addr(addr)) {
+ memset(mgid, 0, sizeof *mgid);
+ } else if (addr->sa_family == AF_INET6)
+ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+ else {
+ mgid->raw[0] = 0xff;
+ mgid->raw[1] = 0x0e;
+ mgid->raw[2] = 0;
+ mgid->raw[3] = 0;
+ mgid->raw[4] = 0;
+ mgid->raw[5] = 0;
+ mgid->raw[6] = 0;
+ mgid->raw[7] = 0;
+ mgid->raw[8] = 0;
+ mgid->raw[9] = 0;
+ mgid->raw[10] = 0xff;
+ mgid->raw[11] = 0xff;
+ *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
+ }
+}
+
+static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
+{
+ struct iboe_mcast_work *work;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ int err;
+ struct sockaddr *addr = (struct sockaddr *)&mc->addr;
+ struct net_device *ndev = NULL;
+
+ if (cma_zero_addr((struct sockaddr *)&mc->addr))
+ return -EINVAL;
+
+ work = kzalloc(sizeof *work, GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
+ if (!mc->multicast.ib) {
+ err = -ENOMEM;
+ goto out1;
+ }
+
+ cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
+
+ mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
+ if (id_priv->id.ps == RDMA_PS_UDP)
+ mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (!ndev) {
+ err = -ENODEV;
+ goto out2;
+ }
+
+ mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
+ mc->multicast.ib->rec.hop_limit = 1;
+#ifdef __linux__
+ mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+#else
+ mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu);
+#endif
+ dev_put(ndev);
+ if (!mc->multicast.ib->rec.mtu) {
+ err = -EINVAL;
+ goto out2;
+ }
+ iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
+ work->id = id_priv;
+ work->mc = mc;
+ INIT_WORK(&work->work, iboe_mcast_work_handler);
+ kref_get(&mc->mcref);
+ queue_work(cma_wq, &work->work);
+
+ return 0;
+
+out2:
+ kfree(mc->multicast.ib);
+out1:
+ kfree(work);
+ return err;
+}
+
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+ void *context)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+ !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ return -EINVAL;
+
+ mc = kmalloc(sizeof *mc, GFP_KERNEL);
+ if (!mc)
+ return -ENOMEM;
+
+ memcpy(&mc->addr, addr, ip_addr_size(addr));
+ mc->context = context;
+ mc->id_priv = id_priv;
+
+ spin_lock(&id_priv->lock);
+ list_add(&mc->list, &id_priv->mc_list);
+ spin_unlock(&id_priv->lock);
+
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ switch (rdma_port_get_link_layer(id->device, id->port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ ret = cma_join_ib_multicast(id_priv, mc);
+ break;
+ case IB_LINK_LAYER_ETHERNET:
+ kref_init(&mc->mcref);
+ ret = cma_iboe_join_multicast(id_priv, mc);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ if (ret) {
+ spin_lock_irq(&id_priv->lock);
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+ kfree(mc);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(rdma_join_multicast);
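+/*
+ * Multicast sketch, assuming an id that is at least address-bound and a
+ * hypothetical group address mcast:
+ *
+ *	ret = rdma_join_multicast(id, (struct sockaddr *) &mcast, my_ctx);
+ *	// RDMA_CM_EVENT_MULTICAST_JOIN carries the qkey and ah_attr to use;
+ *	// rdma_leave_multicast(id, (struct sockaddr *) &mcast) undoes the join
+ */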
+
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ spin_lock_irq(&id_priv->lock);
+ list_for_each_entry(mc, &id_priv->mc_list, list) {
+ if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+
+ if (id->qp)
+ ib_detach_mcast(id->qp,
+ &mc->multicast.ib->rec.mgid,
+ mc->multicast.ib->rec.mlid);
+ if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
+ switch (rdma_port_get_link_layer(id->device, id->port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ ib_sa_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ break;
+ case IB_LINK_LAYER_ETHERNET:
+ kref_put(&mc->mcref, release_mc);
+ break;
+ default:
+ break;
+ }
+ }
+ return;
+ }
+ }
+ spin_unlock_irq(&id_priv->lock);
+}
+EXPORT_SYMBOL(rdma_leave_multicast);
+
+static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
+{
+ struct rdma_dev_addr *dev_addr;
+ struct cma_ndev_work *work;
+
+ dev_addr = &id_priv->id.route.addr.dev_addr;
+
+#ifdef __linux__
+ if ((dev_addr->bound_dev_if == ndev->ifindex) &&
+ memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
+ printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
+ ndev->name, &id_priv->id);
+#else
+ if ((dev_addr->bound_dev_if == ndev->if_index) &&
+ memcmp(dev_addr->src_dev_addr, IF_LLADDR(ndev), ndev->if_addrlen)) {
+ printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
+ ndev->if_xname, &id_priv->id);
+#endif
+ work = kzalloc(sizeof *work, GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ INIT_WORK(&work->work, cma_ndev_work_handler);
+ work->id = id_priv;
+ work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
+ atomic_inc(&id_priv->refcount);
+ queue_work(cma_wq, &work->work);
+ }
+
+ return 0;
+}
+
+static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
+ void *ctx)
+{
+ struct net_device *ndev = (struct net_device *)ctx;
+ struct cma_device *cma_dev;
+ struct rdma_id_private *id_priv;
+ int ret = NOTIFY_DONE;
+
+#ifdef __linux__
+ if (dev_net(ndev) != &init_net)
+ return NOTIFY_DONE;
+
+ if (event != NETDEV_BONDING_FAILOVER)
+ return NOTIFY_DONE;
+
+ if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
+ return NOTIFY_DONE;
+#else
+ if (event != NETDEV_DOWN && event != NETDEV_UNREGISTER)
+ return NOTIFY_DONE;
+#endif
+
+ mutex_lock(&lock);
+ list_for_each_entry(cma_dev, &dev_list, list)
+ list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+ ret = cma_netdev_change(ndev, id_priv);
+ if (ret)
+ goto out;
+ }
+
+out:
+ mutex_unlock(&lock);
+ return ret;
+}
+
+static struct notifier_block cma_nb = {
+ .notifier_call = cma_netdev_callback
+};
+
+static void cma_add_one(struct ib_device *device)
+{
+ struct cma_device *cma_dev;
+ struct rdma_id_private *id_priv;
+
+ cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
+ if (!cma_dev)
+ return;
+
+ cma_dev->device = device;
+
+ init_completion(&cma_dev->comp);
+ atomic_set(&cma_dev->refcount, 1);
+ INIT_LIST_HEAD(&cma_dev->id_list);
+ ib_set_client_data(device, &cma_client, cma_dev);
+
+ mutex_lock(&lock);
+ list_add_tail(&cma_dev->list, &dev_list);
+ list_for_each_entry(id_priv, &listen_any_list, list)
+ cma_listen_on_dev(id_priv, cma_dev);
+ mutex_unlock(&lock);
+}
+
+static int cma_remove_id_dev(struct rdma_id_private *id_priv)
+{
+ struct rdma_cm_event event;
+ enum cma_state state;
+ int ret = 0;
+
+ /* Record that we want to remove the device */
+ state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
+ if (state == CMA_DESTROYING)
+ return 0;
+
+ cma_cancel_operation(id_priv, state);
+ mutex_lock(&id_priv->handler_mutex);
+
+ /* Check for destruction from another callback. */
+ if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
+ goto out;
+
+ memset(&event, 0, sizeof event);
+ event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
+ ret = id_priv->id.event_handler(&id_priv->id, &event);
+out:
+ mutex_unlock(&id_priv->handler_mutex);
+ return ret;
+}
+
+static void cma_process_remove(struct cma_device *cma_dev)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ mutex_lock(&lock);
+ while (!list_empty(&cma_dev->id_list)) {
+ id_priv = list_entry(cma_dev->id_list.next,
+ struct rdma_id_private, list);
+
+ list_del(&id_priv->listen_list);
+ list_del_init(&id_priv->list);
+ atomic_inc(&id_priv->refcount);
+ mutex_unlock(&lock);
+
+ ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
+ cma_deref_id(id_priv);
+ if (ret)
+ rdma_destroy_id(&id_priv->id);
+
+ mutex_lock(&lock);
+ }
+ mutex_unlock(&lock);
+
+ cma_deref_dev(cma_dev);
+ wait_for_completion(&cma_dev->comp);
+}
+
+static void cma_remove_one(struct ib_device *device)
+{
+ struct cma_device *cma_dev;
+
+ cma_dev = ib_get_client_data(device, &cma_client);
+ if (!cma_dev)
+ return;
+
+ mutex_lock(&lock);
+ list_del(&cma_dev->list);
+ mutex_unlock(&lock);
+
+ cma_process_remove(cma_dev);
+ kfree(cma_dev);
+}
+
+static int cma_init(void)
+{
+ int ret;
+#if defined(INET)
+ int low, high, remaining;
+
+ get_random_bytes(&next_port, sizeof next_port);
+ inet_get_local_port_range(&low, &high);
+ remaining = (high - low) + 1;
+ next_port = ((unsigned int) next_port % remaining) + low;
+#endif
+
+ cma_wq = create_singlethread_workqueue("rdma_cm");
+ if (!cma_wq)
+ return -ENOMEM;
+
+ ib_sa_register_client(&sa_client);
+ rdma_addr_register_client(&addr_client);
+ register_netdevice_notifier(&cma_nb);
+
+ ret = ib_register_client(&cma_client);
+ if (ret)
+ goto err;
+ return 0;
+
+err:
+ unregister_netdevice_notifier(&cma_nb);
+ rdma_addr_unregister_client(&addr_client);
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(cma_wq);
+ return ret;
+}
+
+static void cma_cleanup(void)
+{
+ ib_unregister_client(&cma_client);
+ unregister_netdevice_notifier(&cma_nb);
+ rdma_addr_unregister_client(&addr_client);
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(cma_wq);
+ idr_destroy(&sdp_ps);
+ idr_destroy(&tcp_ps);
+ idr_destroy(&udp_ps);
+ idr_destroy(&ipoib_ps);
+}
+
+module_init(cma_init);
+module_exit(cma_cleanup);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/cma.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/core_priv.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/core_priv.h (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/core_priv.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _CORE_PRIV_H
+#define _CORE_PRIV_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include <rdma/ib_verbs.h>
+
+int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *,
+ u8, struct kobject *));
+void ib_device_unregister_sysfs(struct ib_device *device);
+
+int ib_sysfs_setup(void);
+void ib_sysfs_cleanup(void);
+
+int ib_cache_setup(void);
+void ib_cache_cleanup(void);
+
+#endif /* _CORE_PRIV_H */
Property changes on: trunk/sys/ofed/drivers/infiniband/core/core_priv.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/device.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/device.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/device.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,771 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+
+#include "core_priv.h"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("core kernel InfiniBand API");
+MODULE_LICENSE("Dual BSD/GPL");
+
+#ifdef __ia64__
+/* Workaround for a bug in the HP chipset that would cause a kernel
+   panic when DMA resources are exhausted. */
+int dma_map_sg_hp_wa = 0;
+#endif
+
+struct ib_client_data {
+ struct list_head list;
+ struct ib_client *client;
+ void * data;
+};
+
+static LIST_HEAD(device_list);
+static LIST_HEAD(client_list);
+
+/*
+ * device_mutex protects access to both device_list and client_list.
+ * There's no real point to using multiple locks or something fancier
+ * like an rwsem: we always access both lists, and we're always
+ * modifying one list or the other. In any case this is not a
+ * hot path so there's no point in trying to optimize.
+ */
+static DEFINE_MUTEX(device_mutex);
+
+static int ib_device_check_mandatory(struct ib_device *device)
+{
+#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x }
+ static const struct {
+ size_t offset;
+ char *name;
+ } mandatory_table[] = {
+ IB_MANDATORY_FUNC(query_device),
+ IB_MANDATORY_FUNC(query_port),
+ IB_MANDATORY_FUNC(query_pkey),
+ IB_MANDATORY_FUNC(query_gid),
+ IB_MANDATORY_FUNC(alloc_pd),
+ IB_MANDATORY_FUNC(dealloc_pd),
+ IB_MANDATORY_FUNC(create_ah),
+ IB_MANDATORY_FUNC(destroy_ah),
+ IB_MANDATORY_FUNC(create_qp),
+ IB_MANDATORY_FUNC(modify_qp),
+ IB_MANDATORY_FUNC(destroy_qp),
+ IB_MANDATORY_FUNC(post_send),
+ IB_MANDATORY_FUNC(post_recv),
+ IB_MANDATORY_FUNC(create_cq),
+ IB_MANDATORY_FUNC(destroy_cq),
+ IB_MANDATORY_FUNC(poll_cq),
+ IB_MANDATORY_FUNC(req_notify_cq),
+ IB_MANDATORY_FUNC(get_dma_mr),
+ IB_MANDATORY_FUNC(dereg_mr)
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
+ if (!*(void **) ((u_char *) device + mandatory_table[i].offset)) {
+ printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
+ device->name, mandatory_table[i].name);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static struct ib_device *__ib_device_get_by_name(const char *name)
+{
+ struct ib_device *device;
+
+ list_for_each_entry(device, &device_list, core_list)
+ if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
+ return device;
+
+ return NULL;
+}
+
+static int alloc_name(char *name)
+{
+ unsigned long *inuse;
+ char buf[IB_DEVICE_NAME_MAX];
+ struct ib_device *device;
+ int i;
+
+ inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+ if (!inuse)
+ return -ENOMEM;
+
+ list_for_each_entry(device, &device_list, core_list) {
+ if (!sscanf(device->name, name, &i))
+ continue;
+ if (i < 0 || i >= PAGE_SIZE * 8)
+ continue;
+ snprintf(buf, sizeof buf, name, i);
+ if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
+ set_bit(i, inuse);
+ }
+
+ i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
+ free_page((unsigned long) inuse);
+ snprintf(buf, sizeof buf, name, i);
+
+ if (__ib_device_get_by_name(buf))
+ return -ENFILE;
+
+ strlcpy(name, buf, IB_DEVICE_NAME_MAX);
+ return 0;
+}
+
+static int start_port(struct ib_device *device)
+{
+ return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
+}
+
+static int end_port(struct ib_device *device)
+{
+ return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+ 0 : device->phys_port_cnt;
+}
+
+/**
+ * ib_alloc_device - allocate an IB device struct
+ * @size:size of structure to allocate
+ *
+ * Low-level drivers should use ib_alloc_device() to allocate &struct
+ * ib_device. @size is the size of the structure to be allocated,
+ * including any private data used by the low-level driver.
+ * ib_dealloc_device() must be used to free structures allocated with
+ * ib_alloc_device().
+ */
+struct ib_device *ib_alloc_device(size_t size)
+{
+ BUG_ON(size < sizeof (struct ib_device));
+
+ return kzalloc(size, GFP_KERNEL);
+}
+EXPORT_SYMBOL(ib_alloc_device);
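+/*
+ * Typical (illustrative) driver pattern: embed struct ib_device as the
+ * first member of a private structure, so the allocation below covers both
+ * and the private part can be recovered with container_of(). struct my_dev
+ * is hypothetical:
+ *
+ *	struct my_dev {
+ *		struct ib_device ibdev;
+ *		// ... driver private state ...
+ *	};
+ *
+ *	struct ib_device *ibdev = ib_alloc_device(sizeof(struct my_dev));
+ *	struct my_dev *dev = container_of(ibdev, struct my_dev, ibdev);
+ */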
+
+/**
+ * ib_dealloc_device - free an IB device struct
+ * @device:structure to free
+ *
+ * Free a structure allocated with ib_alloc_device().
+ */
+void ib_dealloc_device(struct ib_device *device)
+{
+ if (device->reg_state == IB_DEV_UNINITIALIZED) {
+ kfree(device);
+ return;
+ }
+
+ BUG_ON(device->reg_state != IB_DEV_UNREGISTERED);
+
+ kobject_put(&device->dev.kobj);
+}
+EXPORT_SYMBOL(ib_dealloc_device);
+
+static int add_client_context(struct ib_device *device, struct ib_client *client)
+{
+ struct ib_client_data *context;
+ unsigned long flags;
+
+ context = kmalloc(sizeof *context, GFP_KERNEL);
+ if (!context) {
+ printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n",
+ device->name, client->name);
+ return -ENOMEM;
+ }
+
+ context->client = client;
+ context->data = NULL;
+
+ spin_lock_irqsave(&device->client_data_lock, flags);
+ list_add(&context->list, &device->client_data_list);
+ spin_unlock_irqrestore(&device->client_data_lock, flags);
+
+ return 0;
+}
+
+static int read_port_table_lengths(struct ib_device *device)
+{
+ struct ib_port_attr *tprops = NULL;
+ int num_ports, ret = -ENOMEM;
+ u8 port_index;
+
+ tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
+ if (!tprops)
+ goto out;
+
+ num_ports = end_port(device) - start_port(device) + 1;
+
+ device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports,
+ GFP_KERNEL);
+ device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports,
+ GFP_KERNEL);
+ if (!device->pkey_tbl_len || !device->gid_tbl_len)
+ goto err;
+
+ for (port_index = 0; port_index < num_ports; ++port_index) {
+ ret = ib_query_port(device, port_index + start_port(device),
+ tprops);
+ if (ret)
+ goto err;
+ device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len;
+ device->gid_tbl_len[port_index] = tprops->gid_tbl_len;
+ }
+
+ ret = 0;
+ goto out;
+
+err:
+ kfree(device->gid_tbl_len);
+ kfree(device->pkey_tbl_len);
+out:
+ kfree(tprops);
+ return ret;
+}
+
+/**
+ * ib_register_device - Register an IB device with IB core
+ * @device:Device to register
+ *
+ * Low-level drivers use ib_register_device() to register their
+ * devices with the IB core. All registered clients will receive a
+ * callback for each device that is added. @device must be allocated
+ * with ib_alloc_device().
+ */
+int ib_register_device(struct ib_device *device,
+ int (*port_callback)(struct ib_device *,
+ u8, struct kobject *))
+{
+ int ret;
+
+ mutex_lock(&device_mutex);
+
+ if (strchr(device->name, '%')) {
+ ret = alloc_name(device->name);
+ if (ret)
+ goto out;
+ }
+
+ if (ib_device_check_mandatory(device)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&device->event_handler_list);
+ INIT_LIST_HEAD(&device->client_data_list);
+ spin_lock_init(&device->event_handler_lock);
+ spin_lock_init(&device->client_data_lock);
+ device->ib_uverbs_xrcd_table = RB_ROOT;
+ mutex_init(&device->xrcd_table_mutex);
+
+ ret = read_port_table_lengths(device);
+ if (ret) {
+ printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n",
+ device->name);
+ goto out;
+ }
+
+ ret = ib_device_register_sysfs(device, port_callback);
+ if (ret) {
+ printk(KERN_WARNING "Couldn't register device %s with driver model\n",
+ device->name);
+ kfree(device->gid_tbl_len);
+ kfree(device->pkey_tbl_len);
+ goto out;
+ }
+
+ list_add_tail(&device->core_list, &device_list);
+
+ device->reg_state = IB_DEV_REGISTERED;
+
+ {
+ struct ib_client *client;
+
+ list_for_each_entry(client, &client_list, list)
+ if (client->add && !add_client_context(device, client))
+ client->add(device);
+ }
+
+ out:
+ mutex_unlock(&device_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ib_register_device);
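+
+/*
+ * Naming sketch (illustrative only; "mydrv" is a made-up driver
+ * name): a driver that leaves a printf-style "%d" in the name gets a
+ * unique instance number picked by alloc_name() at registration
+ * time, so after a successful return device->name might read
+ * "mydrv0". Passing NULL skips the per-port sysfs callback:
+ *
+ *	strlcpy(device->name, "mydrv%d", IB_DEVICE_NAME_MAX);
+ *	if (ib_register_device(device, NULL))
+ *		goto err_unwind;
+ */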
+
+/**
+ * ib_unregister_device - Unregister an IB device
+ * @device:Device to unregister
+ *
+ * Unregister an IB device. All clients will receive a remove callback.
+ */
+void ib_unregister_device(struct ib_device *device)
+{
+ struct ib_client *client;
+ struct ib_client_data *context, *tmp;
+ unsigned long flags;
+
+ mutex_lock(&device_mutex);
+
+ list_for_each_entry_reverse(client, &client_list, list)
+ if (client->remove)
+ client->remove(device);
+
+ list_del(&device->core_list);
+
+ kfree(device->gid_tbl_len);
+ kfree(device->pkey_tbl_len);
+
+ mutex_unlock(&device_mutex);
+
+ ib_device_unregister_sysfs(device);
+
+ spin_lock_irqsave(&device->client_data_lock, flags);
+ list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
+ kfree(context);
+ spin_unlock_irqrestore(&device->client_data_lock, flags);
+
+ device->reg_state = IB_DEV_UNREGISTERED;
+}
+EXPORT_SYMBOL(ib_unregister_device);
+
+/**
+ * ib_register_client - Register an IB client
+ * @client:Client to register
+ *
+ * Upper level users of the IB drivers can use ib_register_client() to
+ * register callbacks for IB device addition and removal. When an IB
+ * device is added, each registered client's add method will be called
+ * (in the order the clients were registered), and when a device is
+ * removed, each client's remove method will be called (in the reverse
+ * order that clients were registered). In addition, when
+ * ib_register_client() is called, the client will receive an add
+ * callback for all devices already registered.
+ */
+int ib_register_client(struct ib_client *client)
+{
+ struct ib_device *device;
+
+ mutex_lock(&device_mutex);
+
+ list_add_tail(&client->list, &client_list);
+ list_for_each_entry(device, &device_list, core_list)
+ if (client->add && !add_client_context(device, client))
+ client->add(device);
+
+ mutex_unlock(&device_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_register_client);
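+
+/*
+ * A minimal client, sketched for illustration ("myclient" is a
+ * made-up name): the add callback allocates per-device state and
+ * publishes it with ib_set_client_data(); the remove callback frees
+ * it again:
+ *
+ *	static struct ib_client myclient;
+ *
+ *	static void myclient_add(struct ib_device *device)
+ *	{
+ *		void *state = kzalloc(64, GFP_KERNEL);
+ *
+ *		if (state)
+ *			ib_set_client_data(device, &myclient, state);
+ *	}
+ *
+ *	static void myclient_remove(struct ib_device *device)
+ *	{
+ *		kfree(ib_get_client_data(device, &myclient));
+ *	}
+ *
+ *	static struct ib_client myclient = {
+ *		.name   = "myclient",
+ *		.add    = myclient_add,
+ *		.remove = myclient_remove,
+ *	};
+ *
+ *	ib_register_client(&myclient);
+ */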
+
+/**
+ * ib_unregister_client - Unregister an IB client
+ * @client:Client to unregister
+ *
+ * Upper level users use ib_unregister_client() to remove their client
+ * registration. When ib_unregister_client() is called, the client
+ * will receive a remove callback for each IB device still registered.
+ */
+void ib_unregister_client(struct ib_client *client)
+{
+ struct ib_client_data *context, *tmp;
+ struct ib_device *device;
+ unsigned long flags;
+
+ mutex_lock(&device_mutex);
+
+ list_for_each_entry(device, &device_list, core_list) {
+ if (client->remove)
+ client->remove(device);
+
+ spin_lock_irqsave(&device->client_data_lock, flags);
+ list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
+ if (context->client == client) {
+ list_del(&context->list);
+ kfree(context);
+ }
+ spin_unlock_irqrestore(&device->client_data_lock, flags);
+ }
+ list_del(&client->list);
+
+ mutex_unlock(&device_mutex);
+}
+EXPORT_SYMBOL(ib_unregister_client);
+
+/**
+ * ib_get_client_data - Get IB client context
+ * @device:Device to get context for
+ * @client:Client to get context for
+ *
+ * ib_get_client_data() returns client context set with
+ * ib_set_client_data().
+ */
+void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
+{
+ struct ib_client_data *context;
+ void *ret = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&device->client_data_lock, flags);
+ list_for_each_entry(context, &device->client_data_list, list)
+ if (context->client == client) {
+ ret = context->data;
+ break;
+ }
+ spin_unlock_irqrestore(&device->client_data_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_client_data);
+
+/**
+ * ib_set_client_data - Set IB client context
+ * @device:Device to set context for
+ * @client:Client to set context for
+ * @data:Context to set
+ *
+ * ib_set_client_data() sets client context that can be retrieved with
+ * ib_get_client_data().
+ */
+void ib_set_client_data(struct ib_device *device, struct ib_client *client,
+ void *data)
+{
+ struct ib_client_data *context;
+ unsigned long flags;
+
+ spin_lock_irqsave(&device->client_data_lock, flags);
+ list_for_each_entry(context, &device->client_data_list, list)
+ if (context->client == client) {
+ context->data = data;
+ goto out;
+ }
+
+ printk(KERN_WARNING "No client context found for %s/%s\n",
+ device->name, client->name);
+
+out:
+ spin_unlock_irqrestore(&device->client_data_lock, flags);
+}
+EXPORT_SYMBOL(ib_set_client_data);
+
+/**
+ * ib_register_event_handler - Register an IB event handler
+ * @event_handler:Handler to register
+ *
+ * ib_register_event_handler() registers an event handler that will be
+ * called back when asynchronous IB events occur (as defined in
+ * chapter 11 of the InfiniBand Architecture Specification). This
+ * callback may occur in interrupt context.
+ */
+int ib_register_event_handler(struct ib_event_handler *event_handler)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+ list_add_tail(&event_handler->list,
+ &event_handler->device->event_handler_list);
+ spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_register_event_handler);
+
+/**
+ * ib_unregister_event_handler - Unregister an event handler
+ * @event_handler:Handler to unregister
+ *
+ * Unregister an event handler registered with
+ * ib_register_event_handler().
+ */
+int ib_unregister_event_handler(struct ib_event_handler *event_handler)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+ list_del(&event_handler->list);
+ spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_unregister_event_handler);
+
+/**
+ * ib_dispatch_event - Dispatch an asynchronous event
+ * @event:Event to dispatch
+ *
+ * Low-level drivers must call ib_dispatch_event() to dispatch the
+ * event to all registered event handlers when an asynchronous event
+ * occurs.
+ */
+void ib_dispatch_event(struct ib_event *event)
+{
+ unsigned long flags;
+ struct ib_event_handler *handler;
+
+ spin_lock_irqsave(&event->device->event_handler_lock, flags);
+
+ list_for_each_entry(handler, &event->device->event_handler_list, list)
+ handler->handler(handler, event);
+
+ spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
+}
+EXPORT_SYMBOL(ib_dispatch_event);
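+
+/*
+ * Both halves in one sketch ("my_handler" is hypothetical). A
+ * consumer registers with INIT_IB_EVENT_HANDLER() from
+ * <rdma/ib_verbs.h> and must tolerate being called in interrupt
+ * context; a low-level driver reports, e.g., a port error like so:
+ *
+ *	static void my_handler(struct ib_event_handler *h,
+ *	                       struct ib_event *e)
+ *	{
+ *		if (e->event == IB_EVENT_PORT_ERR)
+ *			printk(KERN_INFO "port %d down\n",
+ *			       e->element.port_num);
+ *	}
+ *
+ *	struct ib_event_handler eh;
+ *	INIT_IB_EVENT_HANDLER(&eh, device, my_handler);
+ *	ib_register_event_handler(&eh);
+ *
+ * and on the driver side:
+ *
+ *	struct ib_event ev = { .device = device,
+ *	                       .event  = IB_EVENT_PORT_ERR };
+ *	ev.element.port_num = port;
+ *	ib_dispatch_event(&ev);
+ */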
+
+/**
+ * ib_query_device - Query IB device attributes
+ * @device:Device to query
+ * @device_attr:Device attributes
+ *
+ * ib_query_device() returns the attributes of a device through the
+ * @device_attr pointer.
+ */
+int ib_query_device(struct ib_device *device,
+ struct ib_device_attr *device_attr)
+{
+ return device->query_device(device, device_attr);
+}
+EXPORT_SYMBOL(ib_query_device);
+
+/**
+ * ib_query_port - Query IB port attributes
+ * @device:Device to query
+ * @port_num:Port number to query
+ * @port_attr:Port attributes
+ *
+ * ib_query_port() returns the attributes of a port through the
+ * @port_attr pointer.
+ */
+int ib_query_port(struct ib_device *device,
+ u8 port_num,
+ struct ib_port_attr *port_attr)
+{
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ return device->query_port(device, port_num, port_attr);
+}
+EXPORT_SYMBOL(ib_query_port);
+
+/**
+ * ib_query_gid - Get GID table entry
+ * @device:Device to query
+ * @port_num:Port number to query
+ * @index:GID table index to query
+ * @gid:Returned GID
+ *
+ * ib_query_gid() fetches the specified GID table entry.
+ */
+int ib_query_gid(struct ib_device *device,
+ u8 port_num, int index, union ib_gid *gid)
+{
+ return device->query_gid(device, port_num, index, gid);
+}
+EXPORT_SYMBOL(ib_query_gid);
+
+/**
+ * ib_query_pkey - Get P_Key table entry
+ * @device:Device to query
+ * @port_num:Port number to query
+ * @index:P_Key table index to query
+ * @pkey:Returned P_Key
+ *
+ * ib_query_pkey() fetches the specified P_Key table entry.
+ */
+int ib_query_pkey(struct ib_device *device,
+ u8 port_num, u16 index, u16 *pkey)
+{
+ return device->query_pkey(device, port_num, index, pkey);
+}
+EXPORT_SYMBOL(ib_query_pkey);
+
+/**
+ * ib_modify_device - Change IB device attributes
+ * @device:Device to modify
+ * @device_modify_mask:Mask of attributes to change
+ * @device_modify:New attribute values
+ *
+ * ib_modify_device() changes a device's attributes as specified by
+ * the @device_modify_mask and @device_modify structure.
+ */
+int ib_modify_device(struct ib_device *device,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify)
+{
+ return device->modify_device(device, device_modify_mask,
+ device_modify);
+}
+EXPORT_SYMBOL(ib_modify_device);
+
+/**
+ * ib_modify_port - Modifies the attributes for the specified port.
+ * @device: The device to modify.
+ * @port_num: The number of the port to modify.
+ * @port_modify_mask: Mask used to specify which attributes of the port
+ * to change.
+ * @port_modify: New attribute values for the port.
+ *
+ * ib_modify_port() changes a port's attributes as specified by the
+ * @port_modify_mask and @port_modify structure.
+ */
+int ib_modify_port(struct ib_device *device,
+ u8 port_num, int port_modify_mask,
+ struct ib_port_modify *port_modify)
+{
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ return device->modify_port(device, port_num, port_modify_mask,
+ port_modify);
+}
+EXPORT_SYMBOL(ib_modify_port);
+
+/**
+ * ib_find_gid - Returns the port number and GID table index where
+ * a specified GID value occurs.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @port_num: The port number of the device where the GID value was found.
+ * @index: The index into the GID table where the GID was found. This
+ * parameter may be NULL.
+ */
+int ib_find_gid(struct ib_device *device, union ib_gid *gid,
+ u8 *port_num, u16 *index)
+{
+ union ib_gid tmp_gid;
+ int ret, port, i;
+
+ for (port = start_port(device); port <= end_port(device); ++port) {
+ for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) {
+ ret = ib_query_gid(device, port, i, &tmp_gid);
+ if (ret)
+ return ret;
+ if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
+ *port_num = port;
+ if (index)
+ *index = i;
+ return 0;
+ }
+ }
+ }
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL(ib_find_gid);
+
+/**
+ * ib_find_pkey - Returns the PKey table index where a specified
+ * PKey value occurs.
+ * @device: The device to query.
+ * @port_num: The port number of the device to search for the PKey.
+ * @pkey: The PKey value to search for.
+ * @index: The index into the PKey table where the PKey was found.
+ */
+int ib_find_pkey(struct ib_device *device,
+ u8 port_num, u16 pkey, u16 *index)
+{
+ int ret, i;
+ u16 tmp_pkey;
+
+ for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
+ ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
+ if (ret)
+ return ret;
+
+ if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
+ *index = i;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL(ib_find_pkey);
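+
+/*
+ * A lookup sketch combining the two helpers (illustrative; "gid" is
+ * assumed to be a filled-in union ib_gid, and 0xffff is the default
+ * full-membership P_Key): find where a known GID lives, then the
+ * index of the default P_Key on that port:
+ *
+ *	u8 port;
+ *	u16 gid_index, pkey_index;
+ *
+ *	if (!ib_find_gid(device, &gid, &port, &gid_index) &&
+ *	    !ib_find_pkey(device, port, 0xffff, &pkey_index))
+ *		printk(KERN_INFO "gid idx %d, pkey idx %d on port %d\n",
+ *		       gid_index, pkey_index, port);
+ */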
+
+static int __init ib_core_init(void)
+{
+ int ret;
+
+#ifdef __ia64__
+ if (ia64_platform_is("hpzx1"))
+ dma_map_sg_hp_wa = 1;
+#endif
+
+ ret = ib_sysfs_setup();
+ if (ret)
+ printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
+
+ ret = ib_cache_setup();
+ if (ret) {
+ printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
+ ib_sysfs_cleanup();
+ }
+
+ return ret;
+}
+
+static void __exit ib_core_cleanup(void)
+{
+ ib_cache_cleanup();
+ ib_sysfs_cleanup();
+ /* Make sure that any pending umem accounting work is done. */
+ flush_scheduled_work();
+}
+
+module_init(ib_core_init);
+module_exit(ib_core_cleanup);
+
+#undef MODULE_VERSION
+#include <sys/module.h>
+static int
+ibcore_evhand(module_t mod, int event, void *arg)
+{
+ return (0);
+}
+
+static moduledata_t ibcore_mod = {
+ .name = "ibcore",
+ .evhand = ibcore_evhand,
+};
+
+MODULE_VERSION(ibcore, 1);
+DECLARE_MODULE(ibcore, ibcore_mod, SI_SUB_SMP, SI_ORDER_ANY);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/device.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/fmr_pool.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/fmr_pool.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/fmr_pool.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,544 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/jhash.h>
+#include <linux/kthread.h>
+
+#include <rdma/ib_fmr_pool.h>
+
+#include "core_priv.h"
+
+#define PFX "fmr_pool: "
+
+enum {
+ IB_FMR_MAX_REMAPS = 32,
+
+ IB_FMR_HASH_BITS = 8,
+ IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS,
+ IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1
+};
+
+/*
+ * If an FMR is not in use, then the list member will point to either
+ * its pool's free_list (if the FMR can be mapped again; that is,
+ * remap_count < pool->max_remaps) or its pool's dirty_list (if the
+ * FMR needs to be unmapped before being remapped). In either of
+ * these cases it is a bug if the ref_count is not 0. In other words,
+ * if ref_count is > 0, then the list member must not be linked into
+ * either free_list or dirty_list.
+ *
+ * The cache_node member is used to link the FMR into a cache bucket
+ * (if caching is enabled). This is independent of the reference
+ * count of the FMR. When a valid FMR is released, its ref_count is
+ * decremented, and if ref_count reaches 0, the FMR is placed in
+ * either free_list or dirty_list as appropriate. However, it is not
+ * removed from the cache and may be "revived" if a call to
+ * ib_fmr_pool_map_phys() occurs before the FMR is remapped. In
+ * this case we just increment the ref_count and remove the FMR from
+ * free_list/dirty_list.
+ *
+ * Before we remap an FMR from free_list, we remove it from the cache
+ * (to prevent another user from obtaining a stale FMR). When an FMR
+ * is released, we add it to the tail of the free list, so that our
+ * cache eviction policy is "least recently used."
+ *
+ * All manipulation of ref_count, list and cache_node is protected by
+ * pool_lock to maintain consistency.
+ */
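+
+/*
+ * The rules above, condensed into the state transitions they imply
+ * (a summary only, no new behavior):
+ *
+ *	map (cache miss) : free_list -> in use,    ref_count 0 -> 1
+ *	unmap            : in use -> free_list     if remap_count < max_remaps
+ *	                   in use -> dirty_list    otherwise
+ *	flush/cleanup    : dirty_list -> ib_unmap_fmr() -> free_list,
+ *	                   remap_count reset to 0
+ */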
+
+struct ib_fmr_pool {
+ spinlock_t pool_lock;
+
+ int pool_size;
+ int max_pages;
+ int max_remaps;
+ int dirty_watermark;
+ int dirty_len;
+ struct list_head free_list;
+ struct list_head dirty_list;
+ struct hlist_head *cache_bucket;
+
+ void (*flush_function)(struct ib_fmr_pool *pool,
+ void * arg);
+ void *flush_arg;
+
+ struct task_struct *thread;
+
+ atomic_t req_ser;
+ atomic_t flush_ser;
+
+ wait_queue_head_t force_wait;
+};
+
+static inline u32 ib_fmr_hash(u64 first_page)
+{
+ return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) &
+ (IB_FMR_HASH_SIZE - 1);
+}
+
+/* Caller must hold pool_lock */
+static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
+ u64 *page_list,
+ int page_list_len,
+ u64 io_virtual_address)
+{
+ struct hlist_head *bucket;
+ struct ib_pool_fmr *fmr;
+ struct hlist_node *pos;
+
+ if (!pool->cache_bucket)
+ return NULL;
+
+ bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
+
+ hlist_for_each_entry(fmr, pos, bucket, cache_node)
+ if (io_virtual_address == fmr->io_virtual_address &&
+ page_list_len == fmr->page_list_len &&
+ !memcmp(page_list, fmr->page_list,
+ page_list_len * sizeof *page_list))
+ return fmr;
+
+ return NULL;
+}
+
+static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
+{
+ int ret;
+ struct ib_pool_fmr *fmr;
+ LIST_HEAD(unmap_list);
+ LIST_HEAD(fmr_list);
+
+ spin_lock_irq(&pool->pool_lock);
+
+ list_for_each_entry(fmr, &pool->dirty_list, list) {
+ hlist_del_init(&fmr->cache_node);
+ fmr->remap_count = 0;
+ list_add_tail(&fmr->fmr->list, &fmr_list);
+
+#ifdef DEBUG
+ if (fmr->ref_count != 0) {
+ printk(KERN_WARNING PFX "Unmapping FMR %p with ref count %d\n",
+ fmr, fmr->ref_count);
+ }
+#endif
+ }
+
+ list_splice_init(&pool->dirty_list, &unmap_list);
+ pool->dirty_len = 0;
+
+ spin_unlock_irq(&pool->pool_lock);
+
+ if (list_empty(&unmap_list))
+ return;
+
+ ret = ib_unmap_fmr(&fmr_list);
+ if (ret)
+ printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret);
+
+ spin_lock_irq(&pool->pool_lock);
+ list_splice(&unmap_list, &pool->free_list);
+ spin_unlock_irq(&pool->pool_lock);
+}
+
+static int ib_fmr_cleanup_thread(void *pool_ptr)
+{
+ struct ib_fmr_pool *pool = pool_ptr;
+
+ do {
+ if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
+ ib_fmr_batch_release(pool);
+
+ atomic_inc(&pool->flush_ser);
+ wake_up_interruptible(&pool->force_wait);
+
+ if (pool->flush_function)
+ pool->flush_function(pool, pool->flush_arg);
+ }
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
+ !kthread_should_stop())
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ } while (!kthread_should_stop());
+
+ return 0;
+}
+
+/**
+ * ib_create_fmr_pool - Create an FMR pool
+ * @pd:Protection domain for FMRs
+ * @params:FMR pool parameters
+ *
+ * Create a pool of FMRs. Return value is pointer to new pool or
+ * error code if creation failed.
+ */
+struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
+ struct ib_fmr_pool_param *params)
+{
+ struct ib_device *device;
+ struct ib_fmr_pool *pool;
+ struct ib_device_attr *attr;
+ int i;
+ int ret;
+ int max_remaps;
+
+ if (!params)
+ return ERR_PTR(-EINVAL);
+
+ device = pd->device;
+ if (!device->alloc_fmr || !device->dealloc_fmr ||
+ !device->map_phys_fmr || !device->unmap_fmr) {
+ printk(KERN_INFO PFX "Device %s does not support FMRs\n",
+ device->name);
+ return ERR_PTR(-ENOSYS);
+ }
+
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ if (!attr) {
+ printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = ib_query_device(device, attr);
+ if (ret) {
+ printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
+ kfree(attr);
+ return ERR_PTR(ret);
+ }
+
+ if (!attr->max_map_per_fmr)
+ max_remaps = IB_FMR_MAX_REMAPS;
+ else
+ max_remaps = attr->max_map_per_fmr;
+
+ kfree(attr);
+
+ pool = kmalloc(sizeof *pool, GFP_KERNEL);
+ if (!pool) {
+ printk(KERN_WARNING PFX "couldn't allocate pool struct\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ pool->cache_bucket = NULL;
+
+ pool->flush_function = params->flush_function;
+ pool->flush_arg = params->flush_arg;
+
+ INIT_LIST_HEAD(&pool->free_list);
+ INIT_LIST_HEAD(&pool->dirty_list);
+
+ if (params->cache) {
+ pool->cache_bucket =
+ kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
+ GFP_KERNEL);
+ if (!pool->cache_bucket) {
+ printk(KERN_WARNING PFX "Failed to allocate cache in pool\n");
+ ret = -ENOMEM;
+ goto out_free_pool;
+ }
+
+ for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
+ INIT_HLIST_HEAD(pool->cache_bucket + i);
+ }
+
+ pool->pool_size = 0;
+ pool->max_pages = params->max_pages_per_fmr;
+ pool->max_remaps = max_remaps;
+ pool->dirty_watermark = params->dirty_watermark;
+ pool->dirty_len = 0;
+ spin_lock_init(&pool->pool_lock);
+ atomic_set(&pool->req_ser, 0);
+ atomic_set(&pool->flush_ser, 0);
+ init_waitqueue_head(&pool->force_wait);
+
+ pool->thread = kthread_run(ib_fmr_cleanup_thread,
+ pool,
+ "ib_fmr(%s)",
+ device->name);
+ if (IS_ERR(pool->thread)) {
+ printk(KERN_WARNING PFX "couldn't start cleanup thread\n");
+ ret = PTR_ERR(pool->thread);
+ goto out_free_pool;
+ }
+
+ {
+ struct ib_pool_fmr *fmr;
+ struct ib_fmr_attr fmr_attr = {
+ .max_pages = params->max_pages_per_fmr,
+ .max_maps = pool->max_remaps,
+ .page_shift = params->page_shift
+ };
+ int bytes_per_fmr = sizeof *fmr;
+
+ if (pool->cache_bucket)
+ bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);
+
+ for (i = 0; i < params->pool_size; ++i) {
+ fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
+ if (!fmr) {
+ printk(KERN_WARNING PFX "failed to allocate fmr "
+ "struct for FMR %d\n", i);
+ goto out_fail;
+ }
+
+ fmr->pool = pool;
+ fmr->remap_count = 0;
+ fmr->ref_count = 0;
+ INIT_HLIST_NODE(&fmr->cache_node);
+
+ fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
+ if (IS_ERR(fmr->fmr)) {
+ printk(KERN_WARNING PFX "fmr_create failed "
+ "for FMR %d\n", i);
+ kfree(fmr);
+ goto out_fail;
+ }
+
+ list_add_tail(&fmr->list, &pool->free_list);
+ ++pool->pool_size;
+ }
+ }
+
+ return pool;
+
+ out_free_pool:
+ kfree(pool->cache_bucket);
+ kfree(pool);
+
+ return ERR_PTR(ret);
+
+ out_fail:
+ ib_destroy_fmr_pool(pool);
+
+ return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL(ib_create_fmr_pool);
+
+/**
+ * ib_destroy_fmr_pool - Free FMR pool
+ * @pool:FMR pool to free
+ *
+ * Destroy an FMR pool and free all associated resources.
+ */
+void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
+{
+ struct ib_pool_fmr *fmr;
+ struct ib_pool_fmr *tmp;
+ LIST_HEAD(fmr_list);
+ int i;
+
+ kthread_stop(pool->thread);
+ ib_fmr_batch_release(pool);
+
+ i = 0;
+ list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
+ if (fmr->remap_count) {
+ INIT_LIST_HEAD(&fmr_list);
+ list_add_tail(&fmr->fmr->list, &fmr_list);
+ ib_unmap_fmr(&fmr_list);
+ }
+ ib_dealloc_fmr(fmr->fmr);
+ list_del(&fmr->list);
+ kfree(fmr);
+ ++i;
+ }
+
+ if (i < pool->pool_size)
+ printk(KERN_WARNING PFX "pool still has %d regions registered\n",
+ pool->pool_size - i);
+
+ kfree(pool->cache_bucket);
+ kfree(pool);
+}
+EXPORT_SYMBOL(ib_destroy_fmr_pool);
+
+/**
+ * ib_flush_fmr_pool - Invalidate all unmapped FMRs
+ * @pool:FMR pool to flush
+ *
+ * Ensure that all unmapped FMRs are fully invalidated.
+ */
+int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
+{
+ int serial;
+ struct ib_pool_fmr *fmr, *next;
+
+ /*
+ * The free_list holds FMRs that may have been used
+ * but have not been remapped enough times to be dirty.
+ * Put them on the dirty list now so that the cleanup
+ * thread will reap them too.
+ */
+ spin_lock_irq(&pool->pool_lock);
+ list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
+ if (fmr->remap_count > 0)
+ list_move(&fmr->list, &pool->dirty_list);
+ }
+ spin_unlock_irq(&pool->pool_lock);
+
+ serial = atomic_inc_return(&pool->req_ser);
+ wake_up_process(pool->thread);
+
+ if (wait_event_interruptible(pool->force_wait,
+ atomic_read(&pool->flush_ser) - serial >= 0))
+ return -EINTR;
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_flush_fmr_pool);
+
+/**
+ * ib_fmr_pool_map_phys - Map an FMR from an FMR pool
+ * @pool_handle:FMR pool to allocate FMR from
+ * @page_list:List of pages to map
+ * @list_len:Number of pages in @page_list
+ * @io_virtual_address:I/O virtual address for new FMR
+ *
+ * Map an FMR from an FMR pool.
+ */
+struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
+ u64 *page_list,
+ int list_len,
+ u64 io_virtual_address)
+{
+ struct ib_fmr_pool *pool = pool_handle;
+ struct ib_pool_fmr *fmr;
+ unsigned long flags;
+ int result;
+
+ if (list_len < 1 || list_len > pool->max_pages)
+ return ERR_PTR(-EINVAL);
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ fmr = ib_fmr_cache_lookup(pool,
+ page_list,
+ list_len,
+ io_virtual_address);
+ if (fmr) {
+ /* found in cache */
+ ++fmr->ref_count;
+ if (fmr->ref_count == 1) {
+ list_del(&fmr->list);
+ }
+
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ return fmr;
+ }
+
+ if (list_empty(&pool->free_list)) {
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+ return ERR_PTR(-EAGAIN);
+ }
+
+ fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
+ list_del(&fmr->list);
+ hlist_del_init(&fmr->cache_node);
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
+ io_virtual_address);
+
+ if (result) {
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ list_add(&fmr->list, &pool->free_list);
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ printk(KERN_WARNING PFX "fmr_map returns %d\n", result);
+
+ return ERR_PTR(result);
+ }
+
+ ++fmr->remap_count;
+ fmr->ref_count = 1;
+
+ if (pool->cache_bucket) {
+ fmr->io_virtual_address = io_virtual_address;
+ fmr->page_list_len = list_len;
+ memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ hlist_add_head(&fmr->cache_node,
+ pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+ }
+
+ return fmr;
+}
+EXPORT_SYMBOL(ib_fmr_pool_map_phys);
+
+/**
+ * ib_fmr_pool_unmap - Unmap FMR
+ * @fmr:FMR to unmap
+ *
+ * Unmap an FMR. The FMR mapping may remain valid until the FMR is
+ * reused (or until ib_flush_fmr_pool() is called).
+ */
+int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
+{
+ struct ib_fmr_pool *pool;
+ unsigned long flags;
+
+ pool = fmr->pool;
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+
+ --fmr->ref_count;
+ if (!fmr->ref_count) {
+ if (fmr->remap_count < pool->max_remaps) {
+ list_add_tail(&fmr->list, &pool->free_list);
+ } else {
+ list_add_tail(&fmr->list, &pool->dirty_list);
+ if (++pool->dirty_len >= pool->dirty_watermark) {
+ atomic_inc(&pool->req_ser);
+ wake_up_process(pool->thread);
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (fmr->ref_count < 0)
+ printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n",
+ fmr, fmr->ref_count);
+#endif
+
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_fmr_pool_unmap);
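+
+/*
+ * End-to-end usage sketch for the pool API (illustrative only; the
+ * pool sizing, page_list, npages and iova are placeholders the
+ * caller already owns):
+ *
+ *	struct ib_fmr_pool_param params = {
+ *		.max_pages_per_fmr = 64,
+ *		.page_shift        = PAGE_SHIFT,
+ *		.access            = IB_ACCESS_LOCAL_WRITE |
+ *		                     IB_ACCESS_REMOTE_READ,
+ *		.pool_size         = 32,
+ *		.dirty_watermark   = 16,
+ *		.cache             = 1,
+ *	};
+ *	struct ib_fmr_pool *pool = ib_create_fmr_pool(pd, &params);
+ *	struct ib_pool_fmr *fmr;
+ *
+ *	if (IS_ERR(pool))
+ *		return PTR_ERR(pool);
+ *
+ *	fmr = ib_fmr_pool_map_phys(pool, page_list, npages, iova);
+ *	if (!IS_ERR(fmr)) {
+ *		... post work requests using fmr->fmr->lkey/rkey ...
+ *		ib_fmr_pool_unmap(fmr);
+ *	}
+ *
+ *	ib_flush_fmr_pool(pool);
+ *	ib_destroy_fmr_pool(pool);
+ */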
Property changes on: trunk/sys/ofed/drivers/infiniband/core/fmr_pool.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/iwcm.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/iwcm.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/iwcm.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,1029 @@
+/*
+ * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+#include <linux/string.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_addr.h>
+
+#include "iwcm.h"
+
+MODULE_AUTHOR("Tom Tucker");
+MODULE_DESCRIPTION("iWARP CM");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static struct workqueue_struct *iwcm_wq;
+struct iwcm_work {
+ struct work_struct work;
+ struct iwcm_id_private *cm_id;
+ struct list_head list;
+ struct iw_cm_event event;
+ struct list_head free_list;
+};
+
+/*
+ * The following services provide a mechanism for pre-allocating iwcm_work
+ * elements. The design pre-allocates them based on the cm_id type:
+ * LISTENING IDS: Get enough elements preallocated to handle the
+ * listen backlog.
+ * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
+ * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE
+ *
+ * Allocating them in connect and listen avoids having to deal
+ * with allocation failures on the event upcall from the provider (which
+ * is called in the interrupt context).
+ *
+ * One exception is when creating the cm_id for incoming connection requests.
+ * There are two cases:
+ * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If
+ * the backlog is exceeded, then no more connection request events will
+ * be processed. cm_event_handler() returns -ENOMEM in this case. It's up
+ * to the provider to reject the connection request.
+ * 2) in the connection request workqueue handler, cm_conn_req_handler().
+ * If work elements cannot be allocated for the new connect request cm_id,
+ * then IWCM will call the provider reject method. This is ok since
+ * cm_conn_req_handler() runs in the workqueue thread context.
+ */
+
+static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
+{
+ struct iwcm_work *work;
+
+ if (list_empty(&cm_id_priv->work_free_list))
+ return NULL;
+ work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
+ free_list);
+ list_del_init(&work->free_list);
+ return work;
+}
+
+static void put_work(struct iwcm_work *work)
+{
+ list_add(&work->free_list, &work->cm_id->work_free_list);
+}
+
+static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
+{
+ struct list_head *e, *tmp;
+
+ list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
+ kfree(list_entry(e, struct iwcm_work, free_list));
+}
+
+static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
+{
+ struct iwcm_work *work;
+
+ BUG_ON(!list_empty(&cm_id_priv->work_free_list));
+ while (count--) {
+ work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
+ if (!work) {
+ dealloc_work_entries(cm_id_priv);
+ return -ENOMEM;
+ }
+ work->cm_id = cm_id_priv;
+ INIT_LIST_HEAD(&work->list);
+ put_work(work);
+ }
+ return 0;
+}
+
+/*
+ * Save private data from incoming connection requests to
+ * iw_cm_event, so the low level driver doesn't have to. Adjust
+ * the event ptr to point to the local copy.
+ */
+static int copy_private_data(struct iw_cm_event *event)
+{
+ void *p;
+
+ p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC);
+ if (!p)
+ return -ENOMEM;
+ event->private_data = p;
+ return 0;
+}
+
+static void free_cm_id(struct iwcm_id_private *cm_id_priv)
+{
+ dealloc_work_entries(cm_id_priv);
+ kfree(cm_id_priv);
+}
+
+/*
+ * Release a reference on cm_id. If the last reference is being
+ * released, enable the waiting thread (in iw_destroy_cm_id) to
+ * get woken up, and return 1 if a thread is already waiting.
+ */
+static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
+{
+ BUG_ON(atomic_read(&cm_id_priv->refcount) == 0);
+ if (atomic_dec_and_test(&cm_id_priv->refcount)) {
+ BUG_ON(!list_empty(&cm_id_priv->work_list));
+ complete(&cm_id_priv->destroy_comp);
+ return 1;
+ }
+
+ return 0;
+}
+
+static void add_ref(struct iw_cm_id *cm_id)
+{
+ struct iwcm_id_private *cm_id_priv;
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ atomic_inc(&cm_id_priv->refcount);
+}
+
+static void rem_ref(struct iw_cm_id *cm_id)
+{
+ struct iwcm_id_private *cm_id_priv;
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ if (iwcm_deref_id(cm_id_priv) &&
+ test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+ BUG_ON(!list_empty(&cm_id_priv->work_list));
+ free_cm_id(cm_id_priv);
+ }
+}
+
+static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
+
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+ struct socket *so,
+ iw_cm_handler cm_handler,
+ void *context)
+{
+ struct iwcm_id_private *cm_id_priv;
+
+ cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
+ if (!cm_id_priv)
+ return ERR_PTR(-ENOMEM);
+
+ cm_id_priv->state = IW_CM_STATE_IDLE;
+ cm_id_priv->id.device = device;
+ cm_id_priv->id.cm_handler = cm_handler;
+ cm_id_priv->id.context = context;
+ cm_id_priv->id.event_handler = cm_event_handler;
+ cm_id_priv->id.add_ref = add_ref;
+ cm_id_priv->id.rem_ref = rem_ref;
+ cm_id_priv->id.so = so;
+ spin_lock_init(&cm_id_priv->lock);
+ atomic_set(&cm_id_priv->refcount, 1);
+ init_waitqueue_head(&cm_id_priv->connect_wait);
+ init_completion(&cm_id_priv->destroy_comp);
+ INIT_LIST_HEAD(&cm_id_priv->work_list);
+ INIT_LIST_HEAD(&cm_id_priv->work_free_list);
+
+ return &cm_id_priv->id;
+}
+EXPORT_SYMBOL(iw_create_cm_id);
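+
+/*
+ * Passive-side setup, sketched ("my_cm_handler" and the backlog of 8
+ * are placeholders; "so" is a socket the caller already owns): once
+ * the id is listening, the handler is invoked with
+ * IW_CM_EVENT_CONNECT_REQUEST for each inbound request, and a real
+ * handler would call iw_cm_accept() or iw_cm_reject() from there:
+ *
+ *	static int my_cm_handler(struct iw_cm_id *id,
+ *	                         struct iw_cm_event *ev)
+ *	{
+ *		return 0;
+ *	}
+ *
+ *	struct iw_cm_id *id = iw_create_cm_id(device, so,
+ *	                                      my_cm_handler, NULL);
+ *	if (IS_ERR(id))
+ *		return PTR_ERR(id);
+ *	if (iw_cm_listen(id, 8))
+ *		iw_destroy_cm_id(id);
+ */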
+
+static int iwcm_modify_qp_err(struct ib_qp *qp)
+{
+ struct ib_qp_attr qp_attr;
+
+ if (!qp)
+ return -EINVAL;
+
+ qp_attr.qp_state = IB_QPS_ERR;
+ return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * This is really the RDMAC CLOSING state. It is most similar to the
+ * IB SQD QP state.
+ */
+static int iwcm_modify_qp_sqd(struct ib_qp *qp)
+{
+ struct ib_qp_attr qp_attr;
+
+ BUG_ON(qp == NULL);
+ qp_attr.qp_state = IB_QPS_SQD;
+ return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * Block if a passive or active connection is currently being processed. Then
+ * process the event as follows:
+ * - If we are ESTABLISHED, move to CLOSING and modify the QP state
+ * based on the abrupt flag
+ * - If the connection is already in the CLOSING or IDLE state, the peer is
+ * disconnecting concurrently with us and we've already seen the
+ * DISCONNECT event -- ignore the request and return 0
+ * - Disconnect on a listening endpoint returns -EINVAL
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
+{
+ struct iwcm_id_private *cm_id_priv;
+ unsigned long flags;
+ int ret = 0;
+ struct ib_qp *qp = NULL;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ /* Wait if we're currently in a connect or accept downcall */
+ wait_event(cm_id_priv->connect_wait,
+ !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ switch (cm_id_priv->state) {
+ case IW_CM_STATE_ESTABLISHED:
+ cm_id_priv->state = IW_CM_STATE_CLOSING;
+
+ /* QP could be NULL for a user-mode client */
+ if (cm_id_priv->qp)
+ qp = cm_id_priv->qp;
+ else
+ ret = -EINVAL;
+ break;
+ case IW_CM_STATE_LISTEN:
+ ret = -EINVAL;
+ break;
+ case IW_CM_STATE_CLOSING:
+ /* remote peer closed first */
+ case IW_CM_STATE_IDLE:
+ /* accept or connect returned !0 */
+ break;
+ case IW_CM_STATE_CONN_RECV:
+ /*
+ * App called disconnect before/without calling accept after
+ * connect_request event delivered.
+ */
+ break;
+ case IW_CM_STATE_CONN_SENT:
+ /* Can only get here if wait above fails */
+ default:
+ BUG();
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ if (qp) {
+ if (abrupt)
+ ret = iwcm_modify_qp_err(qp);
+ else
+ ret = iwcm_modify_qp_sqd(qp);
+
+ /*
+ * If both sides are disconnecting the QP could
+ * already be in ERR or SQD states
+ */
+ ret = 0;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(iw_cm_disconnect);
+
+/*
+ * CM_ID <-- DESTROYING
+ *
+ * Clean up all resources associated with the connection and release
+ * the initial reference taken by iw_create_cm_id.
+ */
+static void destroy_cm_id(struct iw_cm_id *cm_id)
+{
+ struct iwcm_id_private *cm_id_priv;
+ unsigned long flags;
+ int ret;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ /*
+ * Wait if we're currently in a connect or accept downcall. A
+ * listening endpoint should never block here.
+ */
+ wait_event(cm_id_priv->connect_wait,
+ !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ switch (cm_id_priv->state) {
+ case IW_CM_STATE_LISTEN:
+ cm_id_priv->state = IW_CM_STATE_DESTROYING;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ /* destroy the listening endpoint */
+ ret = cm_id->device->iwcm->destroy_listen(cm_id);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ break;
+ case IW_CM_STATE_ESTABLISHED:
+ cm_id_priv->state = IW_CM_STATE_DESTROYING;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ /* Abrupt close of the connection */
+ (void)iwcm_modify_qp_err(cm_id_priv->qp);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ break;
+ case IW_CM_STATE_IDLE:
+ case IW_CM_STATE_CLOSING:
+ cm_id_priv->state = IW_CM_STATE_DESTROYING;
+ break;
+ case IW_CM_STATE_CONN_RECV:
+ /*
+ * App called destroy before/without calling accept after
+ * receiving connection request event notification or
+ * returned non zero from the event callback function.
+ * In either case, must tell the provider to reject.
+ */
+ cm_id_priv->state = IW_CM_STATE_DESTROYING;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_id->device->iwcm->reject(cm_id, NULL, 0);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ break;
+ case IW_CM_STATE_CONN_SENT:
+ case IW_CM_STATE_DESTROYING:
+ default:
+ BUG();
+ break;
+ }
+ if (cm_id_priv->qp) {
+ cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+ cm_id_priv->qp = NULL;
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ (void)iwcm_deref_id(cm_id_priv);
+}
+
+/*
+ * This function is only called by the application thread and cannot
+ * be called by the event thread. The function will wait for all
+ * references to be released on the cm_id and then kfree the cm_id
+ * object.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id)
+{
+ struct iwcm_id_private *cm_id_priv;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
+
+ destroy_cm_id(cm_id);
+
+ wait_for_completion(&cm_id_priv->destroy_comp);
+
+ free_cm_id(cm_id_priv);
+}
+EXPORT_SYMBOL(iw_destroy_cm_id);
+
+/*
+ * CM_ID <-- LISTEN
+ *
+ * Start listening for connect requests. Generates one CONNECT_REQUEST
+ * event for each inbound connect request.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
+{
+ struct iwcm_id_private *cm_id_priv;
+ unsigned long flags;
+ int ret;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+ ret = alloc_work_entries(cm_id_priv, backlog);
+ if (ret)
+ return ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ switch (cm_id_priv->state) {
+ case IW_CM_STATE_IDLE:
+ cm_id_priv->state = IW_CM_STATE_LISTEN;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
+ if (ret)
+ cm_id_priv->state = IW_CM_STATE_IDLE;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(iw_cm_listen);
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * Rejects an inbound connection request. No events are generated.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id,
+ const void *private_data,
+ u8 private_data_len)
+{
+ struct iwcm_id_private *cm_id_priv;
+ unsigned long flags;
+ int ret;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ wake_up_all(&cm_id_priv->connect_wait);
+ return -EINVAL;
+ }
+ cm_id_priv->state = IW_CM_STATE_IDLE;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ ret = cm_id->device->iwcm->reject(cm_id, private_data,
+ private_data_len);
+
+ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ wake_up_all(&cm_id_priv->connect_wait);
+
+ return ret;
+}
+EXPORT_SYMBOL(iw_cm_reject);
+
+/*
+ * CM_ID <-- ESTABLISHED
+ *
+ * Accepts an inbound connection request and generates an ESTABLISHED
+ * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
+ * until the ESTABLISHED event is received from the provider.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id,
+ struct iw_cm_conn_param *iw_param)
+{
+ struct iwcm_id_private *cm_id_priv;
+ struct ib_qp *qp;
+ unsigned long flags;
+ int ret;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ wake_up_all(&cm_id_priv->connect_wait);
+ return -EINVAL;
+ }
+ /* Get the ib_qp given the QPN */
+ qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+ if (!qp) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return -EINVAL;
+ }
+ cm_id->device->iwcm->add_ref(qp);
+ cm_id_priv->qp = qp;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ ret = cm_id->device->iwcm->accept(cm_id, iw_param);
+ if (ret) {
+ /* An error on accept precludes provider events */
+ BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+ cm_id_priv->state = IW_CM_STATE_IDLE;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id_priv->qp) {
+ cm_id->device->iwcm->rem_ref(qp);
+ cm_id_priv->qp = NULL;
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ wake_up_all(&cm_id_priv->connect_wait);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(iw_cm_accept);
+
+/*
+ * Active Side: CM_ID <-- CONN_SENT
+ *
+ * If successful, results in the generation of a CONNECT_REPLY
+ * event. iw_cm_disconnect and iw_cm_destroy will block until the
+ * CONNECT_REPLY event is received from the provider.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+ struct iwcm_id_private *cm_id_priv;
+ int ret;
+ unsigned long flags;
+ struct ib_qp *qp;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+ ret = alloc_work_entries(cm_id_priv, 4);
+ if (ret)
+ return ret;
+
+ set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+ if (cm_id_priv->state != IW_CM_STATE_IDLE) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ wake_up_all(&cm_id_priv->connect_wait);
+ return -EINVAL;
+ }
+
+ /* Get the ib_qp given the QPN */
+ qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+ if (!qp) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return -EINVAL;
+ }
+ cm_id->device->iwcm->add_ref(qp);
+ cm_id_priv->qp = qp;
+ cm_id_priv->state = IW_CM_STATE_CONN_SENT;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ ret = cm_id->device->iwcm->connect(cm_id, iw_param);
+ if (ret) {
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id_priv->qp) {
+ cm_id->device->iwcm->rem_ref(qp);
+ cm_id_priv->qp = NULL;
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+ cm_id_priv->state = IW_CM_STATE_IDLE;
+ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ wake_up_all(&cm_id_priv->connect_wait);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(iw_cm_connect);
+
+/*
+ * Passive Side: new CM_ID <-- CONN_RECV
+ *
+ * Handles an inbound connect request. The function creates a new
+ * iw_cm_id to represent the new connection and inherits the client
+ * callback function and other attributes from the listening parent.
+ *
+ * The work item contains a pointer to the listen_cm_id and the event. The
+ * listen_cm_id contains the client cm_handler, context and
+ * device. These are copied into the new cm_id when it is cloned from
+ * the listening id. The event contains the new four-tuple.
+ *
+ * An error on the child should not affect the parent, so this
+ * function does not return a value.
+ */
+static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
+ struct iw_cm_event *iw_event)
+{
+ unsigned long flags;
+ struct iw_cm_id *cm_id;
+ struct iwcm_id_private *cm_id_priv;
+ int ret;
+
+ /*
+ * The provider should never generate a connection request
+ * event with a bad status.
+ */
+ BUG_ON(iw_event->status);
+
+ /*
+ * We could be destroying the listening id. If so, ignore this
+ * upcall.
+ */
+ spin_lock_irqsave(&listen_id_priv->lock, flags);
+ if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
+ spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+ goto out;
+ }
+ spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+
+ cm_id = iw_create_cm_id(listen_id_priv->id.device,
+ iw_event->so,
+ listen_id_priv->id.cm_handler,
+ listen_id_priv->id.context);
+ /* If the cm_id could not be created, ignore the request */
+ if (IS_ERR(cm_id))
+ goto out;
+
+ cm_id->provider_data = iw_event->provider_data;
+ cm_id->local_addr = iw_event->local_addr;
+ cm_id->remote_addr = iw_event->remote_addr;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ cm_id_priv->state = IW_CM_STATE_CONN_RECV;
+
+ ret = alloc_work_entries(cm_id_priv, 3);
+ if (ret) {
+ iw_cm_reject(cm_id, NULL, 0);
+ iw_destroy_cm_id(cm_id);
+ goto out;
+ }
+
+ /* Call the client CM handler */
+ ret = cm_id->cm_handler(cm_id, iw_event);
+ if (ret) {
+ iw_cm_reject(cm_id, NULL, 0);
+ set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+ destroy_cm_id(cm_id);
+ if (atomic_read(&cm_id_priv->refcount) == 0)
+ free_cm_id(cm_id_priv);
+ }
+
+out:
+ if (iw_event->private_data_len)
+ kfree(iw_event->private_data);
+}
+
+/*
+ * Passive Side: CM_ID <-- ESTABLISHED
+ *
+ * The provider generated an ESTABLISHED event which means that
+ * the MPA negotiation has completed successfully and we are now in MPA
+ * FPDU mode.
+ *
+ * This event can only be received in the CONN_RECV state. If the
+ * remote peer closed, the ESTABLISHED event would be received followed
+ * by the CLOSE event. If the app closes, it will block until we wake
+ * it up after processing this event.
+ */
+static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
+ struct iw_cm_event *iw_event)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+ /*
+ * We clear the CONNECT_WAIT bit here to allow the callback
+ * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
+ * from a callback handler is not allowed.
+ */
+ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+ cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+ wake_up_all(&cm_id_priv->connect_wait);
+
+ return ret;
+}
+
+/*
+ * Active Side: CM_ID <-- ESTABLISHED
+ *
+ * The app has called connect and is waiting for the ESTABLISHED event so
+ * it can post its requests to the server. This event will wake up anyone
+ * blocked in iw_cm_disconnect or iw_destroy_cm_id.
+ */
+static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
+ struct iw_cm_event *iw_event)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ /*
+ * Clear the connect wait bit so a callback function calling
+ * iw_cm_disconnect will not wait and deadlock this thread
+ */
+ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+ if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
+ cm_id_priv->id.local_addr = iw_event->local_addr;
+ cm_id_priv->id.remote_addr = iw_event->remote_addr;
+ cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+ } else {
+ /* REJECTED or RESET */
+ cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+ cm_id_priv->qp = NULL;
+ cm_id_priv->state = IW_CM_STATE_IDLE;
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+
+ if (iw_event->private_data_len)
+ kfree(iw_event->private_data);
+
+ /* Wake up waiters on connect complete */
+ wake_up_all(&cm_id_priv->connect_wait);
+
+ return ret;
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * If in the ESTABLISHED state, move to CLOSING.
+ */
+static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
+ struct iw_cm_event *iw_event)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
+ cm_id_priv->state = IW_CM_STATE_CLOSING;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * If in the ESTABLISHED or CLOSING states, the QP will have been
+ * moved by the provider to the ERR state. Disassociate the CM_ID from
+ * the QP, move to IDLE, and remove the 'connected' reference.
+ *
+ * If in some other state, the cm_id was destroyed asynchronously.
+ * This is the last reference that will result in waking up
+ * the app thread blocked in iw_destroy_cm_id.
+ */
+static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
+ struct iw_cm_event *iw_event)
+{
+ unsigned long flags;
+ int ret = 0;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+ if (cm_id_priv->qp) {
+ cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+ cm_id_priv->qp = NULL;
+ }
+ switch (cm_id_priv->state) {
+ case IW_CM_STATE_ESTABLISHED:
+ case IW_CM_STATE_CLOSING:
+ cm_id_priv->state = IW_CM_STATE_IDLE;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ break;
+ case IW_CM_STATE_DESTROYING:
+ break;
+ default:
+ BUG();
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ return ret;
+}
+
+static int process_event(struct iwcm_id_private *cm_id_priv,
+ struct iw_cm_event *iw_event)
+{
+ int ret = 0;
+
+ switch (iw_event->event) {
+ case IW_CM_EVENT_CONNECT_REQUEST:
+ cm_conn_req_handler(cm_id_priv, iw_event);
+ break;
+ case IW_CM_EVENT_CONNECT_REPLY:
+ ret = cm_conn_rep_handler(cm_id_priv, iw_event);
+ break;
+ case IW_CM_EVENT_ESTABLISHED:
+ ret = cm_conn_est_handler(cm_id_priv, iw_event);
+ break;
+ case IW_CM_EVENT_DISCONNECT:
+ cm_disconnect_handler(cm_id_priv, iw_event);
+ break;
+ case IW_CM_EVENT_CLOSE:
+ ret = cm_close_handler(cm_id_priv, iw_event);
+ break;
+ default:
+ BUG();
+ }
+
+ return ret;
+}
+
+/*
+ * Process events on the work_list for the cm_id. If the callback
+ * function requests that the cm_id be deleted, a flag is set in the
+ * cm_id flags to indicate that when the last reference is
+ * removed, the cm_id is to be destroyed. This is necessary to
+ * distinguish between an object that will be destroyed by the app
+ * thread asleep on the destroy_comp list vs. an object destroyed
+ * here synchronously when the last reference is removed.
+ */
+static void cm_work_handler(struct work_struct *_work)
+{
+ struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
+ struct iw_cm_event levent;
+ struct iwcm_id_private *cm_id_priv = work->cm_id;
+ unsigned long flags;
+ int empty;
+ int ret = 0;
+ int destroy_id;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ empty = list_empty(&cm_id_priv->work_list);
+ while (!empty) {
+ work = list_entry(cm_id_priv->work_list.next,
+ struct iwcm_work, list);
+ list_del_init(&work->list);
+ empty = list_empty(&cm_id_priv->work_list);
+ levent = work->event;
+ put_work(work);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ ret = process_event(cm_id_priv, &levent);
+ if (ret) {
+ set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+ destroy_cm_id(&cm_id_priv->id);
+ }
+ BUG_ON(atomic_read(&cm_id_priv->refcount) == 0);
+ destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+ if (iwcm_deref_id(cm_id_priv)) {
+ if (destroy_id) {
+ BUG_ON(!list_empty(&cm_id_priv->work_list));
+ free_cm_id(cm_id_priv);
+ }
+ return;
+ }
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * This function is called on interrupt context. Schedule events on
+ * the iwcm_wq thread to allow callback functions to downcall into
+ * the CM and/or block. Events are queued to a per-CM_ID
+ * work_list. If this is the first event on the work_list, the work
+ * element is also queued on the iwcm_wq thread.
+ *
+ * Each event holds a reference on the cm_id. Until the last posted
+ * event has been delivered and processed, the cm_id cannot be
+ * deleted.
+ *
+ * Returns:
+ * 0 - the event was handled.
+ * -ENOMEM - the event was not handled due to lack of resources.
+ */
+static int cm_event_handler(struct iw_cm_id *cm_id,
+ struct iw_cm_event *iw_event)
+{
+ struct iwcm_work *work;
+ struct iwcm_id_private *cm_id_priv;
+ unsigned long flags;
+ int ret = 0;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ work = get_work(cm_id_priv);
+ if (!work) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ INIT_WORK(&work->work, cm_work_handler);
+ work->cm_id = cm_id_priv;
+ work->event = *iw_event;
+
+ if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
+ work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
+ work->event.private_data_len) {
+ ret = copy_private_data(&work->event);
+ if (ret) {
+ put_work(work);
+ goto out;
+ }
+ }
+
+ atomic_inc(&cm_id_priv->refcount);
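+	/*
+	 * Only the first event queued on an empty list schedules the work
+	 * item; cm_work_handler() drains anything queued behind it.
+	 */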
+ if (list_empty(&cm_id_priv->work_list)) {
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ queue_work(iwcm_wq, &work->work);
+ } else
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+out:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+
+static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
+ struct ib_qp_attr *qp_attr,
+ int *qp_attr_mask)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ switch (cm_id_priv->state) {
+ case IW_CM_STATE_IDLE:
+ case IW_CM_STATE_CONN_SENT:
+ case IW_CM_STATE_CONN_RECV:
+ case IW_CM_STATE_ESTABLISHED:
+ *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+ qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE|
+ IB_ACCESS_REMOTE_READ;
+ ret = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+
+static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
+ struct ib_qp_attr *qp_attr,
+ int *qp_attr_mask)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ switch (cm_id_priv->state) {
+ case IW_CM_STATE_IDLE:
+ case IW_CM_STATE_CONN_SENT:
+ case IW_CM_STATE_CONN_RECV:
+ case IW_CM_STATE_ESTABLISHED:
+ *qp_attr_mask = 0;
+ ret = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
+}
+
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
+ struct ib_qp_attr *qp_attr,
+ int *qp_attr_mask)
+{
+ struct iwcm_id_private *cm_id_priv;
+ int ret;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ switch (qp_attr->qp_state) {
+ case IB_QPS_INIT:
+ case IB_QPS_RTR:
+ ret = iwcm_init_qp_init_attr(cm_id_priv,
+ qp_attr, qp_attr_mask);
+ break;
+ case IB_QPS_RTS:
+ ret = iwcm_init_qp_rts_attr(cm_id_priv,
+ qp_attr, qp_attr_mask);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+EXPORT_SYMBOL(iw_cm_init_qp_attr);
+
+static int __init iw_cm_init(void)
+{
+ iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
+ if (!iwcm_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void __exit iw_cm_cleanup(void)
+{
+ destroy_workqueue(iwcm_wq);
+}
+
+module_init(iw_cm_init);
+module_exit(iw_cm_cleanup);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/iwcm.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
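
For reference, a consumer of the iw_cm_init_qp_attr() export added above
typically feeds the returned attributes straight into ib_modify_qp() to walk
its QP through INIT/RTR/RTS. A minimal sketch, assuming an already-created
cm_id and qp (the helper name is hypothetical, not part of this commit):

    /* assumes <rdma/iw_cm.h> and <rdma/ib_verbs.h> */
    static int example_move_qp(struct iw_cm_id *cm_id, struct ib_qp *qp,
                               enum ib_qp_state state)
    {
            struct ib_qp_attr qp_attr;
            int qp_attr_mask;
            int ret;

            memset(&qp_attr, 0, sizeof qp_attr);
            qp_attr.qp_state = state;       /* IB_QPS_INIT, _RTR or _RTS */
            ret = iw_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
            if (ret)
                    return ret;
            /* for iWARP the mask is minimal; the provider does the rest */
            return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
    }

This mirrors how the RDMA CM consumes the interface; iw_cm_init_qp_attr()
only fills in attributes while the cm_id is in a state where the transition
makes sense and returns -EINVAL otherwise.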
Added: trunk/sys/ofed/drivers/infiniband/core/iwcm.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/iwcm.h (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/iwcm.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IWCM_H
+#define IWCM_H
+
+enum iw_cm_state {
+ IW_CM_STATE_IDLE, /* unbound, inactive */
+ IW_CM_STATE_LISTEN, /* listen waiting for connect */
+ IW_CM_STATE_CONN_RECV, /* inbound waiting for user accept */
+ IW_CM_STATE_CONN_SENT, /* outbound waiting for peer accept */
+ IW_CM_STATE_ESTABLISHED, /* established */
+ IW_CM_STATE_CLOSING, /* disconnect */
+ IW_CM_STATE_DESTROYING /* object being deleted */
+};
+
+struct iwcm_id_private {
+ struct iw_cm_id id;
+ enum iw_cm_state state;
+ unsigned long flags;
+ struct ib_qp *qp;
+ struct completion destroy_comp;
+ wait_queue_head_t connect_wait;
+ struct list_head work_list;
+ spinlock_t lock;
+ atomic_t refcount;
+ struct list_head work_free_list;
+};
+
+#define IWCM_F_CALLBACK_DESTROY 1
+#define IWCM_F_CONNECT_WAIT 2
+
+#endif /* IWCM_H */
Property changes on: trunk/sys/ofed/drivers/infiniband/core/iwcm.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
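
The states defined in iwcm.h above are driven entirely by provider events: a
driver fills in an iw_cm_event and invokes the cm_id's event_handler hook
(which the iwcm code wires to cm_event_handler()), and since that handler
only queues work it is safe to call from interrupt context. A hedged sketch
of the provider side (helper name hypothetical):

    /* assumes <rdma/iw_cm.h> */
    static void example_report_close(struct iw_cm_id *cm_id)
    {
            struct iw_cm_event event;

            memset(&event, 0, sizeof event);
            event.event = IW_CM_EVENT_CLOSE;
            event.status = 0;
            /* may return -ENOMEM if no work element is available */
            cm_id->event_handler(cm_id, &event);
    }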
Added: trunk/sys/ofed/drivers/infiniband/core/local_sa.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/local_sa.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/local_sa.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,1273 @@
+/*
+ * Copyright (c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/rbtree.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/miscdevice.h>
+#include <linux/random.h>
+
+#include <rdma/ib_cache.h>
+#include <rdma/ib_sa.h>
+#include "sa.h"
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("InfiniBand subnet administration caching");
+MODULE_LICENSE("Dual BSD/GPL");
+
+enum {
+ SA_DB_MAX_PATHS_PER_DEST = 0x7F,
+ SA_DB_MIN_RETRY_TIMER = 4000, /* 4 sec */
+ SA_DB_MAX_RETRY_TIMER = 256000 /* 256 sec */
+};
+
+static int set_paths_per_dest(const char *val, struct kernel_param *kp);
+static unsigned long paths_per_dest = 0;
+module_param_call(paths_per_dest, set_paths_per_dest, param_get_ulong,
+ &paths_per_dest, 0644);
+MODULE_PARM_DESC(paths_per_dest, "Maximum number of paths to retrieve "
+				 "for each destination (DGID). Set to 0 "
+ "to disable cache.");
+
+static int set_subscribe_inform_info(const char *val, struct kernel_param *kp);
+static char subscribe_inform_info = 1;
+module_param_call(subscribe_inform_info, set_subscribe_inform_info,
+ param_get_bool, &subscribe_inform_info, 0644);
+MODULE_PARM_DESC(subscribe_inform_info,
+ "Subscribe for SA InformInfo/Notice events.");
+
+static int do_refresh(const char *val, struct kernel_param *kp);
+module_param_call(refresh, do_refresh, NULL, NULL, 0200);
+
+static unsigned long retry_timer = SA_DB_MIN_RETRY_TIMER;
+
+enum sa_db_lookup_method {
+ SA_DB_LOOKUP_LEAST_USED,
+ SA_DB_LOOKUP_RANDOM
+};
+
+static int set_lookup_method(const char *val, struct kernel_param *kp);
+static int get_lookup_method(char *buf, struct kernel_param *kp);
+static unsigned long lookup_method;
+module_param_call(lookup_method, set_lookup_method, get_lookup_method,
+ &lookup_method, 0644);
+MODULE_PARM_DESC(lookup_method, "Method used to return path records when "
+ "multiple paths exist to a given destination.");
+
+static void sa_db_add_dev(struct ib_device *device);
+static void sa_db_remove_dev(struct ib_device *device);
+
+static struct ib_client sa_db_client = {
+ .name = "local_sa",
+ .add = sa_db_add_dev,
+ .remove = sa_db_remove_dev
+};
+
+static LIST_HEAD(dev_list);
+static DEFINE_MUTEX(lock);
+static rwlock_t rwlock;
+static struct workqueue_struct *sa_wq;
+static struct ib_sa_client sa_client;
+
+enum sa_db_state {
+ SA_DB_IDLE,
+ SA_DB_REFRESH,
+ SA_DB_DESTROY
+};
+
+struct sa_db_port {
+ struct sa_db_device *dev;
+ struct ib_mad_agent *agent;
+ /* Limit number of outstanding MADs to SA to reduce SA flooding */
+ struct ib_mad_send_buf *msg;
+ u16 sm_lid;
+ u8 sm_sl;
+ struct ib_inform_info *in_info;
+ struct ib_inform_info *out_info;
+ struct rb_root paths;
+ struct list_head update_list;
+ unsigned long update_id;
+ enum sa_db_state state;
+ struct work_struct work;
+ union ib_gid gid;
+ int port_num;
+};
+
+struct sa_db_device {
+ struct list_head list;
+ struct ib_device *device;
+ struct ib_event_handler event_handler;
+ int start_port;
+ int port_count;
+ struct sa_db_port port[0];
+};
+
+struct ib_sa_iterator {
+ struct ib_sa_iterator *next;
+};
+
+struct ib_sa_attr_iter {
+ struct ib_sa_iterator *iter;
+ unsigned long flags;
+};
+
+struct ib_sa_attr_list {
+ struct ib_sa_iterator iter;
+ struct ib_sa_iterator *tail;
+ int update_id;
+ union ib_gid gid;
+ struct rb_node node;
+};
+
+struct ib_path_rec_info {
+ struct ib_sa_iterator iter; /* keep first */
+ struct ib_sa_path_rec rec;
+ unsigned long lookups;
+};
+
+struct ib_sa_mad_iter {
+ struct ib_mad_recv_wc *recv_wc;
+ struct ib_mad_recv_buf *recv_buf;
+ int attr_size;
+ int attr_offset;
+ int data_offset;
+ int data_left;
+ void *attr;
+ u8 attr_data[0];
+};
+
+enum sa_update_type {
+ SA_UPDATE_FULL,
+ SA_UPDATE_ADD,
+ SA_UPDATE_REMOVE
+};
+
+struct update_info {
+ struct list_head list;
+ union ib_gid gid;
+ enum sa_update_type type;
+};
+
+struct sa_path_request {
+ struct work_struct work;
+ struct ib_sa_client *client;
+ void (*callback)(int, struct ib_sa_path_rec *, void *);
+ void *context;
+ struct ib_sa_path_rec path_rec;
+};
+
+static void process_updates(struct sa_db_port *port);
+
+static void free_attr_list(struct ib_sa_attr_list *attr_list)
+{
+ struct ib_sa_iterator *cur;
+
+ for (cur = attr_list->iter.next; cur; cur = attr_list->iter.next) {
+ attr_list->iter.next = cur->next;
+ kfree(cur);
+ }
+ attr_list->tail = &attr_list->iter;
+}
+
+static void remove_attr(struct rb_root *root, struct ib_sa_attr_list *attr_list)
+{
+ rb_erase(&attr_list->node, root);
+ free_attr_list(attr_list);
+ kfree(attr_list);
+}
+
+static void remove_all_attrs(struct rb_root *root)
+{
+ struct rb_node *node, *next_node;
+ struct ib_sa_attr_list *attr_list;
+
+ write_lock_irq(&rwlock);
+ for (node = rb_first(root); node; node = next_node) {
+ next_node = rb_next(node);
+ attr_list = rb_entry(node, struct ib_sa_attr_list, node);
+ remove_attr(root, attr_list);
+ }
+ write_unlock_irq(&rwlock);
+}
+
+static void remove_old_attrs(struct rb_root *root, unsigned long update_id)
+{
+ struct rb_node *node, *next_node;
+ struct ib_sa_attr_list *attr_list;
+
+ write_lock_irq(&rwlock);
+ for (node = rb_first(root); node; node = next_node) {
+ next_node = rb_next(node);
+ attr_list = rb_entry(node, struct ib_sa_attr_list, node);
+ if (attr_list->update_id != update_id)
+ remove_attr(root, attr_list);
+ }
+ write_unlock_irq(&rwlock);
+}
+
+static struct ib_sa_attr_list *insert_attr_list(struct rb_root *root,
+ struct ib_sa_attr_list *attr_list)
+{
+ struct rb_node **link = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct ib_sa_attr_list *cur_attr_list;
+ int cmp;
+
+ while (*link) {
+ parent = *link;
+ cur_attr_list = rb_entry(parent, struct ib_sa_attr_list, node);
+ cmp = memcmp(&cur_attr_list->gid, &attr_list->gid,
+ sizeof attr_list->gid);
+ if (cmp < 0)
+ link = &(*link)->rb_left;
+ else if (cmp > 0)
+ link = &(*link)->rb_right;
+ else
+ return cur_attr_list;
+ }
+ rb_link_node(&attr_list->node, parent, link);
+ rb_insert_color(&attr_list->node, root);
+ return NULL;
+}
+
+static struct ib_sa_attr_list *find_attr_list(struct rb_root *root, u8 *gid)
+{
+ struct rb_node *node = root->rb_node;
+ struct ib_sa_attr_list *attr_list;
+ int cmp;
+
+ while (node) {
+ attr_list = rb_entry(node, struct ib_sa_attr_list, node);
+ cmp = memcmp(&attr_list->gid, gid, sizeof attr_list->gid);
+ if (cmp < 0)
+ node = node->rb_left;
+ else if (cmp > 0)
+ node = node->rb_right;
+ else
+ return attr_list;
+ }
+ return NULL;
+}
+
+static int insert_attr(struct rb_root *root, unsigned long update_id, void *key,
+ struct ib_sa_iterator *iter)
+{
+ struct ib_sa_attr_list *attr_list;
+ void *err;
+
+ write_lock_irq(&rwlock);
+ attr_list = find_attr_list(root, key);
+ if (!attr_list) {
+ write_unlock_irq(&rwlock);
+ attr_list = kmalloc(sizeof *attr_list, GFP_KERNEL);
+ if (!attr_list)
+ return -ENOMEM;
+
+ attr_list->iter.next = NULL;
+ attr_list->tail = &attr_list->iter;
+ attr_list->update_id = update_id;
+ memcpy(attr_list->gid.raw, key, sizeof attr_list->gid);
+
+ write_lock_irq(&rwlock);
+ err = insert_attr_list(root, attr_list);
+ if (err) {
+ write_unlock_irq(&rwlock);
+ kfree(attr_list);
+ return PTR_ERR(err);
+ }
+ } else if (attr_list->update_id != update_id) {
+ free_attr_list(attr_list);
+ attr_list->update_id = update_id;
+ }
+
+ attr_list->tail->next = iter;
+ iter->next = NULL;
+ attr_list->tail = iter;
+ write_unlock_irq(&rwlock);
+ return 0;
+}
+
+static struct ib_sa_mad_iter *ib_sa_iter_create(struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_sa_mad_iter *iter;
+ struct ib_sa_mad *mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad;
+ int attr_size, attr_offset;
+
+ attr_offset = be16_to_cpu(mad->sa_hdr.attr_offset) * 8;
+ attr_size = 64; /* path record length */
+ if (attr_offset < attr_size)
+ return ERR_PTR(-EINVAL);
+
+ iter = kzalloc(sizeof *iter + attr_size, GFP_KERNEL);
+ if (!iter)
+ return ERR_PTR(-ENOMEM);
+
+ iter->data_left = mad_recv_wc->mad_len - IB_MGMT_SA_HDR;
+ iter->recv_wc = mad_recv_wc;
+ iter->recv_buf = &mad_recv_wc->recv_buf;
+ iter->attr_offset = attr_offset;
+ iter->attr_size = attr_size;
+ return iter;
+}
+
+static void ib_sa_iter_free(struct ib_sa_mad_iter *iter)
+{
+ kfree(iter);
+}
+
+static void *ib_sa_iter_next(struct ib_sa_mad_iter *iter)
+{
+ struct ib_sa_mad *mad;
+ int left, offset = 0;
+
+ while (iter->data_left >= iter->attr_offset) {
+ while (iter->data_offset < IB_MGMT_SA_DATA) {
+ mad = (struct ib_sa_mad *) iter->recv_buf->mad;
+
+ left = IB_MGMT_SA_DATA - iter->data_offset;
+ if (left < iter->attr_size) {
+ /* copy first piece of the attribute */
+ iter->attr = &iter->attr_data;
+ memcpy(iter->attr,
+ &mad->data[iter->data_offset], left);
+ offset = left;
+ break;
+ } else if (offset) {
+ /* copy the second piece of the attribute */
+ memcpy(iter->attr + offset, &mad->data[0],
+ iter->attr_size - offset);
+ iter->data_offset = iter->attr_size - offset;
+ offset = 0;
+ } else {
+ iter->attr = &mad->data[iter->data_offset];
+ iter->data_offset += iter->attr_size;
+ }
+
+ iter->data_left -= iter->attr_offset;
+ goto out;
+ }
+ iter->data_offset = 0;
+ iter->recv_buf = list_entry(iter->recv_buf->list.next,
+ struct ib_mad_recv_buf, list);
+ }
+ iter->attr = NULL;
+out:
+ return iter->attr;
+}
+
+/*
+ * Copy path records from a received response and insert them into our cache.
+ * Path records in the MADs are in network order, packed, and may
+ * span multiple MAD buffers, just to make our life hard.
+ */
+static void update_path_db(struct sa_db_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc,
+ enum sa_update_type type)
+{
+ struct ib_sa_mad_iter *iter;
+ struct ib_path_rec_info *path_info;
+ void *attr;
+ int ret;
+
+ iter = ib_sa_iter_create(mad_recv_wc);
+ if (IS_ERR(iter))
+ return;
+
+ port->update_id += (type == SA_UPDATE_FULL);
+
+ while ((attr = ib_sa_iter_next(iter)) &&
+ (path_info = kmalloc(sizeof *path_info, GFP_KERNEL))) {
+
+ ib_sa_unpack_attr(&path_info->rec, attr, IB_SA_ATTR_PATH_REC);
+
+ ret = insert_attr(&port->paths, port->update_id,
+ path_info->rec.dgid.raw, &path_info->iter);
+ if (ret) {
+ kfree(path_info);
+ break;
+ }
+ }
+ ib_sa_iter_free(iter);
+
+ if (type == SA_UPDATE_FULL)
+ remove_old_attrs(&port->paths, port->update_id);
+}
+
+static struct ib_mad_send_buf *get_sa_msg(struct sa_db_port *port,
+ struct update_info *update)
+{
+ struct ib_ah_attr ah_attr;
+ struct ib_mad_send_buf *msg;
+
+ msg = ib_create_send_mad(port->agent, 1, 0, 0, IB_MGMT_SA_HDR,
+ IB_MGMT_SA_DATA, GFP_KERNEL);
+ if (IS_ERR(msg))
+ return NULL;
+
+ memset(&ah_attr, 0, sizeof ah_attr);
+ ah_attr.dlid = port->sm_lid;
+ ah_attr.sl = port->sm_sl;
+ ah_attr.port_num = port->port_num;
+
+ msg->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
+ if (IS_ERR(msg->ah)) {
+ ib_free_send_mad(msg);
+ return NULL;
+ }
+
+ msg->timeout_ms = retry_timer;
+ msg->retries = 0;
+ msg->context[0] = port;
+ msg->context[1] = update;
+ return msg;
+}
+
+static __be64 form_tid(u32 hi_tid)
+{
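+	/* TID layout: the agent's hi_tid in the upper 32 bits, a global
+	 * monotonically increasing counter in the lower 32 bits */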
+ static atomic_t tid;
+ return cpu_to_be64((((u64) hi_tid) << 32) |
+ ((u32) atomic_inc_return(&tid)));
+}
+
+static void format_path_req(struct sa_db_port *port,
+ struct update_info *update,
+ struct ib_mad_send_buf *msg)
+{
+ struct ib_sa_mad *mad = msg->mad;
+ struct ib_sa_path_rec path_rec;
+
+ mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+ mad->mad_hdr.method = IB_SA_METHOD_GET_TABLE;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
+ mad->mad_hdr.tid = form_tid(msg->mad_agent->hi_tid);
+
+ mad->sa_hdr.comp_mask = IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH;
+
+ path_rec.sgid = port->gid;
+ path_rec.numb_path = (u8) paths_per_dest;
+
+ if (update->type == SA_UPDATE_ADD) {
+ mad->sa_hdr.comp_mask |= IB_SA_PATH_REC_DGID;
+ memcpy(&path_rec.dgid, &update->gid, sizeof path_rec.dgid);
+ }
+
+ ib_sa_pack_attr(mad->data, &path_rec, IB_SA_ATTR_PATH_REC);
+}
+
+static int send_query(struct sa_db_port *port,
+ struct update_info *update)
+{
+ int ret;
+
+ port->msg = get_sa_msg(port, update);
+ if (!port->msg)
+ return -ENOMEM;
+
+ format_path_req(port, update, port->msg);
+
+ ret = ib_post_send_mad(port->msg, NULL);
+ if (ret)
+ goto err;
+
+ return 0;
+
+err:
+ ib_destroy_ah(port->msg->ah);
+ ib_free_send_mad(port->msg);
+ return ret;
+}
+
+static void add_update(struct sa_db_port *port, u8 *gid,
+ enum sa_update_type type)
+{
+ struct update_info *update;
+
+ update = kmalloc(sizeof *update, GFP_KERNEL);
+ if (update) {
+ if (gid)
+ memcpy(&update->gid, gid, sizeof update->gid);
+ update->type = type;
+ list_add(&update->list, &port->update_list);
+ }
+
+ if (port->state == SA_DB_IDLE) {
+ port->state = SA_DB_REFRESH;
+ process_updates(port);
+ }
+}
+
+static void clean_update_list(struct sa_db_port *port)
+{
+ struct update_info *update;
+
+ while (!list_empty(&port->update_list)) {
+ update = list_entry(port->update_list.next,
+ struct update_info, list);
+ list_del(&update->list);
+ kfree(update);
+ }
+}
+
+static int notice_handler(int status, struct ib_inform_info *info,
+ struct ib_sa_notice *notice)
+{
+ struct sa_db_port *port = info->context;
+ struct ib_sa_notice_data_gid *gid_data;
+ struct ib_inform_info **pinfo;
+ enum sa_update_type type;
+
+ if (info->trap_number == IB_SA_SM_TRAP_GID_IN_SERVICE) {
+ pinfo = &port->in_info;
+ type = SA_UPDATE_ADD;
+ } else {
+ pinfo = &port->out_info;
+ type = SA_UPDATE_REMOVE;
+ }
+
+ mutex_lock(&lock);
+ if (port->state == SA_DB_DESTROY || !*pinfo) {
+ mutex_unlock(&lock);
+ return 0;
+ }
+
+ if (notice) {
+		gid_data = (struct ib_sa_notice_data_gid *)
+			   &notice->data_details;
+ add_update(port, gid_data->gid, type);
+ mutex_unlock(&lock);
+ } else if (status == -ENETRESET) {
+ *pinfo = NULL;
+ mutex_unlock(&lock);
+ } else {
+ if (status)
+ *pinfo = ERR_PTR(-EINVAL);
+ port->state = SA_DB_IDLE;
+ clean_update_list(port);
+ mutex_unlock(&lock);
+ queue_work(sa_wq, &port->work);
+ }
+
+ return status;
+}
+
+static int reg_in_info(struct sa_db_port *port)
+{
+ int ret = 0;
+
+ port->in_info = ib_sa_register_inform_info(&sa_client,
+ port->dev->device,
+ port->port_num,
+ IB_SA_SM_TRAP_GID_IN_SERVICE,
+ GFP_KERNEL, notice_handler,
+ port);
+ if (IS_ERR(port->in_info))
+ ret = PTR_ERR(port->in_info);
+
+ return ret;
+}
+
+static int reg_out_info(struct sa_db_port *port)
+{
+ int ret = 0;
+
+ port->out_info = ib_sa_register_inform_info(&sa_client,
+ port->dev->device,
+ port->port_num,
+ IB_SA_SM_TRAP_GID_OUT_OF_SERVICE,
+ GFP_KERNEL, notice_handler,
+ port);
+ if (IS_ERR(port->out_info))
+ ret = PTR_ERR(port->out_info);
+
+ return ret;
+}
+
+static void unsubscribe_port(struct sa_db_port *port)
+{
+ if (port->in_info && !IS_ERR(port->in_info))
+ ib_sa_unregister_inform_info(port->in_info);
+
+ if (port->out_info && !IS_ERR(port->out_info))
+ ib_sa_unregister_inform_info(port->out_info);
+
+ port->out_info = NULL;
+ port->in_info = NULL;
+}
+
+static void cleanup_port(struct sa_db_port *port)
+{
+ unsubscribe_port(port);
+
+ clean_update_list(port);
+ remove_all_attrs(&port->paths);
+}
+
+static int update_port_info(struct sa_db_port *port)
+{
+ struct ib_port_attr port_attr;
+ int ret;
+
+ ret = ib_query_port(port->dev->device, port->port_num, &port_attr);
+ if (ret)
+ return ret;
+
+ if (port_attr.state != IB_PORT_ACTIVE)
+ return -ENODATA;
+
+ port->sm_lid = port_attr.sm_lid;
+ port->sm_sl = port_attr.sm_sl;
+ return 0;
+}
+
+static void process_updates(struct sa_db_port *port)
+{
+ struct update_info *update;
+ struct ib_sa_attr_list *attr_list;
+ int ret;
+
+ if (!paths_per_dest || update_port_info(port)) {
+ cleanup_port(port);
+ goto out;
+ }
+
+ /* Event registration is an optimization, so ignore failures. */
+ if (subscribe_inform_info) {
+ if (!port->out_info) {
+ ret = reg_out_info(port);
+ if (!ret)
+ return;
+ }
+
+ if (!port->in_info) {
+ ret = reg_in_info(port);
+ if (!ret)
+ return;
+ }
+ } else
+ unsubscribe_port(port);
+
+ while (!list_empty(&port->update_list)) {
+ update = list_entry(port->update_list.next,
+ struct update_info, list);
+
+ if (update->type == SA_UPDATE_REMOVE) {
+ write_lock_irq(&rwlock);
+ attr_list = find_attr_list(&port->paths,
+ update->gid.raw);
+ if (attr_list)
+ remove_attr(&port->paths, attr_list);
+ write_unlock_irq(&rwlock);
+ } else {
+ ret = send_query(port, update);
+ if (!ret)
+ return;
+
+ }
+ list_del(&update->list);
+ kfree(update);
+ }
+out:
+ port->state = SA_DB_IDLE;
+}
+
+static void refresh_port_db(struct sa_db_port *port)
+{
+ if (port->state == SA_DB_DESTROY)
+ return;
+
+ if (port->state == SA_DB_REFRESH) {
+ clean_update_list(port);
+ ib_cancel_mad(port->agent, port->msg);
+ }
+
+ add_update(port, NULL, SA_UPDATE_FULL);
+}
+
+static void refresh_dev_db(struct sa_db_device *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->port_count; i++)
+ refresh_port_db(&dev->port[i]);
+}
+
+static void refresh_db(void)
+{
+ struct sa_db_device *dev;
+
+ list_for_each_entry(dev, &dev_list, list)
+ refresh_dev_db(dev);
+}
+
+static int do_refresh(const char *val, struct kernel_param *kp)
+{
+ mutex_lock(&lock);
+ refresh_db();
+ mutex_unlock(&lock);
+ return 0;
+}
+
+static int get_lookup_method(char *buf, struct kernel_param *kp)
+{
+ return sprintf(buf,
+ "%c %d round robin\n"
+ "%c %d random",
+ (lookup_method == SA_DB_LOOKUP_LEAST_USED) ? '*' : ' ',
+ SA_DB_LOOKUP_LEAST_USED,
+ (lookup_method == SA_DB_LOOKUP_RANDOM) ? '*' : ' ',
+ SA_DB_LOOKUP_RANDOM);
+}
+
+static int set_lookup_method(const char *val, struct kernel_param *kp)
+{
+ unsigned long method;
+ int ret = 0;
+
+ method = simple_strtoul(val, NULL, 0);
+
+ switch (method) {
+ case SA_DB_LOOKUP_LEAST_USED:
+ case SA_DB_LOOKUP_RANDOM:
+ lookup_method = method;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int set_paths_per_dest(const char *val, struct kernel_param *kp)
+{
+ int ret;
+
+ mutex_lock(&lock);
+ ret = param_set_ulong(val, kp);
+ if (ret)
+ goto out;
+
+ if (paths_per_dest > SA_DB_MAX_PATHS_PER_DEST)
+ paths_per_dest = SA_DB_MAX_PATHS_PER_DEST;
+ refresh_db();
+out:
+ mutex_unlock(&lock);
+ return ret;
+}
+
+static int set_subscribe_inform_info(const char *val, struct kernel_param *kp)
+{
+ int ret;
+
+ ret = param_set_bool(val, kp);
+ if (ret)
+ return ret;
+
+ return do_refresh(val, kp);
+}
+
+static void port_work_handler(struct work_struct *work)
+{
+ struct sa_db_port *port;
+
+ port = container_of(work, typeof(*port), work);
+ mutex_lock(&lock);
+ refresh_port_db(port);
+ mutex_unlock(&lock);
+}
+
+static void handle_event(struct ib_event_handler *event_handler,
+ struct ib_event *event)
+{
+ struct sa_db_device *dev;
+ struct sa_db_port *port;
+
+ dev = container_of(event_handler, typeof(*dev), event_handler);
+ port = &dev->port[event->element.port_num - dev->start_port];
+
+ switch (event->event) {
+ case IB_EVENT_PORT_ERR:
+ case IB_EVENT_LID_CHANGE:
+ case IB_EVENT_SM_CHANGE:
+ case IB_EVENT_CLIENT_REREGISTER:
+ case IB_EVENT_PKEY_CHANGE:
+ case IB_EVENT_PORT_ACTIVE:
+ queue_work(sa_wq, &port->work);
+ break;
+ default:
+ break;
+ }
+}
+
+static void ib_free_path_iter(struct ib_sa_attr_iter *iter)
+{
+ read_unlock_irqrestore(&rwlock, iter->flags);
+}
+
+static int ib_create_path_iter(struct ib_device *device, u8 port_num,
+ union ib_gid *dgid, struct ib_sa_attr_iter *iter)
+{
+ struct sa_db_device *dev;
+ struct sa_db_port *port;
+ struct ib_sa_attr_list *list;
+
+ dev = ib_get_client_data(device, &sa_db_client);
+ if (!dev)
+ return -ENODEV;
+
+ port = &dev->port[port_num - dev->start_port];
+
+ read_lock_irqsave(&rwlock, iter->flags);
+ list = find_attr_list(&port->paths, dgid->raw);
+ if (!list) {
+ ib_free_path_iter(iter);
+ return -ENODATA;
+ }
+
+ iter->iter = &list->iter;
+ return 0;
+}
+
+static struct ib_sa_path_rec *ib_get_next_path(struct ib_sa_attr_iter *iter)
+{
+ struct ib_path_rec_info *next_path;
+
+ iter->iter = iter->iter->next;
+ if (iter->iter) {
+ next_path = container_of(iter->iter, struct ib_path_rec_info, iter);
+ return &next_path->rec;
+ } else
+ return NULL;
+}
+
+static int cmp_rec(struct ib_sa_path_rec *src,
+ struct ib_sa_path_rec *dst, ib_sa_comp_mask comp_mask)
+{
+ /* DGID check already done */
+ if (comp_mask & IB_SA_PATH_REC_SGID &&
+ memcmp(&src->sgid, &dst->sgid, sizeof src->sgid))
+ return -EINVAL;
+ if (comp_mask & IB_SA_PATH_REC_DLID && src->dlid != dst->dlid)
+ return -EINVAL;
+ if (comp_mask & IB_SA_PATH_REC_SLID && src->slid != dst->slid)
+ return -EINVAL;
+ if (comp_mask & IB_SA_PATH_REC_RAW_TRAFFIC &&
+ src->raw_traffic != dst->raw_traffic)
+ return -EINVAL;
+
+ if (comp_mask & IB_SA_PATH_REC_FLOW_LABEL &&
+ src->flow_label != dst->flow_label)
+ return -EINVAL;
+ if (comp_mask & IB_SA_PATH_REC_HOP_LIMIT &&
+ src->hop_limit != dst->hop_limit)
+ return -EINVAL;
+ if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS &&
+ src->traffic_class != dst->traffic_class)
+ return -EINVAL;
+ if (comp_mask & IB_SA_PATH_REC_REVERSIBLE &&
+ dst->reversible && !src->reversible)
+ return -EINVAL;
+ /* Numb path check already done */
+ if (comp_mask & IB_SA_PATH_REC_PKEY && src->pkey != dst->pkey)
+ return -EINVAL;
+
+ if (comp_mask & IB_SA_PATH_REC_SL && src->sl != dst->sl)
+ return -EINVAL;
+
+ if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_MTU_SELECTOR,
+ IB_SA_PATH_REC_MTU, dst->mtu_selector,
+ src->mtu, dst->mtu))
+ return -EINVAL;
+ if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_RATE_SELECTOR,
+ IB_SA_PATH_REC_RATE, dst->rate_selector,
+ src->rate, dst->rate))
+ return -EINVAL;
+ if (ib_sa_check_selector(comp_mask,
+ IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR,
+ IB_SA_PATH_REC_PACKET_LIFE_TIME,
+ dst->packet_life_time_selector,
+ src->packet_life_time, dst->packet_life_time))
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct ib_sa_path_rec *get_random_path(struct ib_sa_attr_iter *iter,
+ struct ib_sa_path_rec *req_path,
+ ib_sa_comp_mask comp_mask)
+{
+ struct ib_sa_path_rec *path, *rand_path = NULL;
+ int num, count = 0;
+
+ for (path = ib_get_next_path(iter); path;
+ path = ib_get_next_path(iter)) {
+ if (!cmp_rec(path, req_path, comp_mask)) {
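+			/*
+			 * Reservoir sampling: the k-th matching path
+			 * replaces the current pick with probability 1/k,
+			 * so every match is equally likely to be returned.
+			 */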
+ get_random_bytes(&num, sizeof num);
+ if ((num % ++count) == 0)
+ rand_path = path;
+ }
+ }
+
+ return rand_path;
+}
+
+static struct ib_sa_path_rec *get_next_path(struct ib_sa_attr_iter *iter,
+ struct ib_sa_path_rec *req_path,
+ ib_sa_comp_mask comp_mask)
+{
+ struct ib_path_rec_info *cur_path, *next_path = NULL;
+ struct ib_sa_path_rec *path;
+ unsigned long lookups = ~0;
+
+ for (path = ib_get_next_path(iter); path;
+ path = ib_get_next_path(iter)) {
+ if (!cmp_rec(path, req_path, comp_mask)) {
+
+ cur_path = container_of(iter->iter, struct ib_path_rec_info,
+ iter);
+ if (cur_path->lookups < lookups) {
+ lookups = cur_path->lookups;
+ next_path = cur_path;
+ }
+ }
+ }
+
+ if (next_path) {
+ next_path->lookups++;
+ return &next_path->rec;
+ } else
+ return NULL;
+}
+
+static void report_path(struct work_struct *work)
+{
+ struct sa_path_request *req;
+
+ req = container_of(work, struct sa_path_request, work);
+ req->callback(0, &req->path_rec, req->context);
+ ib_sa_client_put(req->client);
+ kfree(req);
+}
+
+/**
+ * ib_sa_path_rec_get - Start a Path get query
+ * @client:SA client
+ * @device:device to send query on
+ * @port_num: port number to send query on
+ * @rec:Path Record to send in query
+ * @comp_mask:component mask to send in query
+ * @timeout_ms:time to wait for response
+ * @gfp_mask:GFP mask to use for internal allocations
+ * @callback:function called when query completes, times out or is
+ * canceled
+ * @context:opaque user context passed to callback
+ * @sa_query:query context, used to cancel query
+ *
+ * Send a Path Record Get query to the SA to look up a path. The
+ * callback function will be called when the query completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
+ * occurred sending the query. The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_path_rec_get() is negative, it is an
+ * error code. Otherwise it is a query ID that can be used to cancel
+ * the query.
+ */
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_path_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_path_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ struct sa_path_request *req;
+ struct ib_sa_attr_iter iter;
+ struct ib_sa_path_rec *path_rec;
+ int ret;
+
+ if (!paths_per_dest)
+ goto query_sa;
+
+ if (!(comp_mask & IB_SA_PATH_REC_DGID) ||
+ !(comp_mask & IB_SA_PATH_REC_NUMB_PATH) || rec->numb_path != 1)
+ goto query_sa;
+
+ req = kmalloc(sizeof *req, gfp_mask);
+ if (!req)
+ goto query_sa;
+
+ ret = ib_create_path_iter(device, port_num, &rec->dgid, &iter);
+ if (ret)
+ goto free_req;
+
+ if (lookup_method == SA_DB_LOOKUP_RANDOM)
+ path_rec = get_random_path(&iter, rec, comp_mask);
+ else
+ path_rec = get_next_path(&iter, rec, comp_mask);
+
+ if (!path_rec)
+ goto free_iter;
+
+ memcpy(&req->path_rec, path_rec, sizeof *path_rec);
+ ib_free_path_iter(&iter);
+
+ INIT_WORK(&req->work, report_path);
+ req->client = client;
+ req->callback = callback;
+ req->context = context;
+
+ ib_sa_client_get(client);
+ queue_work(sa_wq, &req->work);
+ *sa_query = ERR_PTR(-EEXIST);
+ return 0;
+
+free_iter:
+ ib_free_path_iter(&iter);
+free_req:
+ kfree(req);
+query_sa:
+ return ib_sa_path_rec_query(client, device, port_num, rec, comp_mask,
+ timeout_ms, gfp_mask, callback, context,
+ sa_query);
+}
+EXPORT_SYMBOL(ib_sa_path_rec_get);
+
+static void recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct sa_db_port *port;
+ struct update_info *update;
+ struct ib_mad_send_buf *msg;
+ enum sa_update_type type;
+
+ msg = (struct ib_mad_send_buf *) (unsigned long) mad_recv_wc->wc->wr_id;
+ port = msg->context[0];
+ update = msg->context[1];
+
+ mutex_lock(&lock);
+ if (port->state == SA_DB_DESTROY ||
+ update != list_entry(port->update_list.next,
+ struct update_info, list)) {
+ mutex_unlock(&lock);
+ } else {
+ type = update->type;
+ mutex_unlock(&lock);
+ update_path_db(mad_agent->context, mad_recv_wc, type);
+ }
+
+ ib_free_recv_mad(mad_recv_wc);
+}
+
+static void send_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ struct ib_mad_send_buf *msg;
+ struct sa_db_port *port;
+ struct update_info *update;
+ int ret;
+
+ msg = mad_send_wc->send_buf;
+ port = msg->context[0];
+ update = msg->context[1];
+
+ mutex_lock(&lock);
+ if (port->state == SA_DB_DESTROY)
+ goto unlock;
+
+ if (update == list_entry(port->update_list.next,
+ struct update_info, list)) {
+
+ if (mad_send_wc->status == IB_WC_RESP_TIMEOUT_ERR &&
+ msg->timeout_ms < SA_DB_MAX_RETRY_TIMER) {
+
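+			/* exponential backoff: double the response timeout on
+			 * each retry, capped at SA_DB_MAX_RETRY_TIMER */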
+ msg->timeout_ms <<= 1;
+ ret = ib_post_send_mad(msg, NULL);
+ if (!ret) {
+ mutex_unlock(&lock);
+ return;
+ }
+ }
+ list_del(&update->list);
+ kfree(update);
+ }
+ process_updates(port);
+unlock:
+ mutex_unlock(&lock);
+
+ ib_destroy_ah(msg->ah);
+ ib_free_send_mad(msg);
+}
+
+static int init_port(struct sa_db_device *dev, int port_num)
+{
+ struct sa_db_port *port;
+ int ret;
+
+ port = &dev->port[port_num - dev->start_port];
+ port->dev = dev;
+ port->port_num = port_num;
+ INIT_WORK(&port->work, port_work_handler);
+ port->paths = RB_ROOT;
+ INIT_LIST_HEAD(&port->update_list);
+
+ ret = ib_get_cached_gid(dev->device, port_num, 0, &port->gid);
+ if (ret)
+ return ret;
+
+ port->agent = ib_register_mad_agent(dev->device, port_num, IB_QPT_GSI,
+ NULL, IB_MGMT_RMPP_VERSION,
+ send_handler, recv_handler, port);
+ if (IS_ERR(port->agent))
+ ret = PTR_ERR(port->agent);
+
+ return ret;
+}
+
+static void destroy_port(struct sa_db_port *port)
+{
+ mutex_lock(&lock);
+ port->state = SA_DB_DESTROY;
+ mutex_unlock(&lock);
+
+ ib_unregister_mad_agent(port->agent);
+ cleanup_port(port);
+ flush_workqueue(sa_wq);
+}
+
+static void sa_db_add_dev(struct ib_device *device)
+{
+ struct sa_db_device *dev;
+ struct sa_db_port *port;
+ int s, e, i, ret;
+
+ if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
+ if (device->node_type == RDMA_NODE_IB_SWITCH) {
+ s = e = 0;
+ } else {
+ s = 1;
+ e = device->phys_port_cnt;
+ }
+
+ dev = kzalloc(sizeof *dev + (e - s + 1) * sizeof *port, GFP_KERNEL);
+ if (!dev)
+ return;
+
+ dev->start_port = s;
+ dev->port_count = e - s + 1;
+ dev->device = device;
+ for (i = 0; i < dev->port_count; i++) {
+ ret = init_port(dev, s + i);
+ if (ret)
+ goto err;
+ }
+
+ ib_set_client_data(device, &sa_db_client, dev);
+
+ INIT_IB_EVENT_HANDLER(&dev->event_handler, device, handle_event);
+
+ mutex_lock(&lock);
+ list_add_tail(&dev->list, &dev_list);
+ refresh_dev_db(dev);
+ mutex_unlock(&lock);
+
+ ib_register_event_handler(&dev->event_handler);
+ return;
+err:
+ while (i--)
+ destroy_port(&dev->port[i]);
+ kfree(dev);
+}
+
+static void sa_db_remove_dev(struct ib_device *device)
+{
+ struct sa_db_device *dev;
+ int i;
+
+ dev = ib_get_client_data(device, &sa_db_client);
+ if (!dev)
+ return;
+
+ ib_unregister_event_handler(&dev->event_handler);
+ flush_workqueue(sa_wq);
+
+ for (i = 0; i < dev->port_count; i++)
+ destroy_port(&dev->port[i]);
+
+ mutex_lock(&lock);
+ list_del(&dev->list);
+ mutex_unlock(&lock);
+
+ kfree(dev);
+}
+
+int sa_db_init(void)
+{
+ int ret;
+
+ rwlock_init(&rwlock);
+ sa_wq = create_singlethread_workqueue("local_sa");
+ if (!sa_wq)
+ return -ENOMEM;
+
+ ib_sa_register_client(&sa_client);
+ ret = ib_register_client(&sa_db_client);
+ if (ret)
+ goto err;
+
+ return 0;
+
+err:
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(sa_wq);
+ return ret;
+}
+
+void sa_db_cleanup(void)
+{
+ ib_unregister_client(&sa_db_client);
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(sa_wq);
+}
Property changes on: trunk/sys/ofed/drivers/infiniband/core/local_sa.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
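
For reference, callers of the ib_sa_path_rec_get() export added above see the
same asynchronous contract whether the answer comes from this cache or from a
real SA query. A minimal, hedged sketch of a lookup (callback and variable
names are hypothetical; the ib_sa_client, device, port and dgid are assumed
to be set up by the caller):

    /* assumes <rdma/ib_sa.h> */
    static void example_path_done(int status, struct ib_sa_path_rec *resp,
                                  void *context)
    {
            if (status)
                    printk(KERN_WARNING "path lookup failed: %d\n", status);
            /* resp is only valid when status == 0 */
    }

            struct ib_sa_path_rec rec;
            struct ib_sa_query *query;
            int id;

            memset(&rec, 0, sizeof rec);
            rec.dgid = dgid;        /* destination GID */
            rec.numb_path = 1;      /* required for the cache fast path */
            id = ib_sa_path_rec_get(&my_sa_client, device, port_num, &rec,
                                    IB_SA_PATH_REC_DGID |
                                    IB_SA_PATH_REC_NUMB_PATH,
                                    1000 /* ms */, GFP_KERNEL,
                                    example_path_done, NULL, &query);

On a cache hit the callback runs from the local_sa workqueue and *query is
set to ERR_PTR(-EEXIST), so there is nothing to cancel; otherwise the call
falls through to ib_sa_path_rec_query() and behaves like an ordinary SA
query.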
Added: trunk/sys/ofed/drivers/infiniband/core/mad.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/mad.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/mad.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,3057 @@
+/*
+ * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <rdma/ib_cache.h>
+
+#include "mad_priv.h"
+#include "mad_rmpp.h"
+#include "smi.h"
+#include "agent.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("kernel IB MAD API");
+MODULE_AUTHOR("Hal Rosenstock");
+MODULE_AUTHOR("Sean Hefty");
+
+int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
+int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
+
+module_param_named(send_queue_size, mad_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
+module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
+
+static struct kmem_cache *ib_mad_cache;
+
+static struct list_head ib_mad_port_list;
+static u32 ib_mad_client_id = 0;
+
+/* Port list lock */
+static spinlock_t ib_mad_port_list_lock;
+
+
+/* Forward declarations */
+static int method_in_use(struct ib_mad_mgmt_method_table **method,
+ struct ib_mad_reg_req *mad_reg_req);
+static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
+static struct ib_mad_agent_private *find_mad_agent(
+ struct ib_mad_port_private *port_priv,
+ struct ib_mad *mad);
+static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
+ struct ib_mad_private *mad);
+static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
+static void timeout_sends(struct work_struct *work);
+static void local_completions(struct work_struct *work);
+static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
+ struct ib_mad_agent_private *agent_priv,
+ u8 mgmt_class);
+static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
+ struct ib_mad_agent_private *agent_priv);
+
+/*
+ * Returns an ib_mad_port_private structure or NULL for a device/port.
+ * Assumes ib_mad_port_list_lock is held.
+ */
+static inline struct ib_mad_port_private *
+__ib_get_mad_port(struct ib_device *device, int port_num)
+{
+ struct ib_mad_port_private *entry;
+
+ list_for_each_entry(entry, &ib_mad_port_list, port_list) {
+ if (entry->device == device && entry->port_num == port_num)
+ return entry;
+ }
+ return NULL;
+}
+
+/*
+ * Wrapper function to return an ib_mad_port_private structure or NULL
+ * for a device/port
+ */
+static inline struct ib_mad_port_private *
+ib_get_mad_port(struct ib_device *device, int port_num)
+{
+ struct ib_mad_port_private *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+ entry = __ib_get_mad_port(device, port_num);
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+
+ return entry;
+}
+
+static inline u8 convert_mgmt_class(u8 mgmt_class)
+{
+ /* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
+ return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
+ 0 : mgmt_class;
+}
+
+static int get_spl_qp_index(enum ib_qp_type qp_type)
+{
+ switch (qp_type)
+ {
+ case IB_QPT_SMI:
+ return 0;
+ case IB_QPT_GSI:
+ return 1;
+ default:
+ return -1;
+ }
+}
+
+static int vendor_class_index(u8 mgmt_class)
+{
+ return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
+}
+
+static int is_vendor_class(u8 mgmt_class)
+{
+ if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
+ (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
+ return 0;
+ return 1;
+}
+
+static int is_vendor_oui(char *oui)
+{
+ if (oui[0] || oui[1] || oui[2])
+ return 1;
+ return 0;
+}
+
+static int is_vendor_method_in_use(
+ struct ib_mad_mgmt_vendor_class *vendor_class,
+ struct ib_mad_reg_req *mad_reg_req)
+{
+ struct ib_mad_mgmt_method_table *method;
+ int i;
+
+ for (i = 0; i < MAX_MGMT_OUI; i++) {
+ if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
+ method = vendor_class->method_table[i];
+ if (method) {
+ if (method_in_use(&method, mad_reg_req))
+ return 1;
+ else
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+int ib_response_mad(struct ib_mad *mad)
+{
+ return ((mad->mad_hdr.method & IB_MGMT_METHOD_RESP) ||
+ (mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) ||
+ ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_BM) &&
+ (mad->mad_hdr.attr_mod & IB_BM_ATTR_MOD_RESP)));
+}
+EXPORT_SYMBOL(ib_response_mad);
+
+static void timeout_callback(unsigned long data)
+{
+ struct ib_mad_agent_private *mad_agent_priv =
+ (struct ib_mad_agent_private *) data;
+
+ queue_work(mad_agent_priv->qp_info->port_priv->wq,
+ &mad_agent_priv->timeout_work);
+}
+
+/*
+ * ib_register_mad_agent - Register to send/receive MADs
+ */
+struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
+ u8 port_num,
+ enum ib_qp_type qp_type,
+ struct ib_mad_reg_req *mad_reg_req,
+ u8 rmpp_version,
+ ib_mad_send_handler send_handler,
+ ib_mad_recv_handler recv_handler,
+ void *context)
+{
+ struct ib_mad_port_private *port_priv;
+ struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_reg_req *reg_req = NULL;
+ struct ib_mad_mgmt_class_table *class;
+ struct ib_mad_mgmt_vendor_class_table *vendor;
+ struct ib_mad_mgmt_vendor_class *vendor_class;
+ struct ib_mad_mgmt_method_table *method;
+ int ret2, qpn;
+ unsigned long flags;
+ u8 mgmt_class, vclass;
+
+ /* Validate parameters */
+ qpn = get_spl_qp_index(qp_type);
+ if (qpn == -1)
+ goto error1;
+
+ if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION)
+ goto error1;
+
+ /* Validate MAD registration request if supplied */
+ if (mad_reg_req) {
+ if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION)
+ goto error1;
+ if (!recv_handler)
+ goto error1;
+ if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
+ /*
+ * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
+ * one in this range currently allowed
+ */
+ if (mad_reg_req->mgmt_class !=
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ goto error1;
+ } else if (mad_reg_req->mgmt_class == 0) {
+ /*
+ * Class 0 is reserved in IBA and is used for
+ * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
+ */
+ goto error1;
+ } else if (is_vendor_class(mad_reg_req->mgmt_class)) {
+ /*
+ * If class is in "new" vendor range,
+ * ensure supplied OUI is not zero
+ */
+ if (!is_vendor_oui(mad_reg_req->oui))
+ goto error1;
+ }
+ /* Make sure class supplied is consistent with RMPP */
+ if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
+ if (rmpp_version)
+ goto error1;
+ }
+ /* Make sure class supplied is consistent with QP type */
+ if (qp_type == IB_QPT_SMI) {
+ if ((mad_reg_req->mgmt_class !=
+ IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
+ (mad_reg_req->mgmt_class !=
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+ goto error1;
+ } else {
+ if ((mad_reg_req->mgmt_class ==
+ IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
+ (mad_reg_req->mgmt_class ==
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+ goto error1;
+ }
+ } else {
+ /* No registration request supplied */
+ if (!send_handler)
+ goto error1;
+ }
+
+ /* Validate device and port */
+ port_priv = ib_get_mad_port(device, port_num);
+ if (!port_priv) {
+ ret = ERR_PTR(-ENODEV);
+ goto error1;
+ }
+
+ /* Allocate structures */
+ mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
+ if (!mad_agent_priv) {
+ ret = ERR_PTR(-ENOMEM);
+ goto error1;
+ }
+
+ mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd,
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(mad_agent_priv->agent.mr)) {
+ ret = ERR_PTR(-ENOMEM);
+ goto error2;
+ }
+
+ if (mad_reg_req) {
+ reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL);
+ if (!reg_req) {
+ ret = ERR_PTR(-ENOMEM);
+ goto error3;
+ }
+ /* Make a copy of the MAD registration request */
+ memcpy(reg_req, mad_reg_req, sizeof *reg_req);
+ }
+
+ /* Now, fill in the various structures */
+ mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
+ mad_agent_priv->reg_req = reg_req;
+ mad_agent_priv->agent.rmpp_version = rmpp_version;
+ mad_agent_priv->agent.device = device;
+ mad_agent_priv->agent.recv_handler = recv_handler;
+ mad_agent_priv->agent.send_handler = send_handler;
+ mad_agent_priv->agent.context = context;
+ mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
+ mad_agent_priv->agent.port_num = port_num;
+ spin_lock_init(&mad_agent_priv->lock);
+ INIT_LIST_HEAD(&mad_agent_priv->send_list);
+ INIT_LIST_HEAD(&mad_agent_priv->wait_list);
+ INIT_LIST_HEAD(&mad_agent_priv->done_list);
+ INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
+ INIT_WORK(&mad_agent_priv->timeout_work, timeout_sends);
+ setup_timer(&mad_agent_priv->timeout_timer, timeout_callback,
+ (unsigned long) mad_agent_priv);
+ INIT_LIST_HEAD(&mad_agent_priv->local_list);
+ INIT_WORK(&mad_agent_priv->local_work, local_completions);
+ atomic_set(&mad_agent_priv->refcount, 1);
+ init_completion(&mad_agent_priv->comp);
+
+ spin_lock_irqsave(&port_priv->reg_lock, flags);
+ mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
+
+ /*
+ * Make sure MAD registration (if supplied)
+	 * is non-overlapping with any existing ones
+ */
+ if (mad_reg_req) {
+ mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
+ if (!is_vendor_class(mgmt_class)) {
+ class = port_priv->version[mad_reg_req->
+ mgmt_class_version].class;
+ if (class) {
+ method = class->method_table[mgmt_class];
+ if (method) {
+ if (method_in_use(&method,
+ mad_reg_req))
+ goto error4;
+ }
+ }
+ ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
+ mgmt_class);
+ } else {
+ /* "New" vendor class range */
+ vendor = port_priv->version[mad_reg_req->
+ mgmt_class_version].vendor;
+ if (vendor) {
+ vclass = vendor_class_index(mgmt_class);
+ vendor_class = vendor->vendor_class[vclass];
+ if (vendor_class) {
+ if (is_vendor_method_in_use(
+ vendor_class,
+ mad_reg_req))
+ goto error4;
+ }
+ }
+ ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
+ }
+ if (ret2) {
+ ret = ERR_PTR(ret2);
+ goto error4;
+ }
+ }
+
+ /* Add mad agent into port's agent list */
+ list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
+ spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+
+ return &mad_agent_priv->agent;
+
+error4:
+ spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+ kfree(reg_req);
+error3:
+ ib_dereg_mr(mad_agent_priv->agent.mr);
+error2:
+ kfree(mad_agent_priv);
+error1:
+ return ret;
+}
+EXPORT_SYMBOL(ib_register_mad_agent);
+
+static inline int is_snooping_sends(int mad_snoop_flags)
+{
+ return (mad_snoop_flags &
+ (/*IB_MAD_SNOOP_POSTED_SENDS |
+ IB_MAD_SNOOP_RMPP_SENDS |*/
+ IB_MAD_SNOOP_SEND_COMPLETIONS /*|
+ IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
+}
+
+static inline int is_snooping_recvs(int mad_snoop_flags)
+{
+ return (mad_snoop_flags &
+ (IB_MAD_SNOOP_RECVS /*|
+ IB_MAD_SNOOP_RMPP_RECVS*/));
+}
+
+static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
+ struct ib_mad_snoop_private *mad_snoop_priv)
+{
+ struct ib_mad_snoop_private **new_snoop_table;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&qp_info->snoop_lock, flags);
+ /* Check for empty slot in array. */
+ for (i = 0; i < qp_info->snoop_table_size; i++)
+ if (!qp_info->snoop_table[i])
+ break;
+
+ if (i == qp_info->snoop_table_size) {
+ /* Grow table. */
+ new_snoop_table = krealloc(qp_info->snoop_table,
+ sizeof mad_snoop_priv *
+ (qp_info->snoop_table_size + 1),
+ GFP_ATOMIC);
+ if (!new_snoop_table) {
+ i = -ENOMEM;
+ goto out;
+ }
+
+ qp_info->snoop_table = new_snoop_table;
+ qp_info->snoop_table_size++;
+ }
+ qp_info->snoop_table[i] = mad_snoop_priv;
+ atomic_inc(&qp_info->snoop_count);
+out:
+ spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+ return i;
+}
+
+struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
+ u8 port_num,
+ enum ib_qp_type qp_type,
+ int mad_snoop_flags,
+ ib_mad_snoop_handler snoop_handler,
+ ib_mad_recv_handler recv_handler,
+ void *context)
+{
+ struct ib_mad_port_private *port_priv;
+ struct ib_mad_agent *ret;
+ struct ib_mad_snoop_private *mad_snoop_priv;
+ int qpn;
+
+ /* Validate parameters */
+ if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
+ (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
+ ret = ERR_PTR(-EINVAL);
+ goto error1;
+ }
+ qpn = get_spl_qp_index(qp_type);
+ if (qpn == -1) {
+ ret = ERR_PTR(-EINVAL);
+ goto error1;
+ }
+ port_priv = ib_get_mad_port(device, port_num);
+ if (!port_priv) {
+ ret = ERR_PTR(-ENODEV);
+ goto error1;
+ }
+ /* Allocate structures */
+ mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
+ if (!mad_snoop_priv) {
+ ret = ERR_PTR(-ENOMEM);
+ goto error1;
+ }
+
+ /* Now, fill in the various structures */
+ mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
+ mad_snoop_priv->agent.device = device;
+ mad_snoop_priv->agent.recv_handler = recv_handler;
+ mad_snoop_priv->agent.snoop_handler = snoop_handler;
+ mad_snoop_priv->agent.context = context;
+ mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
+ mad_snoop_priv->agent.port_num = port_num;
+ mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
+ init_completion(&mad_snoop_priv->comp);
+ mad_snoop_priv->snoop_index = register_snoop_agent(
+ &port_priv->qp_info[qpn],
+ mad_snoop_priv);
+ if (mad_snoop_priv->snoop_index < 0) {
+ ret = ERR_PTR(mad_snoop_priv->snoop_index);
+ goto error2;
+ }
+
+ atomic_set(&mad_snoop_priv->refcount, 1);
+ return &mad_snoop_priv->agent;
+
+error2:
+ kfree(mad_snoop_priv);
+error1:
+ return ret;
+}
+EXPORT_SYMBOL(ib_register_mad_snoop);
+
+static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
+{
+ if (atomic_dec_and_test(&mad_agent_priv->refcount))
+ complete(&mad_agent_priv->comp);
+}
+
+static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv)
+{
+ if (atomic_dec_and_test(&mad_snoop_priv->refcount))
+ complete(&mad_snoop_priv->comp);
+}
+
+static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
+{
+ struct ib_mad_port_private *port_priv;
+ unsigned long flags;
+
+ /* Note that we could still be handling received MADs */
+
+ /*
+ * Canceling all sends results in dropping received response
+ * MADs, preventing us from queuing additional work
+ */
+ cancel_mads(mad_agent_priv);
+ port_priv = mad_agent_priv->qp_info->port_priv;
+ del_timer_sync(&mad_agent_priv->timeout_timer);
+ cancel_work_sync(&mad_agent_priv->timeout_work);
+
+ spin_lock_irqsave(&port_priv->reg_lock, flags);
+ remove_mad_reg_req(mad_agent_priv);
+ list_del(&mad_agent_priv->agent_list);
+ spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+
+ flush_workqueue(port_priv->wq);
+ ib_cancel_rmpp_recvs(mad_agent_priv);
+
+ deref_mad_agent(mad_agent_priv);
+ wait_for_completion(&mad_agent_priv->comp);
+
+ kfree(mad_agent_priv->reg_req);
+ ib_dereg_mr(mad_agent_priv->agent.mr);
+ kfree(mad_agent_priv);
+}
+
+static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
+{
+ struct ib_mad_qp_info *qp_info;
+ unsigned long flags;
+
+ qp_info = mad_snoop_priv->qp_info;
+ spin_lock_irqsave(&qp_info->snoop_lock, flags);
+ qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
+ atomic_dec(&qp_info->snoop_count);
+ spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+
+ deref_snoop_agent(mad_snoop_priv);
+ wait_for_completion(&mad_snoop_priv->comp);
+
+ kfree(mad_snoop_priv);
+}
+
+/*
+ * ib_unregister_mad_agent - Unregisters a client from using MAD services
+ */
+int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_snoop_private *mad_snoop_priv;
+
+ /* If the TID is zero, the agent can only snoop. */
+ if (mad_agent->hi_tid) {
+ mad_agent_priv = container_of(mad_agent,
+ struct ib_mad_agent_private,
+ agent);
+ unregister_mad_agent(mad_agent_priv);
+ } else {
+ mad_snoop_priv = container_of(mad_agent,
+ struct ib_mad_snoop_private,
+ agent);
+ unregister_mad_snoop(mad_snoop_priv);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ib_unregister_mad_agent);
+
+static void dequeue_mad(struct ib_mad_list_head *mad_list)
+{
+ struct ib_mad_queue *mad_queue;
+ unsigned long flags;
+
+ BUG_ON(!mad_list->mad_queue);
+ mad_queue = mad_list->mad_queue;
+ spin_lock_irqsave(&mad_queue->lock, flags);
+ list_del(&mad_list->list);
+ mad_queue->count--;
+ spin_unlock_irqrestore(&mad_queue->lock, flags);
+}
+
+static void snoop_send(struct ib_mad_qp_info *qp_info,
+ struct ib_mad_send_buf *send_buf,
+ struct ib_mad_send_wc *mad_send_wc,
+ int mad_snoop_flags)
+{
+ struct ib_mad_snoop_private *mad_snoop_priv;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&qp_info->snoop_lock, flags);
+ for (i = 0; i < qp_info->snoop_table_size; i++) {
+ mad_snoop_priv = qp_info->snoop_table[i];
+ if (!mad_snoop_priv ||
+ !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
+ continue;
+
+ atomic_inc(&mad_snoop_priv->refcount);
+ spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+ mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
+ send_buf, mad_send_wc);
+ deref_snoop_agent(mad_snoop_priv);
+ spin_lock_irqsave(&qp_info->snoop_lock, flags);
+ }
+ spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+}
+
+static void snoop_recv(struct ib_mad_qp_info *qp_info,
+ struct ib_mad_recv_wc *mad_recv_wc,
+ int mad_snoop_flags)
+{
+ struct ib_mad_snoop_private *mad_snoop_priv;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&qp_info->snoop_lock, flags);
+ for (i = 0; i < qp_info->snoop_table_size; i++) {
+ mad_snoop_priv = qp_info->snoop_table[i];
+ if (!mad_snoop_priv ||
+ !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
+ continue;
+
+ atomic_inc(&mad_snoop_priv->refcount);
+ spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+ mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
+ mad_recv_wc);
+ deref_snoop_agent(mad_snoop_priv);
+ spin_lock_irqsave(&qp_info->snoop_lock, flags);
+ }
+ spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+}
+
+static void build_smp_wc(struct ib_qp *qp,
+ u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
+ struct ib_wc *wc)
+{
+ memset(wc, 0, sizeof *wc);
+ wc->wr_id = wr_id;
+ wc->status = IB_WC_SUCCESS;
+ wc->opcode = IB_WC_RECV;
+ wc->pkey_index = pkey_index;
+ wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
+ wc->src_qp = IB_QP0;
+ wc->qp = qp;
+ wc->slid = slid;
+ wc->sl = 0;
+ wc->dlid_path_bits = 0;
+ wc->port_num = port_num;
+}
+
+/*
+ * Return 0 if SMP is to be sent
+ * Return 1 if SMP was consumed locally (whether or not solicited)
+ * Return < 0 if error
+ */
+static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_send_wr_private *mad_send_wr)
+{
+ int ret = 0;
+ struct ib_smp *smp = mad_send_wr->send_buf.mad;
+ unsigned long flags;
+ struct ib_mad_local_private *local;
+ struct ib_mad_private *mad_priv;
+ struct ib_mad_port_private *port_priv;
+ struct ib_mad_agent_private *recv_mad_agent = NULL;
+ struct ib_device *device = mad_agent_priv->agent.device;
+ u8 port_num;
+ struct ib_wc mad_wc;
+ struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
+
+ if (device->node_type == RDMA_NODE_IB_SWITCH)
+ port_num = send_wr->wr.ud.port_num;
+ else
+ port_num = mad_agent_priv->agent.port_num;
+
+ /*
+ * Directed route handling starts if the initial LID routed part of
+ * a request or the ending LID routed part of a response is empty.
+ * If we are at the start of the LID routed part, don't update the
+ * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec.
+ */
+ if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) !=
+ IB_LID_PERMISSIVE)
+ goto out;
+ if (smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
+ IB_SMI_DISCARD) {
+ ret = -EINVAL;
+ printk(KERN_ERR PFX "Invalid directed route\n");
+ goto out;
+ }
+
+ /* Check to post send on QP or process locally */
+ if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
+ smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
+ goto out;
+
+ local = kmalloc(sizeof *local, GFP_ATOMIC);
+ if (!local) {
+ ret = -ENOMEM;
+ printk(KERN_ERR PFX "No memory for ib_mad_local_private\n");
+ goto out;
+ }
+ local->mad_priv = NULL;
+ local->recv_mad_agent = NULL;
+ mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
+ if (!mad_priv) {
+ ret = -ENOMEM;
+ printk(KERN_ERR PFX "No memory for local response MAD\n");
+ kfree(local);
+ goto out;
+ }
+
+ build_smp_wc(mad_agent_priv->agent.qp,
+ send_wr->wr_id, be16_to_cpu(smp->dr_slid),
+ send_wr->wr.ud.pkey_index,
+ send_wr->wr.ud.port_num, &mad_wc);
+
+ /* No GRH for DR SMP */
+ ret = device->process_mad(device, 0, port_num, &mad_wc, NULL,
+ (struct ib_mad *)smp,
+ (struct ib_mad *)&mad_priv->mad);
+ switch (ret) {
+ case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
+ if (ib_response_mad(&mad_priv->mad.mad) &&
+ mad_agent_priv->agent.recv_handler) {
+ local->mad_priv = mad_priv;
+ local->recv_mad_agent = mad_agent_priv;
+ /*
+ * Reference MAD agent until receive
+ * side of local completion handled
+ */
+ atomic_inc(&mad_agent_priv->refcount);
+ } else
+ kmem_cache_free(ib_mad_cache, mad_priv);
+ break;
+ case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
+ kmem_cache_free(ib_mad_cache, mad_priv);
+ break;
+ case IB_MAD_RESULT_SUCCESS:
+ /* Treat like an incoming receive MAD */
+ port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
+ mad_agent_priv->agent.port_num);
+ if (port_priv) {
+ memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad));
+ recv_mad_agent = find_mad_agent(port_priv,
+ &mad_priv->mad.mad);
+ }
+ if (!port_priv || !recv_mad_agent) {
+ /*
+ * No receiving agent so drop packet and
+ * generate send completion.
+ */
+ kmem_cache_free(ib_mad_cache, mad_priv);
+ break;
+ }
+ local->mad_priv = mad_priv;
+ local->recv_mad_agent = recv_mad_agent;
+ break;
+ default:
+ kmem_cache_free(ib_mad_cache, mad_priv);
+ kfree(local);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ local->mad_send_wr = mad_send_wr;
+ /* Reference MAD agent until send side of local completion handled */
+ atomic_inc(&mad_agent_priv->refcount);
+ /* Queue local completion to local list */
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ queue_work(mad_agent_priv->qp_info->port_priv->wq,
+ &mad_agent_priv->local_work);
+ ret = 1;
+out:
+ return ret;
+}
+
+static int get_pad_size(int hdr_len, int data_len)
+{
+ int seg_size, pad;
+
+ seg_size = sizeof(struct ib_mad) - hdr_len;
+ if (data_len && seg_size) {
+ pad = seg_size - data_len % seg_size;
+ return pad == seg_size ? 0 : pad;
+ } else
+ return seg_size;
+}
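+
+/*
+ * A worked example (values for illustration only): an SA MAD has
+ * hdr_len = IB_MGMT_SA_HDR (56), so seg_size = 256 - 56 = 200.  With
+ * data_len = 500, pad = 200 - (500 % 200) = 100, i.e. the last of the
+ * three RMPP data segments is padded out to a full segment.
+ */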
+
+static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_rmpp_segment *s, *t;
+
+ list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
+ list_del(&s->list);
+ kfree(s);
+ }
+}
+
+static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
+ gfp_t gfp_mask)
+{
+ struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
+ struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
+ struct ib_rmpp_segment *seg = NULL;
+ int left, seg_size, pad;
+
+ send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len;
+ seg_size = send_buf->seg_size;
+ pad = send_wr->pad;
+
+ /* Allocate data segments. */
+ for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
+ seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
+ if (!seg) {
+ printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
+ "alloc failed for len %zd, gfp %#x\n",
+ sizeof (*seg) + seg_size, gfp_mask);
+ free_send_rmpp_list(send_wr);
+ return -ENOMEM;
+ }
+ seg->num = ++send_buf->seg_count;
+ list_add_tail(&seg->list, &send_wr->rmpp_list);
+ }
+
+ /* Zero any padding */
+ if (pad)
+ memset(seg->data + seg_size - pad, 0, pad);
+
+ rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
+ agent.rmpp_version;
+ rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
+ ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+
+ send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
+ struct ib_rmpp_segment, list);
+ send_wr->last_ack_seg = send_wr->cur_seg;
+ return 0;
+}
+
+struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
+ u32 remote_qpn, u16 pkey_index,
+ int rmpp_active,
+ int hdr_len, int data_len,
+ gfp_t gfp_mask)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ int pad, message_size, ret, size;
+ void *buf;
+
+ mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
+ agent);
+ pad = get_pad_size(hdr_len, data_len);
+ message_size = hdr_len + data_len + pad;
+
+ if ((!mad_agent->rmpp_version &&
+ (rmpp_active || message_size > sizeof(struct ib_mad))) ||
+ (!rmpp_active && message_size > sizeof(struct ib_mad)))
+ return ERR_PTR(-EINVAL);
+
+ size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
+ buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ mad_send_wr = buf + size;
+ INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
+ mad_send_wr->send_buf.mad = buf;
+ mad_send_wr->send_buf.hdr_len = hdr_len;
+ mad_send_wr->send_buf.data_len = data_len;
+ mad_send_wr->pad = pad;
+
+ mad_send_wr->mad_agent_priv = mad_agent_priv;
+ mad_send_wr->sg_list[0].length = hdr_len;
+ mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
+ mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len;
+ mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;
+
+ mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
+ mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
+ mad_send_wr->send_wr.num_sge = 2;
+ mad_send_wr->send_wr.opcode = IB_WR_SEND;
+ mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
+ mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
+ mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
+ mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
+
+ if (rmpp_active) {
+ ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask);
+ if (ret) {
+ kfree(buf);
+ return ERR_PTR(ret);
+ }
+ }
+
+ mad_send_wr->send_buf.mad_agent = mad_agent;
+ atomic_inc(&mad_agent_priv->refcount);
+ return &mad_send_wr->send_buf;
+}
+EXPORT_SYMBOL(ib_create_send_mad);
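+
+/*
+ * A minimal usage sketch, not part of this change; "agent", "ah",
+ * "remote_qpn" and "pkey_index" are assumed to be supplied by the
+ * caller, and error handling is abbreviated:
+ *
+ *	struct ib_mad_send_buf *msg;
+ *
+ *	msg = ib_create_send_mad(agent, remote_qpn, pkey_index, 0,
+ *				 IB_MGMT_MAD_HDR,
+ *				 sizeof(struct ib_mad) - IB_MGMT_MAD_HDR,
+ *				 GFP_KERNEL);
+ *	if (IS_ERR(msg))
+ *		return PTR_ERR(msg);
+ *	msg->ah = ah;
+ *	msg->timeout_ms = 1000;
+ *	msg->retries = 3;
+ *	(fill in msg->mad here)
+ *	if (ib_post_send_mad(msg, NULL))
+ *		ib_free_send_mad(msg);
+ */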
+
+int ib_get_mad_data_offset(u8 mgmt_class)
+{
+ if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
+ return IB_MGMT_SA_HDR;
+ else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
+ (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
+ (mgmt_class == IB_MGMT_CLASS_BIS))
+ return IB_MGMT_DEVICE_HDR;
+ else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
+ (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
+ return IB_MGMT_VENDOR_HDR;
+ else
+ return IB_MGMT_MAD_HDR;
+}
+EXPORT_SYMBOL(ib_get_mad_data_offset);
+
+int ib_is_mad_class_rmpp(u8 mgmt_class)
+{
+ if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) ||
+ (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
+ (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
+ (mgmt_class == IB_MGMT_CLASS_BIS) ||
+ ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
+ (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)))
+ return 1;
+ return 0;
+}
+EXPORT_SYMBOL(ib_is_mad_class_rmpp);
+
+void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct list_head *list;
+
+ mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+ send_buf);
+ list = &mad_send_wr->cur_seg->list;
+
+ if (mad_send_wr->cur_seg->num < seg_num) {
+ list_for_each_entry(mad_send_wr->cur_seg, list, list)
+ if (mad_send_wr->cur_seg->num == seg_num)
+ break;
+ } else if (mad_send_wr->cur_seg->num > seg_num) {
+ list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
+ if (mad_send_wr->cur_seg->num == seg_num)
+ break;
+ }
+ return mad_send_wr->cur_seg->data;
+}
+EXPORT_SYMBOL(ib_get_rmpp_segment);
+
+static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ if (mad_send_wr->send_buf.seg_count)
+ return ib_get_rmpp_segment(&mad_send_wr->send_buf,
+ mad_send_wr->seg_num);
+ else
+ return mad_send_wr->send_buf.mad +
+ mad_send_wr->send_buf.hdr_len;
+}
+
+void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_wr_private *mad_send_wr;
+
+ mad_agent_priv = container_of(send_buf->mad_agent,
+ struct ib_mad_agent_private, agent);
+ mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+ send_buf);
+
+ free_send_rmpp_list(mad_send_wr);
+ kfree(send_buf->mad);
+ deref_mad_agent(mad_agent_priv);
+}
+EXPORT_SYMBOL(ib_free_send_mad);
+
+int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_mad_qp_info *qp_info;
+ struct list_head *list;
+ struct ib_send_wr *bad_send_wr;
+ struct ib_mad_agent *mad_agent;
+ struct ib_sge *sge;
+ unsigned long flags;
+ int ret;
+
+ /* Set WR ID to find mad_send_wr upon completion */
+ qp_info = mad_send_wr->mad_agent_priv->qp_info;
+ mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
+ mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
+
+ mad_agent = mad_send_wr->send_buf.mad_agent;
+ sge = mad_send_wr->sg_list;
+ sge[0].addr = ib_dma_map_single(mad_agent->device,
+ mad_send_wr->send_buf.mad,
+ sge[0].length,
+ DMA_TO_DEVICE);
+ mad_send_wr->header_mapping = sge[0].addr;
+
+ sge[1].addr = ib_dma_map_single(mad_agent->device,
+ ib_get_payload(mad_send_wr),
+ sge[1].length,
+ DMA_TO_DEVICE);
+ mad_send_wr->payload_mapping = sge[1].addr;
+
+ spin_lock_irqsave(&qp_info->send_queue.lock, flags);
+ if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
+ ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
+ &bad_send_wr);
+ list = &qp_info->send_queue.list;
+ } else {
+ ret = 0;
+ list = &qp_info->overflow_list;
+ }
+
+ if (!ret) {
+ qp_info->send_queue.count++;
+ list_add_tail(&mad_send_wr->mad_list.list, list);
+ }
+ spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
+ if (ret) {
+ ib_dma_unmap_single(mad_agent->device,
+ mad_send_wr->header_mapping,
+ sge[0].length, DMA_TO_DEVICE);
+ ib_dma_unmap_single(mad_agent->device,
+ mad_send_wr->payload_mapping,
+ sge[1].length, DMA_TO_DEVICE);
+ }
+ return ret;
+}
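+
+/*
+ * Note on the queueing above: once send_queue.count reaches
+ * max_active, new work requests are parked on overflow_list instead
+ * of being posted to the QP; ib_mad_send_done_handler() moves them to
+ * the hardware queue as completions free up slots.
+ */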
+
+/*
+ * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
+ * with the registered client
+ */
+int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
+ struct ib_mad_send_buf **bad_send_buf)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_buf *next_send_buf;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ unsigned long flags;
+ int ret = -EINVAL;
+
+ /* Walk the chain of send buffers, posting each WR on the send list */
+ for (; send_buf; send_buf = next_send_buf) {
+
+ mad_send_wr = container_of(send_buf,
+ struct ib_mad_send_wr_private,
+ send_buf);
+ mad_agent_priv = mad_send_wr->mad_agent_priv;
+
+ if (!send_buf->mad_agent->send_handler ||
+ (send_buf->timeout_ms &&
+ !send_buf->mad_agent->recv_handler)) {
+ ret = -EINVAL;
+ goto error;
+ }
+
+ if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
+ if (mad_agent_priv->agent.rmpp_version) {
+ ret = -EINVAL;
+ goto error;
+ }
+ }
+
+ /*
+ * Save pointer to next work request to post in case the
+ * current one completes, and the user modifies the work
+ * request associated with the completion
+ */
+ next_send_buf = send_buf->next;
+ mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
+
+ if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ ret = handle_outgoing_dr_smp(mad_agent_priv,
+ mad_send_wr);
+ if (ret < 0) /* error */
+ goto error;
+ else if (ret == 1) /* locally consumed */
+ continue;
+ }
+
+ mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
+ /* Timeout will be updated after send completes */
+ mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
+ mad_send_wr->max_retries = send_buf->retries;
+ mad_send_wr->retries_left = send_buf->retries;
+ send_buf->retries = 0;
+ /* Reference for work request to QP + response */
+ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
+ mad_send_wr->status = IB_WC_SUCCESS;
+
+ /* Reference MAD agent until send completes */
+ atomic_inc(&mad_agent_priv->refcount);
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_add_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->send_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ if (mad_agent_priv->agent.rmpp_version) {
+ ret = ib_send_rmpp_mad(mad_send_wr);
+ if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
+ ret = ib_send_mad(mad_send_wr);
+ } else
+ ret = ib_send_mad(mad_send_wr);
+ if (ret < 0) {
+ /* Fail send request */
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_del(&mad_send_wr->agent_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ atomic_dec(&mad_agent_priv->refcount);
+ goto error;
+ }
+ }
+ return 0;
+error:
+ if (bad_send_buf)
+ *bad_send_buf = send_buf;
+ return ret;
+}
+EXPORT_SYMBOL(ib_post_send_mad);
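+
+/*
+ * A minimal send_handler sketch (illustrative only), assuming the
+ * buffer was allocated with ib_create_send_mad() and needs no other
+ * cleanup:
+ *
+ *	static void my_send_handler(struct ib_mad_agent *agent,
+ *				    struct ib_mad_send_wc *wc)
+ *	{
+ *		(inspect wc->status if the outcome matters)
+ *		ib_free_send_mad(wc->send_buf);
+ *	}
+ */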
+
+/*
+ * ib_free_recv_mad - Returns data buffers used to receive
+ * a MAD to the access layer
+ */
+void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
+ struct ib_mad_private_header *mad_priv_hdr;
+ struct ib_mad_private *priv;
+ struct list_head free_list;
+
+ INIT_LIST_HEAD(&free_list);
+ list_splice_init(&mad_recv_wc->rmpp_list, &free_list);
+
+ list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
+ &free_list, list) {
+ mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
+ recv_buf);
+ mad_priv_hdr = container_of(mad_recv_wc,
+ struct ib_mad_private_header,
+ recv_wc);
+ priv = container_of(mad_priv_hdr, struct ib_mad_private,
+ header);
+ kmem_cache_free(ib_mad_cache, priv);
+ }
+}
+EXPORT_SYMBOL(ib_free_recv_mad);
+
+struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
+ u8 rmpp_version,
+ ib_mad_send_handler send_handler,
+ ib_mad_recv_handler recv_handler,
+ void *context)
+{
+ return ERR_PTR(-EINVAL); /* XXX: for now */
+}
+EXPORT_SYMBOL(ib_redirect_mad_qp);
+
+int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
+ struct ib_wc *wc)
+{
+ printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n");
+ return 0;
+}
+EXPORT_SYMBOL(ib_process_mad_wc);
+
+static int method_in_use(struct ib_mad_mgmt_method_table **method,
+ struct ib_mad_reg_req *mad_reg_req)
+{
+ int i;
+
+ for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS);
+ i < IB_MGMT_MAX_METHODS;
+ i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
+ 1+i)) {
+ if ((*method)->agent[i]) {
+ printk(KERN_ERR PFX "Method %d already in use\n", i);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
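+
+/*
+ * The bitmap walk above mirrors how a client requests methods at
+ * registration time.  A sketch (all values illustrative) of a request
+ * for Get/Set of a vendor range 2 class:
+ *
+ *	struct ib_mad_reg_req req;
+ *
+ *	memset(&req, 0, sizeof req);
+ *	req.mgmt_class = IB_MGMT_CLASS_VENDOR_RANGE2_START;
+ *	req.mgmt_class_version = 1;
+ *	set_bit(IB_MGMT_METHOD_GET, req.method_mask);
+ *	set_bit(IB_MGMT_METHOD_SET, req.method_mask);
+ */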
+
+static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
+{
+ /* Allocate management method table */
+ *method = kzalloc(sizeof **method, GFP_ATOMIC);
+ if (!*method) {
+ printk(KERN_ERR PFX "No memory for "
+ "ib_mad_mgmt_method_table\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Check to see if there are any methods still in use
+ */
+static int check_method_table(struct ib_mad_mgmt_method_table *method)
+{
+ int i;
+
+ for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
+ if (method->agent[i])
+ return 1;
+ return 0;
+}
+
+/*
+ * Check to see if there are any method tables for this class still in use
+ */
+static int check_class_table(struct ib_mad_mgmt_class_table *class)
+{
+ int i;
+
+ for (i = 0; i < MAX_MGMT_CLASS; i++)
+ if (class->method_table[i])
+ return 1;
+ return 0;
+}
+
+static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
+{
+ int i;
+
+ for (i = 0; i < MAX_MGMT_OUI; i++)
+ if (vendor_class->method_table[i])
+ return 1;
+ return 0;
+}
+
+static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
+ char *oui)
+{
+ int i;
+
+ for (i = 0; i < MAX_MGMT_OUI; i++)
+ /* Is there a matching OUI for this vendor class? */
+ if (!memcmp(vendor_class->oui[i], oui, 3))
+ return i;
+
+ return -1;
+}
+
+static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
+{
+ int i;
+
+ for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
+ if (vendor->vendor_class[i])
+ return 1;
+
+ return 0;
+}
+
+static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
+ struct ib_mad_agent_private *agent)
+{
+ int i;
+
+ /* Remove any methods for this mad agent */
+ for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
+ if (method->agent[i] == agent) {
+ method->agent[i] = NULL;
+ }
+ }
+}
+
+static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
+ struct ib_mad_agent_private *agent_priv,
+ u8 mgmt_class)
+{
+ struct ib_mad_port_private *port_priv;
+ struct ib_mad_mgmt_class_table **class;
+ struct ib_mad_mgmt_method_table **method;
+ int i, ret;
+
+ port_priv = agent_priv->qp_info->port_priv;
+ class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
+ if (!*class) {
+ /* Allocate management class table for "new" class version */
+ *class = kzalloc(sizeof **class, GFP_ATOMIC);
+ if (!*class) {
+ printk(KERN_ERR PFX "No memory for "
+ "ib_mad_mgmt_class_table\n");
+ ret = -ENOMEM;
+ goto error1;
+ }
+
+ /* Allocate method table for this management class */
+ method = &(*class)->method_table[mgmt_class];
+ if ((ret = allocate_method_table(method)))
+ goto error2;
+ } else {
+ method = &(*class)->method_table[mgmt_class];
+ if (!*method) {
+ /* Allocate method table for this management class */
+ if ((ret = allocate_method_table(method)))
+ goto error1;
+ }
+ }
+
+ /* Now, make sure methods are not already in use */
+ if (method_in_use(method, mad_reg_req))
+ goto error3;
+
+ /* Finally, add in methods being registered */
+ for (i = find_first_bit(mad_reg_req->method_mask,
+ IB_MGMT_MAX_METHODS);
+ i < IB_MGMT_MAX_METHODS;
+ i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
+ 1+i)) {
+ (*method)->agent[i] = agent_priv;
+ }
+ return 0;
+
+error3:
+ /* Remove any methods for this mad agent */
+ remove_methods_mad_agent(*method, agent_priv);
+ /* Now, check to see if there are any methods in use */
+ if (!check_method_table(*method)) {
+ /* If not, release management method table */
+ kfree(*method);
+ *method = NULL;
+ }
+ ret = -EINVAL;
+ goto error1;
+error2:
+ kfree(*class);
+ *class = NULL;
+error1:
+ return ret;
+}
+
+static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
+ struct ib_mad_agent_private *agent_priv)
+{
+ struct ib_mad_port_private *port_priv;
+ struct ib_mad_mgmt_vendor_class_table **vendor_table;
+ struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
+ struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
+ struct ib_mad_mgmt_method_table **method;
+ int i, ret = -ENOMEM;
+ u8 vclass;
+
+ /* "New" vendor (with OUI) class */
+ vclass = vendor_class_index(mad_reg_req->mgmt_class);
+ port_priv = agent_priv->qp_info->port_priv;
+ vendor_table = &port_priv->version[
+ mad_reg_req->mgmt_class_version].vendor;
+ if (!*vendor_table) {
+ /* Allocate mgmt vendor class table for "new" class version */
+ vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
+ if (!vendor) {
+ printk(KERN_ERR PFX "No memory for "
+ "ib_mad_mgmt_vendor_class_table\n");
+ goto error1;
+ }
+
+ *vendor_table = vendor;
+ }
+ if (!(*vendor_table)->vendor_class[vclass]) {
+ /* Allocate table for this management vendor class */
+ vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
+ if (!vendor_class) {
+ printk(KERN_ERR PFX "No memory for "
+ "ib_mad_mgmt_vendor_class\n");
+ goto error2;
+ }
+
+ (*vendor_table)->vendor_class[vclass] = vendor_class;
+ }
+ for (i = 0; i < MAX_MGMT_OUI; i++) {
+ /* Is there a matching OUI for this vendor class? */
+ if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
+ mad_reg_req->oui, 3)) {
+ method = &(*vendor_table)->vendor_class[
+ vclass]->method_table[i];
+ BUG_ON(!*method);
+ goto check_in_use;
+ }
+ }
+ for (i = 0; i < MAX_MGMT_OUI; i++) {
+ /* OUI slot available? */
+ if (!is_vendor_oui((*vendor_table)->vendor_class[
+ vclass]->oui[i])) {
+ method = &(*vendor_table)->vendor_class[
+ vclass]->method_table[i];
+ BUG_ON(*method);
+ /* Allocate method table for this OUI */
+ if ((ret = allocate_method_table(method)))
+ goto error3;
+ memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
+ mad_reg_req->oui, 3);
+ goto check_in_use;
+ }
+ }
+ printk(KERN_ERR PFX "All OUI slots in use\n");
+ goto error3;
+
+check_in_use:
+ /* Now, make sure methods are not already in use */
+ if (method_in_use(method, mad_reg_req))
+ goto error4;
+
+ /* Finally, add in methods being registered */
+ for (i = find_first_bit(mad_reg_req->method_mask,
+ IB_MGMT_MAX_METHODS);
+ i < IB_MGMT_MAX_METHODS;
+ i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
+ 1+i)) {
+ (*method)->agent[i] = agent_priv;
+ }
+ return 0;
+
+error4:
+ /* Remove any methods for this mad agent */
+ remove_methods_mad_agent(*method, agent_priv);
+ /* Now, check to see if there are any methods in use */
+ if (!check_method_table(*method)) {
+ /* If not, release management method table */
+ kfree(*method);
+ *method = NULL;
+ }
+ ret = -EINVAL;
+error3:
+ if (vendor_class) {
+ (*vendor_table)->vendor_class[vclass] = NULL;
+ kfree(vendor_class);
+ }
+error2:
+ if (vendor) {
+ *vendor_table = NULL;
+ kfree(vendor);
+ }
+error1:
+ return ret;
+}
+
+static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
+{
+ struct ib_mad_port_private *port_priv;
+ struct ib_mad_mgmt_class_table *class;
+ struct ib_mad_mgmt_method_table *method;
+ struct ib_mad_mgmt_vendor_class_table *vendor;
+ struct ib_mad_mgmt_vendor_class *vendor_class;
+ int index;
+ u8 mgmt_class;
+
+ /*
+ * Was a MAD registration request supplied
+ * with the original registration?
+ */
+ if (!agent_priv->reg_req) {
+ goto out;
+ }
+
+ port_priv = agent_priv->qp_info->port_priv;
+ mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
+ class = port_priv->version[
+ agent_priv->reg_req->mgmt_class_version].class;
+ if (!class)
+ goto vendor_check;
+
+ method = class->method_table[mgmt_class];
+ if (method) {
+ /* Remove any methods for this mad agent */
+ remove_methods_mad_agent(method, agent_priv);
+ /* Now, check to see if there are any methods still in use */
+ if (!check_method_table(method)) {
+ /* If not, release management method table */
+ kfree(method);
+ class->method_table[mgmt_class] = NULL;
+ /* Any management classes left? */
+ if (!check_class_table(class)) {
+ /* If not, release management class table */
+ kfree(class);
+ port_priv->version[
+ agent_priv->reg_req->
+ mgmt_class_version].class = NULL;
+ }
+ }
+ }
+
+vendor_check:
+ if (!is_vendor_class(mgmt_class))
+ goto out;
+
+ /* normalize mgmt_class to vendor range 2 */
+ mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
+ vendor = port_priv->version[
+ agent_priv->reg_req->mgmt_class_version].vendor;
+
+ if (!vendor)
+ goto out;
+
+ vendor_class = vendor->vendor_class[mgmt_class];
+ if (vendor_class) {
+ index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
+ if (index < 0)
+ goto out;
+ method = vendor_class->method_table[index];
+ if (method) {
+ /* Remove any methods for this mad agent */
+ remove_methods_mad_agent(method, agent_priv);
+ /*
+ * Now, check to see if there are
+ * any methods still in use
+ */
+ if (!check_method_table(method)) {
+ /* If not, release management method table */
+ kfree(method);
+ vendor_class->method_table[index] = NULL;
+ memset(vendor_class->oui[index], 0, 3);
+ /* Any OUIs left? */
+ if (!check_vendor_class(vendor_class)) {
+ /* If not, release vendor class table */
+ kfree(vendor_class);
+ vendor->vendor_class[mgmt_class] = NULL;
+ /* Any other vendor classes left? */
+ if (!check_vendor_table(vendor)) {
+ kfree(vendor);
+ port_priv->version[
+ agent_priv->reg_req->
+ mgmt_class_version].
+ vendor = NULL;
+ }
+ }
+ }
+ }
+ }
+
+out:
+ return;
+}
+
+static struct ib_mad_agent_private *
+find_mad_agent(struct ib_mad_port_private *port_priv,
+ struct ib_mad *mad)
+{
+ struct ib_mad_agent_private *mad_agent = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port_priv->reg_lock, flags);
+ if (ib_response_mad(mad)) {
+ u32 hi_tid;
+ struct ib_mad_agent_private *entry;
+
+ /*
+ * Routing is based on the high 32 bits of
+ * the MAD's transaction ID.
+ */
+ hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
+ list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
+ if (entry->agent.hi_tid == hi_tid) {
+ mad_agent = entry;
+ break;
+ }
+ }
+ } else {
+ struct ib_mad_mgmt_class_table *class;
+ struct ib_mad_mgmt_method_table *method;
+ struct ib_mad_mgmt_vendor_class_table *vendor;
+ struct ib_mad_mgmt_vendor_class *vendor_class;
+ struct ib_vendor_mad *vendor_mad;
+ int index;
+
+ /*
+ * Routing is based on version, class, and method
+ * For "newer" vendor MADs, also based on OUI
+ */
+ if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION)
+ goto out;
+ if (!is_vendor_class(mad->mad_hdr.mgmt_class)) {
+ class = port_priv->version[
+ mad->mad_hdr.class_version].class;
+ if (!class)
+ goto out;
+ method = class->method_table[convert_mgmt_class(
+ mad->mad_hdr.mgmt_class)];
+ if (method)
+ mad_agent = method->agent[mad->mad_hdr.method &
+ ~IB_MGMT_METHOD_RESP];
+ } else {
+ vendor = port_priv->version[
+ mad->mad_hdr.class_version].vendor;
+ if (!vendor)
+ goto out;
+ vendor_class = vendor->vendor_class[vendor_class_index(
+ mad->mad_hdr.mgmt_class)];
+ if (!vendor_class)
+ goto out;
+ /* Find matching OUI */
+ vendor_mad = (struct ib_vendor_mad *)mad;
+ index = find_vendor_oui(vendor_class, vendor_mad->oui);
+ if (index == -1)
+ goto out;
+ method = vendor_class->method_table[index];
+ if (method) {
+ mad_agent = method->agent[mad->mad_hdr.method &
+ ~IB_MGMT_METHOD_RESP];
+ }
+ }
+ }
+
+ if (mad_agent) {
+ if (mad_agent->agent.recv_handler)
+ atomic_inc(&mad_agent->refcount);
+ else {
+ printk(KERN_NOTICE PFX "No receive handler for client "
+ "%p on port %d\n",
+ &mad_agent->agent, port_priv->port_num);
+ mad_agent = NULL;
+ }
+ }
+out:
+ spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+
+ return mad_agent;
+}
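+
+/*
+ * Response routing above depends on the sender embedding its hi_tid
+ * in the upper 32 bits of every transaction ID, e.g. (illustrative):
+ *
+ *	mad_hdr->tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | tid);
+ */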
+
+static int validate_mad(struct ib_mad *mad, u32 qp_num)
+{
+ int valid = 0;
+
+ /* Make sure MAD base version is understood */
+ if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
+ printk(KERN_ERR PFX "MAD received with unsupported base "
+ "version %d\n", mad->mad_hdr.base_version);
+ goto out;
+ }
+
+ /* Filter SMI packets sent to other than QP0 */
+ if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
+ (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
+ if (qp_num == 0)
+ valid = 1;
+ } else {
+ /* Filter GSI packets sent to QP0 */
+ if (qp_num != 0)
+ valid = 1;
+ }
+
+out:
+ return valid;
+}
+
+static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_hdr *mad_hdr)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+
+ rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
+ return !mad_agent_priv->agent.rmpp_version ||
+ !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+ IB_MGMT_RMPP_FLAG_ACTIVE) ||
+ (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
+}
+
+static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr,
+ struct ib_mad_recv_wc *rwc)
+{
+ return ((struct ib_mad *)(wr->send_buf.mad))->mad_hdr.mgmt_class ==
+ rwc->recv_buf.mad->mad_hdr.mgmt_class;
+}
+
+static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_send_wr_private *wr,
+ struct ib_mad_recv_wc *rwc)
+{
+ struct ib_ah_attr attr;
+ u8 send_resp, rcv_resp;
+ union ib_gid sgid;
+ struct ib_device *device = mad_agent_priv->agent.device;
+ u8 port_num = mad_agent_priv->agent.port_num;
+ u8 lmc;
+
+ send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad);
+ rcv_resp = ib_response_mad(rwc->recv_buf.mad);
+
+ if (send_resp == rcv_resp)
+ /* both requests or both responses: treat GIDs as different */
+ return 0;
+
+ if (ib_query_ah(wr->send_buf.ah, &attr))
+ /* Assume not equal, to avoid false positives. */
+ return 0;
+
+ if (!!(attr.ah_flags & IB_AH_GRH) !=
+ !!(rwc->wc->wc_flags & IB_WC_GRH))
+ /* one has GID, other does not. Assume different */
+ return 0;
+
+ if (!send_resp && rcv_resp) {
+ /* sent a request, received a response */
+ if (!(attr.ah_flags & IB_AH_GRH)) {
+ if (ib_get_cached_lmc(device, port_num, &lmc))
+ return 0;
+ return (!lmc || !((attr.src_path_bits ^
+ rwc->wc->dlid_path_bits) &
+ ((1 << lmc) - 1)));
+ } else {
+ if (ib_get_cached_gid(device, port_num,
+ attr.grh.sgid_index, &sgid))
+ return 0;
+ return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
+ 16);
+ }
+ }
+
+ if (!(attr.ah_flags & IB_AH_GRH))
+ return attr.dlid == rwc->wc->slid;
+ else
+ return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
+ 16);
+}
+
+static inline int is_direct(u8 class)
+{
+ return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
+}
+
+struct ib_mad_send_wr_private*
+ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_recv_wc *wc)
+{
+ struct ib_mad_send_wr_private *wr;
+ struct ib_mad *mad;
+
+ mad = (struct ib_mad *)wc->recv_buf.mad;
+
+ list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
+ if ((wr->tid == mad->mad_hdr.tid) &&
+ rcv_has_same_class(wr, wc) &&
+ /*
+ * Don't check GID for direct routed MADs.
+ * These might have permissive LIDs.
+ */
+ (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
+ rcv_has_same_gid(mad_agent_priv, wr, wc)))
+ return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
+ }
+
+ /*
+ * It's possible to receive the response before we've
+ * been notified that the send has completed
+ */
+ list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
+ if (is_data_mad(mad_agent_priv, wr->send_buf.mad) &&
+ wr->tid == mad->mad_hdr.tid &&
+ wr->timeout &&
+ rcv_has_same_class(wr, wc) &&
+ /*
+ * Don't check GID for direct routed MADs.
+ * These might have permissive LIDs.
+ */
+ (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
+ rcv_has_same_gid(mad_agent_priv, wr, wc)))
+ /* Verify request has not been canceled */
+ return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
+ }
+ return NULL;
+}
+
+void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ mad_send_wr->timeout = 0;
+ if (mad_send_wr->refcount == 1)
+ list_move_tail(&mad_send_wr->agent_list,
+ &mad_send_wr->mad_agent_priv->done_list);
+}
+
+static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_mad_send_wc mad_send_wc;
+ unsigned long flags;
+
+ INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
+ list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
+ if (mad_agent_priv->agent.rmpp_version) {
+ mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
+ mad_recv_wc);
+ if (!mad_recv_wc) {
+ deref_mad_agent(mad_agent_priv);
+ return;
+ }
+ }
+
+ /* Complete corresponding request */
+ if (ib_response_mad(mad_recv_wc->recv_buf.mad)) {
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
+ if (!mad_send_wr) {
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ ib_free_recv_mad(mad_recv_wc);
+ deref_mad_agent(mad_agent_priv);
+ return;
+ }
+ ib_mark_mad_done(mad_send_wr);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ /* Defined behavior is to complete response before request */
+ mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
+ mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+ mad_recv_wc);
+ atomic_dec(&mad_agent_priv->refcount);
+
+ mad_send_wc.status = IB_WC_SUCCESS;
+ mad_send_wc.vendor_err = 0;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+ } else {
+ mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+ mad_recv_wc);
+ deref_mad_agent(mad_agent_priv);
+ }
+}
+
+static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
+ struct ib_wc *wc)
+{
+ struct ib_mad_qp_info *qp_info;
+ struct ib_mad_private_header *mad_priv_hdr;
+ struct ib_mad_private *recv, *response = NULL;
+ struct ib_mad_list_head *mad_list;
+ struct ib_mad_agent_private *mad_agent;
+ int port_num;
+
+ mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+ qp_info = mad_list->mad_queue->qp_info;
+ dequeue_mad(mad_list);
+
+ mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
+ mad_list);
+ recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
+ ib_dma_unmap_single(port_priv->device,
+ recv->header.mapping,
+ sizeof(struct ib_mad_private) -
+ sizeof(struct ib_mad_private_header),
+ DMA_FROM_DEVICE);
+
+ /* Setup MAD receive work completion from "normal" work completion */
+ recv->header.wc = *wc;
+ recv->header.recv_wc.wc = &recv->header.wc;
+ recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
+ recv->header.recv_wc.recv_buf.mad = &recv->mad.mad;
+ recv->header.recv_wc.recv_buf.grh = &recv->grh;
+
+ if (atomic_read(&qp_info->snoop_count))
+ snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
+
+ /* Validate MAD */
+ if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num))
+ goto out;
+
+ response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
+ if (!response) {
+ printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
+ "for response buffer\n");
+ goto out;
+ }
+
+ if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH)
+ port_num = wc->port_num;
+ else
+ port_num = port_priv->port_num;
+
+ if (recv->mad.mad.mad_hdr.mgmt_class ==
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ enum smi_forward_action retsmi;
+
+ if (smi_handle_dr_smp_recv(&recv->mad.smp,
+ port_priv->device->node_type,
+ port_num,
+ port_priv->device->phys_port_cnt) ==
+ IB_SMI_DISCARD)
+ goto out;
+
+ retsmi = smi_check_forward_dr_smp(&recv->mad.smp);
+ if (retsmi == IB_SMI_LOCAL)
+ goto local;
+
+ if (retsmi == IB_SMI_SEND) { /* don't forward */
+ if (smi_handle_dr_smp_send(&recv->mad.smp,
+ port_priv->device->node_type,
+ port_num) == IB_SMI_DISCARD)
+ goto out;
+
+ if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
+ goto out;
+ } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) {
+ /* forward case for switches */
+ memcpy(response, recv, sizeof(*response));
+ response->header.recv_wc.wc = &response->header.wc;
+ response->header.recv_wc.recv_buf.mad = &response->mad.mad;
+ response->header.recv_wc.recv_buf.grh = &response->grh;
+
+ agent_send_response(&response->mad.mad,
+ &response->grh, wc,
+ port_priv->device,
+ smi_get_fwd_port(&recv->mad.smp),
+ qp_info->qp->qp_num);
+
+ goto out;
+ }
+ }
+
+local:
+ /* Give driver "right of first refusal" on incoming MAD */
+ if (port_priv->device->process_mad) {
+ int ret;
+
+ ret = port_priv->device->process_mad(port_priv->device, 0,
+ port_priv->port_num,
+ wc, &recv->grh,
+ &recv->mad.mad,
+ &response->mad.mad);
+ if (ret & IB_MAD_RESULT_SUCCESS) {
+ if (ret & IB_MAD_RESULT_CONSUMED)
+ goto out;
+ if (ret & IB_MAD_RESULT_REPLY) {
+ agent_send_response(&response->mad.mad,
+ &recv->grh, wc,
+ port_priv->device,
+ port_num,
+ qp_info->qp->qp_num);
+ goto out;
+ }
+ }
+ }
+
+ mad_agent = find_mad_agent(port_priv, &recv->mad.mad);
+ if (mad_agent) {
+ ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
+ /*
+ * recv is freed inside ib_mad_complete_recv(), either on an
+ * error path or via the client's recv_handler
+ */
+ recv = NULL;
+ }
+
+out:
+ /* Post another receive request for this QP */
+ if (response) {
+ ib_mad_post_receive_mads(qp_info, response);
+ if (recv)
+ kmem_cache_free(ib_mad_cache, recv);
+ } else
+ ib_mad_post_receive_mads(qp_info, recv);
+}
+
+static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+
+ if (list_empty(&mad_agent_priv->wait_list)) {
+ del_timer(&mad_agent_priv->timeout_timer);
+ } else {
+ mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
+ struct ib_mad_send_wr_private,
+ agent_list);
+
+ if (time_after(mad_agent_priv->timeout,
+ mad_send_wr->timeout)) {
+ mad_agent_priv->timeout = mad_send_wr->timeout;
+ mod_timer(&mad_agent_priv->timeout_timer,
+ mad_send_wr->timeout);
+ }
+ }
+}
+
+static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_wr_private *temp_mad_send_wr;
+ struct list_head *list_item;
+ unsigned long delay;
+
+ mad_agent_priv = mad_send_wr->mad_agent_priv;
+ list_del(&mad_send_wr->agent_list);
+
+ delay = mad_send_wr->timeout;
+ mad_send_wr->timeout += jiffies;
+
+ if (delay) {
+ list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
+ temp_mad_send_wr = list_entry(list_item,
+ struct ib_mad_send_wr_private,
+ agent_list);
+ if (time_after(mad_send_wr->timeout,
+ temp_mad_send_wr->timeout))
+ break;
+ }
+ } else
+ list_item = &mad_agent_priv->wait_list;
+ list_add(&mad_send_wr->agent_list, list_item);
+
+ /* Reschedule a work item if we have a shorter timeout */
+ if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
+ mod_timer(&mad_agent_priv->timeout_timer,
+ mad_send_wr->timeout);
+}
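+
+/*
+ * The wait list is kept sorted by absolute timeout, earliest first,
+ * so the single timeout_timer only ever needs to track the entry at
+ * the head of the list.
+ */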
+
+void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
+ int timeout_ms)
+{
+ mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
+ wait_for_response(mad_send_wr);
+}
+
+/*
+ * Process a send work completion
+ */
+void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ unsigned long flags;
+ int ret;
+
+ mad_agent_priv = mad_send_wr->mad_agent_priv;
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ if (mad_agent_priv->agent.rmpp_version) {
+ ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
+ if (ret == IB_RMPP_RESULT_CONSUMED)
+ goto done;
+ } else
+ ret = IB_RMPP_RESULT_UNHANDLED;
+
+ if (mad_send_wc->status != IB_WC_SUCCESS &&
+ mad_send_wr->status == IB_WC_SUCCESS) {
+ mad_send_wr->status = mad_send_wc->status;
+ mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
+ }
+
+ if (--mad_send_wr->refcount > 0) {
+ if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
+ mad_send_wr->status == IB_WC_SUCCESS) {
+ wait_for_response(mad_send_wr);
+ }
+ goto done;
+ }
+
+ /* Remove send from MAD agent and notify client of completion */
+ list_del(&mad_send_wr->agent_list);
+ adjust_timeout(mad_agent_priv);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ if (mad_send_wr->status != IB_WC_SUCCESS)
+ mad_send_wc->status = mad_send_wr->status;
+ if (ret == IB_RMPP_RESULT_INTERNAL)
+ ib_rmpp_send_handler(mad_send_wc);
+ else
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ mad_send_wc);
+
+ /* Release reference on agent taken when sending */
+ deref_mad_agent(mad_agent_priv);
+ return;
+done:
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+}
+
+static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
+ struct ib_wc *wc)
+{
+ struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr;
+ struct ib_mad_list_head *mad_list;
+ struct ib_mad_qp_info *qp_info;
+ struct ib_mad_queue *send_queue;
+ struct ib_send_wr *bad_send_wr;
+ struct ib_mad_send_wc mad_send_wc;
+ unsigned long flags;
+ int ret;
+
+ mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+ mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
+ mad_list);
+ send_queue = mad_list->mad_queue;
+ qp_info = send_queue->qp_info;
+
+retry:
+ ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
+ mad_send_wr->header_mapping,
+ mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
+ ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
+ mad_send_wr->payload_mapping,
+ mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
+ queued_send_wr = NULL;
+ spin_lock_irqsave(&send_queue->lock, flags);
+ list_del(&mad_list->list);
+
+ /* Move queued send to the send queue */
+ if (send_queue->count-- > send_queue->max_active) {
+ mad_list = container_of(qp_info->overflow_list.next,
+ struct ib_mad_list_head, list);
+ queued_send_wr = container_of(mad_list,
+ struct ib_mad_send_wr_private,
+ mad_list);
+ list_move_tail(&mad_list->list, &send_queue->list);
+ }
+ spin_unlock_irqrestore(&send_queue->lock, flags);
+
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ mad_send_wc.status = wc->status;
+ mad_send_wc.vendor_err = wc->vendor_err;
+ if (atomic_read(&qp_info->snoop_count))
+ snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
+ IB_MAD_SNOOP_SEND_COMPLETIONS);
+ ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+
+ if (queued_send_wr) {
+ ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
+ &bad_send_wr);
+ if (ret) {
+ printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
+ mad_send_wr = queued_send_wr;
+ wc->status = IB_WC_LOC_QP_OP_ERR;
+ goto retry;
+ }
+ }
+}
+
+static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_mad_list_head *mad_list;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp_info->send_queue.lock, flags);
+ list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
+ mad_send_wr = container_of(mad_list,
+ struct ib_mad_send_wr_private,
+ mad_list);
+ mad_send_wr->retry = 1;
+ }
+ spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
+}
+
+static void mad_error_handler(struct ib_mad_port_private *port_priv,
+ struct ib_wc *wc)
+{
+ struct ib_mad_list_head *mad_list;
+ struct ib_mad_qp_info *qp_info;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ int ret;
+
+ /* Determine if failure was a send or receive */
+ mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+ qp_info = mad_list->mad_queue->qp_info;
+ if (mad_list->mad_queue == &qp_info->recv_queue)
+ /*
+ * Receive errors indicate that the QP has entered the error
+ * state - error handling/shutdown code will cleanup
+ */
+ return;
+
+ /*
+ * Send errors will transition the QP to SQE - move
+ * QP to RTS and repost flushed work requests
+ */
+ mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
+ mad_list);
+ if (wc->status == IB_WC_WR_FLUSH_ERR) {
+ if (mad_send_wr->retry) {
+ /* Repost send */
+ struct ib_send_wr *bad_send_wr;
+
+ mad_send_wr->retry = 0;
+ ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
+ &bad_send_wr);
+ if (ret)
+ ib_mad_send_done_handler(port_priv, wc);
+ } else
+ ib_mad_send_done_handler(port_priv, wc);
+ } else {
+ struct ib_qp_attr *attr;
+
+ /* Transition QP to RTS and fail offending send */
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ if (attr) {
+ attr->qp_state = IB_QPS_RTS;
+ attr->cur_qp_state = IB_QPS_SQE;
+ ret = ib_modify_qp(qp_info->qp, attr,
+ IB_QP_STATE | IB_QP_CUR_STATE);
+ kfree(attr);
+ if (ret)
+ printk(KERN_ERR PFX "mad_error_handler - "
+ "ib_modify_qp to RTS : %d\n", ret);
+ else
+ mark_sends_for_retry(qp_info);
+ }
+ ib_mad_send_done_handler(port_priv, wc);
+ }
+}
+
+/*
+ * IB MAD completion callback
+ */
+static void ib_mad_completion_handler(struct work_struct *work)
+{
+ struct ib_mad_port_private *port_priv;
+ struct ib_wc wc;
+
+ port_priv = container_of(work, struct ib_mad_port_private, work);
+ ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
+
+ while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
+ if (wc.status == IB_WC_SUCCESS) {
+ switch (wc.opcode) {
+ case IB_WC_SEND:
+ ib_mad_send_done_handler(port_priv, &wc);
+ break;
+ case IB_WC_RECV:
+ ib_mad_recv_done_handler(port_priv, &wc);
+ break;
+ default:
+ BUG_ON(1);
+ break;
+ }
+ } else
+ mad_error_handler(port_priv, &wc);
+ }
+}
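+
+/*
+ * Note the ordering above: the CQ is re-armed with ib_req_notify_cq()
+ * before the poll loop drains it, so a completion that arrives while
+ * we are draining still generates another callback instead of being
+ * lost.
+ */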
+
+static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
+{
+ unsigned long flags;
+ struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
+ struct ib_mad_send_wc mad_send_wc;
+ struct list_head cancel_list;
+
+ INIT_LIST_HEAD(&cancel_list);
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
+ &mad_agent_priv->send_list, agent_list) {
+ if (mad_send_wr->status == IB_WC_SUCCESS) {
+ mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+ mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
+ }
+ }
+
+ /* Empty wait list to prevent receives from finding a request */
+ list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ /* Report all cancelled requests */
+ mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
+ mad_send_wc.vendor_err = 0;
+
+ list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
+ &cancel_list, agent_list) {
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ list_del(&mad_send_wr->agent_list);
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ &mad_send_wc);
+ atomic_dec(&mad_agent_priv->refcount);
+ }
+}
+
+static struct ib_mad_send_wr_private*
+find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_send_buf *send_buf)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+
+ list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
+ agent_list) {
+ if (&mad_send_wr->send_buf == send_buf)
+ return mad_send_wr;
+ }
+
+ list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
+ agent_list) {
+ if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
+ &mad_send_wr->send_buf == send_buf)
+ return mad_send_wr;
+ }
+ return NULL;
+}
+
+int ib_modify_mad(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf, u32 timeout_ms)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ unsigned long flags;
+ int active;
+
+ mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
+ agent);
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
+ if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ return -EINVAL;
+ }
+
+ active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
+ if (!timeout_ms) {
+ mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+ mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
+ }
+
+ mad_send_wr->send_buf.timeout_ms = timeout_ms;
+ if (active)
+ mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
+ else
+ ib_reset_mad_timeout(mad_send_wr, timeout_ms);
+
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ return 0;
+}
+EXPORT_SYMBOL(ib_modify_mad);
+
+void ib_cancel_mad(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf)
+{
+ ib_modify_mad(mad_agent, send_buf, 0);
+}
+EXPORT_SYMBOL(ib_cancel_mad);
+
+static void local_completions(struct work_struct *work)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_local_private *local;
+ struct ib_mad_agent_private *recv_mad_agent;
+ unsigned long flags;
+ int free_mad;
+ struct ib_wc wc;
+ struct ib_mad_send_wc mad_send_wc;
+
+ mad_agent_priv =
+ container_of(work, struct ib_mad_agent_private, local_work);
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ while (!list_empty(&mad_agent_priv->local_list)) {
+ local = list_entry(mad_agent_priv->local_list.next,
+ struct ib_mad_local_private,
+ completion_list);
+ list_del(&local->completion_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ free_mad = 0;
+ if (local->mad_priv) {
+ recv_mad_agent = local->recv_mad_agent;
+ if (!recv_mad_agent) {
+ printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
+ free_mad = 1;
+ goto local_send_completion;
+ }
+
+ /*
+ * Defined behavior is to complete response
+ * before request
+ */
+ build_smp_wc(recv_mad_agent->agent.qp,
+ (unsigned long) local->mad_send_wr,
+ be16_to_cpu(IB_LID_PERMISSIVE),
+ 0, recv_mad_agent->agent.port_num, &wc);
+
+ local->mad_priv->header.recv_wc.wc = &wc;
+ local->mad_priv->header.recv_wc.mad_len =
+ sizeof(struct ib_mad);
+ INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
+ list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
+ &local->mad_priv->header.recv_wc.rmpp_list);
+ local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
+ local->mad_priv->header.recv_wc.recv_buf.mad =
+ &local->mad_priv->mad.mad;
+ if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
+ snoop_recv(recv_mad_agent->qp_info,
+ &local->mad_priv->header.recv_wc,
+ IB_MAD_SNOOP_RECVS);
+ recv_mad_agent->agent.recv_handler(
+ &recv_mad_agent->agent,
+ &local->mad_priv->header.recv_wc);
+ spin_lock_irqsave(&recv_mad_agent->lock, flags);
+ atomic_dec(&recv_mad_agent->refcount);
+ spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
+ }
+
+local_send_completion:
+ /* Complete send */
+ mad_send_wc.status = IB_WC_SUCCESS;
+ mad_send_wc.vendor_err = 0;
+ mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
+ if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
+ snoop_send(mad_agent_priv->qp_info,
+ &local->mad_send_wr->send_buf,
+ &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ &mad_send_wc);
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ atomic_dec(&mad_agent_priv->refcount);
+ if (free_mad)
+ kmem_cache_free(ib_mad_cache, local->mad_priv);
+ kfree(local);
+ }
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+}
+
+static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ int ret;
+
+ if (!mad_send_wr->retries_left)
+ return -ETIMEDOUT;
+
+ mad_send_wr->retries_left--;
+ mad_send_wr->send_buf.retries++;
+
+ mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+
+ if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
+ ret = ib_retry_rmpp(mad_send_wr);
+ switch (ret) {
+ case IB_RMPP_RESULT_UNHANDLED:
+ ret = ib_send_mad(mad_send_wr);
+ break;
+ case IB_RMPP_RESULT_CONSUMED:
+ ret = 0;
+ break;
+ default:
+ ret = -ECOMM;
+ break;
+ }
+ } else
+ ret = ib_send_mad(mad_send_wr);
+
+ if (!ret) {
+ mad_send_wr->refcount++;
+ list_add_tail(&mad_send_wr->agent_list,
+ &mad_send_wr->mad_agent_priv->send_list);
+ }
+ return ret;
+}
+
+static void timeout_sends(struct work_struct *work)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_mad_send_wc mad_send_wc;
+ unsigned long flags;
+
+ mad_agent_priv = container_of(work, struct ib_mad_agent_private,
+ timeout_work);
+ mad_send_wc.vendor_err = 0;
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ while (!list_empty(&mad_agent_priv->wait_list)) {
+ mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
+ struct ib_mad_send_wr_private,
+ agent_list);
+
+ if (time_after(mad_send_wr->timeout, jiffies)) {
+ mod_timer(&mad_agent_priv->timeout_timer,
+ mad_send_wr->timeout);
+ break;
+ }
+
+ list_del(&mad_send_wr->agent_list);
+ if (mad_send_wr->status == IB_WC_SUCCESS &&
+ !retry_send(mad_send_wr))
+ continue;
+
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ if (mad_send_wr->status == IB_WC_SUCCESS)
+ mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
+ else
+ mad_send_wc.status = mad_send_wr->status;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ &mad_send_wc);
+
+ atomic_dec(&mad_agent_priv->refcount);
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ }
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+}
+
+static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
+{
+ struct ib_mad_port_private *port_priv = cq->cq_context;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+ if (!list_empty(&port_priv->port_list))
+ queue_work(port_priv->wq, &port_priv->work);
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+}
+
+/*
+ * Allocate receive MADs and post receive WRs for them
+ */
+static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
+ struct ib_mad_private *mad)
+{
+ unsigned long flags;
+ int post, ret;
+ struct ib_mad_private *mad_priv;
+ struct ib_sge sg_list;
+ struct ib_recv_wr recv_wr, *bad_recv_wr;
+ struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
+
+ /* Initialize common scatter list fields */
+ sg_list.length = sizeof *mad_priv - sizeof mad_priv->header;
+ sg_list.lkey = (*qp_info->port_priv->mr).lkey;
+
+ /* Initialize common receive WR fields */
+ recv_wr.next = NULL;
+ recv_wr.sg_list = &sg_list;
+ recv_wr.num_sge = 1;
+
+ do {
+ /* Allocate and map receive buffer */
+ if (mad) {
+ mad_priv = mad;
+ mad = NULL;
+ } else {
+ mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
+ if (!mad_priv) {
+ printk(KERN_ERR PFX "No memory for receive buffer\n");
+ ret = -ENOMEM;
+ break;
+ }
+ }
+ sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
+ &mad_priv->grh,
+ sizeof *mad_priv -
+ sizeof mad_priv->header,
+ DMA_FROM_DEVICE);
+ mad_priv->header.mapping = sg_list.addr;
+ recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
+ mad_priv->header.mad_list.mad_queue = recv_queue;
+
+ /* Post receive WR */
+ spin_lock_irqsave(&recv_queue->lock, flags);
+ post = (++recv_queue->count < recv_queue->max_active);
+ list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
+ spin_unlock_irqrestore(&recv_queue->lock, flags);
+ ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
+ if (ret) {
+ spin_lock_irqsave(&recv_queue->lock, flags);
+ list_del(&mad_priv->header.mad_list.list);
+ recv_queue->count--;
+ spin_unlock_irqrestore(&recv_queue->lock, flags);
+ ib_dma_unmap_single(qp_info->port_priv->device,
+ mad_priv->header.mapping,
+ sizeof *mad_priv -
+ sizeof mad_priv->header,
+ DMA_FROM_DEVICE);
+ kmem_cache_free(ib_mad_cache, mad_priv);
+ printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
+ break;
+ }
+ } while (post);
+
+ return ret;
+}
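+
+/*
+ * The loop above keeps posting until the receive queue is full
+ * (post goes false at max_active) or an allocation/post fails, so in
+ * the steady state the QP always has a full complement of receive
+ * buffers outstanding.
+ */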
+
+/*
+ * Return all the posted receive MADs
+ */
+static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
+{
+ struct ib_mad_private_header *mad_priv_hdr;
+ struct ib_mad_private *recv;
+ struct ib_mad_list_head *mad_list;
+
+ if (!qp_info->qp)
+ return;
+
+ while (!list_empty(&qp_info->recv_queue.list)) {
+
+ mad_list = list_entry(qp_info->recv_queue.list.next,
+ struct ib_mad_list_head, list);
+ mad_priv_hdr = container_of(mad_list,
+ struct ib_mad_private_header,
+ mad_list);
+ recv = container_of(mad_priv_hdr, struct ib_mad_private,
+ header);
+
+ /* Remove from posted receive MAD list */
+ list_del(&mad_list->list);
+
+ ib_dma_unmap_single(qp_info->port_priv->device,
+ recv->header.mapping,
+ sizeof(struct ib_mad_private) -
+ sizeof(struct ib_mad_private_header),
+ DMA_FROM_DEVICE);
+ kmem_cache_free(ib_mad_cache, recv);
+ }
+
+ qp_info->recv_queue.count = 0;
+}
+
+/*
+ * Start the port
+ */
+static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
+{
+ int ret, i;
+ struct ib_qp_attr *attr;
+ struct ib_qp *qp;
+
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ if (!attr) {
+ printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
+ return -ENOMEM;
+ }
+
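+ /* Walk each MAD QP through the RESET -> INIT -> RTR -> RTS transitions */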
+ for (i = 0; i < IB_MAD_QPS_CORE; i++) {
+ qp = port_priv->qp_info[i].qp;
+ if (!qp)
+ continue;
+
+ /*
+ * PKey index for QP1 is irrelevant but
+ * one is needed for the Reset to Init transition
+ */
+ attr->qp_state = IB_QPS_INIT;
+ attr->pkey_index = 0;
+ attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
+ ret = ib_modify_qp(qp, attr, IB_QP_STATE |
+ IB_QP_PKEY_INDEX | IB_QP_QKEY);
+ if (ret) {
+ printk(KERN_ERR PFX "Couldn't change QP%d state to "
+ "INIT: %d\n", i, ret);
+ goto out;
+ }
+
+ attr->qp_state = IB_QPS_RTR;
+ ret = ib_modify_qp(qp, attr, IB_QP_STATE);
+ if (ret) {
+ printk(KERN_ERR PFX "Couldn't change QP%d state to "
+ "RTR: %d\n", i, ret);
+ goto out;
+ }
+
+ attr->qp_state = IB_QPS_RTS;
+ attr->sq_psn = IB_MAD_SEND_Q_PSN;
+ ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
+ if (ret) {
+ printk(KERN_ERR PFX "Couldn't change QP%d state to "
+ "RTS: %d\n", i, ret);
+ goto out;
+ }
+ }
+
+ ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
+ if (ret) {
+ printk(KERN_ERR PFX "Failed to request completion "
+ "notification: %d\n", ret);
+ goto out;
+ }
+
+ for (i = 0; i < IB_MAD_QPS_CORE; i++) {
+ if (!port_priv->qp_info[i].qp)
+ continue;
+
+ ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
+ if (ret) {
+ printk(KERN_ERR PFX "Couldn't post receive WRs\n");
+ goto out;
+ }
+ }
+out:
+ kfree(attr);
+ return ret;
+}
+
+static void qp_event_handler(struct ib_event *event, void *qp_context)
+{
+ struct ib_mad_qp_info *qp_info = qp_context;
+
+ /* It's worse than that! He's dead, Jim! */
+ printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
+ event->event, qp_info->qp->qp_num);
+}
+
+static void init_mad_queue(struct ib_mad_qp_info *qp_info,
+ struct ib_mad_queue *mad_queue)
+{
+ mad_queue->qp_info = qp_info;
+ mad_queue->count = 0;
+ spin_lock_init(&mad_queue->lock);
+ INIT_LIST_HEAD(&mad_queue->list);
+}
+
+static void init_mad_qp(struct ib_mad_port_private *port_priv,
+ struct ib_mad_qp_info *qp_info)
+{
+ qp_info->port_priv = port_priv;
+ init_mad_queue(qp_info, &qp_info->send_queue);
+ init_mad_queue(qp_info, &qp_info->recv_queue);
+ INIT_LIST_HEAD(&qp_info->overflow_list);
+ spin_lock_init(&qp_info->snoop_lock);
+ qp_info->snoop_table = NULL;
+ qp_info->snoop_table_size = 0;
+ atomic_set(&qp_info->snoop_count, 0);
+}
+
+static int create_mad_qp(struct ib_mad_qp_info *qp_info,
+ enum ib_qp_type qp_type)
+{
+ struct ib_qp_init_attr qp_init_attr;
+ int ret;
+
+ memset(&qp_init_attr, 0, sizeof qp_init_attr);
+ qp_init_attr.send_cq = qp_info->port_priv->cq;
+ qp_init_attr.recv_cq = qp_info->port_priv->cq;
+ qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ qp_init_attr.cap.max_send_wr = mad_sendq_size;
+ qp_init_attr.cap.max_recv_wr = mad_recvq_size;
+ qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
+ qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
+ qp_init_attr.qp_type = qp_type;
+ qp_init_attr.port_num = qp_info->port_priv->port_num;
+ qp_init_attr.qp_context = qp_info;
+ qp_init_attr.event_handler = qp_event_handler;
+ qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
+ if (IS_ERR(qp_info->qp)) {
+ printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
+ get_spl_qp_index(qp_type));
+ ret = PTR_ERR(qp_info->qp);
+ goto error;
+ }
+ /* Use minimum queue sizes unless the CQ is resized */
+ qp_info->send_queue.max_active = mad_sendq_size;
+ qp_info->recv_queue.max_active = mad_recvq_size;
+ return 0;
+
+error:
+ return ret;
+}
+
+static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
+{
+ if (!qp_info->qp)
+ return;
+
+ ib_destroy_qp(qp_info->qp);
+ kfree(qp_info->snoop_table);
+}
+
+/*
+ * Open the port
+ * Create the QP, PD, MR, and CQ if needed
+ */
+static int ib_mad_port_open(struct ib_device *device,
+ int port_num)
+{
+ int ret, cq_size;
+ struct ib_mad_port_private *port_priv;
+ unsigned long flags;
+ char name[sizeof "ib_mad123"];
+ int has_smi;
+
+ /* Create new device info */
+ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
+ if (!port_priv) {
+ printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
+ return -ENOMEM;
+ }
+
+ port_priv->device = device;
+ port_priv->port_num = port_num;
+ spin_lock_init(&port_priv->reg_lock);
+ INIT_LIST_HEAD(&port_priv->agent_list);
+ init_mad_qp(port_priv, &port_priv->qp_info[0]);
+ init_mad_qp(port_priv, &port_priv->qp_info[1]);
+
+ cq_size = mad_sendq_size + mad_recvq_size;
+ has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND;
+ if (has_smi)
+ cq_size *= 2;
+
+ port_priv->cq = ib_create_cq(port_priv->device,
+ ib_mad_thread_completion_handler,
+ NULL, port_priv, cq_size, 0);
+ if (IS_ERR(port_priv->cq)) {
+ printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
+ ret = PTR_ERR(port_priv->cq);
+ goto error3;
+ }
+
+ port_priv->pd = ib_alloc_pd(device);
+ if (IS_ERR(port_priv->pd)) {
+ printk(KERN_ERR PFX "Couldn't create ib_mad PD\n");
+ ret = PTR_ERR(port_priv->pd);
+ goto error4;
+ }
+
+ port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(port_priv->mr)) {
+ printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n");
+ ret = PTR_ERR(port_priv->mr);
+ goto error5;
+ }
+
+ if (has_smi) {
+ ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
+ if (ret)
+ goto error6;
+ }
+ ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
+ if (ret)
+ goto error7;
+
+ snprintf(name, sizeof name, "ib_mad%d", port_num);
+ port_priv->wq = create_singlethread_workqueue(name);
+ if (!port_priv->wq) {
+ ret = -ENOMEM;
+ goto error8;
+ }
+ INIT_WORK(&port_priv->work, ib_mad_completion_handler);
+
+ spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+ list_add_tail(&port_priv->port_list, &ib_mad_port_list);
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+
+ ret = ib_mad_port_start(port_priv);
+ if (ret) {
+ printk(KERN_ERR PFX "Couldn't start port\n");
+ goto error9;
+ }
+
+ return 0;
+
+error9:
+ spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+ list_del_init(&port_priv->port_list);
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+
+ destroy_workqueue(port_priv->wq);
+error8:
+ destroy_mad_qp(&port_priv->qp_info[1]);
+error7:
+ destroy_mad_qp(&port_priv->qp_info[0]);
+error6:
+ ib_dereg_mr(port_priv->mr);
+error5:
+ ib_dealloc_pd(port_priv->pd);
+error4:
+ ib_destroy_cq(port_priv->cq);
+ cleanup_recv_queue(&port_priv->qp_info[1]);
+ cleanup_recv_queue(&port_priv->qp_info[0]);
+error3:
+ kfree(port_priv);
+
+ return ret;
+}
+
+/*
+ * Close the port
+ * If there are no classes using the port, free the port
+ * resources (CQ, MR, PD, QP) and remove the port's info structure
+ */
+static int ib_mad_port_close(struct ib_device *device, int port_num)
+{
+ struct ib_mad_port_private *port_priv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+ port_priv = __ib_get_mad_port(device, port_num);
+ if (port_priv == NULL) {
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+ printk(KERN_ERR PFX "Port %d not found\n", port_num);
+ return -ENODEV;
+ }
+ list_del_init(&port_priv->port_list);
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+
+ destroy_workqueue(port_priv->wq);
+ destroy_mad_qp(&port_priv->qp_info[1]);
+ destroy_mad_qp(&port_priv->qp_info[0]);
+ ib_dereg_mr(port_priv->mr);
+ ib_dealloc_pd(port_priv->pd);
+ ib_destroy_cq(port_priv->cq);
+ cleanup_recv_queue(&port_priv->qp_info[1]);
+ cleanup_recv_queue(&port_priv->qp_info[0]);
+ /* XXX: Handle deallocation of MAD registration tables */
+
+ kfree(port_priv);
+
+ return 0;
+}
+
+static void ib_mad_init_device(struct ib_device *device)
+{
+ int start, end, i;
+
+ if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
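+ /* A switch is managed through port 0 only; CA ports are numbered from 1 */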
+ if (device->node_type == RDMA_NODE_IB_SWITCH) {
+ start = 0;
+ end = 0;
+ } else {
+ start = 1;
+ end = device->phys_port_cnt;
+ }
+
+ for (i = start; i <= end; i++) {
+ if (ib_mad_port_open(device, i)) {
+ printk(KERN_ERR PFX "Couldn't open %s port %d\n",
+ device->name, i);
+ goto error;
+ }
+ if (ib_agent_port_open(device, i)) {
+ printk(KERN_ERR PFX "Couldn't open %s port %d "
+ "for agents\n",
+ device->name, i);
+ goto error_agent;
+ }
+ }
+ return;
+
+error_agent:
+ if (ib_mad_port_close(device, i))
+ printk(KERN_ERR PFX "Couldn't close %s port %d\n",
+ device->name, i);
+
+error:
+ i--;
+
+ while (i >= start) {
+ if (ib_agent_port_close(device, i))
+ printk(KERN_ERR PFX "Couldn't close %s port %d "
+ "for agents\n",
+ device->name, i);
+ if (ib_mad_port_close(device, i))
+ printk(KERN_ERR PFX "Couldn't close %s port %d\n",
+ device->name, i);
+ i--;
+ }
+}
+
+static void ib_mad_remove_device(struct ib_device *device)
+{
+ int i, num_ports, cur_port;
+
+ if (device->node_type == RDMA_NODE_IB_SWITCH) {
+ num_ports = 1;
+ cur_port = 0;
+ } else {
+ num_ports = device->phys_port_cnt;
+ cur_port = 1;
+ }
+ for (i = 0; i < num_ports; i++, cur_port++) {
+ if (ib_agent_port_close(device, cur_port))
+ printk(KERN_ERR PFX "Couldn't close %s port %d "
+ "for agents\n",
+ device->name, cur_port);
+ if (ib_mad_port_close(device, cur_port))
+ printk(KERN_ERR PFX "Couldn't close %s port %d\n",
+ device->name, cur_port);
+ }
+}
+
+static struct ib_client mad_client = {
+ .name = "mad",
+ .add = ib_mad_init_device,
+ .remove = ib_mad_remove_device
+};
+
+static int __init ib_mad_init_module(void)
+{
+ int ret;
+
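+ /* Clamp the module-tunable queue sizes to the supported range */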
+ mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
+ mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
+
+ mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
+ mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
+
+ spin_lock_init(&ib_mad_port_list_lock);
+
+ ib_mad_cache = kmem_cache_create("ib_mad",
+ sizeof(struct ib_mad_private),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!ib_mad_cache) {
+ printk(KERN_ERR PFX "Couldn't create ib_mad cache\n");
+ ret = -ENOMEM;
+ goto error1;
+ }
+
+ INIT_LIST_HEAD(&ib_mad_port_list);
+
+ if (ib_register_client(&mad_client)) {
+ printk(KERN_ERR PFX "Couldn't register ib_mad client\n");
+ ret = -EINVAL;
+ goto error2;
+ }
+
+ return 0;
+
+error2:
+ kmem_cache_destroy(ib_mad_cache);
+error1:
+ return ret;
+}
+
+static void __exit ib_mad_cleanup_module(void)
+{
+ ib_unregister_client(&mad_client);
+ kmem_cache_destroy(ib_mad_cache);
+}
+
+module_init(ib_mad_init_module);
+module_exit(ib_mad_cleanup_module);
+
Property changes on: trunk/sys/ofed/drivers/infiniband/core/mad.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/mad_priv.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/mad_priv.h (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/mad_priv.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __IB_MAD_PRIV_H__
+#define __IB_MAD_PRIV_H__
+
+#include <linux/completion.h>
+#include <linux/err.h>
+#include <linux/workqueue.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+
+
+#define PFX "ib_mad: "
+
+#define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */
+
+/* QP and CQ parameters */
+#define IB_MAD_QP_SEND_SIZE 128
+#define IB_MAD_QP_RECV_SIZE 512
+#define IB_MAD_QP_MIN_SIZE 64
+#define IB_MAD_QP_MAX_SIZE 8192
+#define IB_MAD_SEND_REQ_MAX_SG 2
+#define IB_MAD_RECV_REQ_MAX_SG 1
+
+#define IB_MAD_SEND_Q_PSN 0
+
+/* Registration table sizes */
+#define MAX_MGMT_CLASS 80
+#define MAX_MGMT_VERSION 8
+#define MAX_MGMT_OUI 8
+#define MAX_MGMT_VENDOR_RANGE2 (IB_MGMT_CLASS_VENDOR_RANGE2_END - \
+ IB_MGMT_CLASS_VENDOR_RANGE2_START + 1)
+
+struct ib_mad_list_head {
+ struct list_head list;
+ struct ib_mad_queue *mad_queue;
+};
+
+struct ib_mad_private_header {
+ struct ib_mad_list_head mad_list;
+ struct ib_mad_recv_wc recv_wc;
+ struct ib_wc wc;
+ u64 mapping;
+} __attribute__ ((packed));
+
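+/*
+ * Layout of a receive buffer: the private header, then the GRH as
+ * delivered by the hardware, then the MAD itself.  Only the GRH and MAD
+ * portions are DMA-mapped, hence the
+ * "sizeof *mad_priv - sizeof mad_priv->header" arithmetic in mad.c.
+ */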
+struct ib_mad_private {
+ struct ib_mad_private_header header;
+ struct ib_grh grh;
+ union {
+ struct ib_mad mad;
+ struct ib_rmpp_mad rmpp_mad;
+ struct ib_smp smp;
+ } mad;
+} __attribute__ ((packed));
+
+struct ib_rmpp_segment {
+ struct list_head list;
+ u32 num;
+ u8 data[0];
+};
+
+struct ib_mad_agent_private {
+ struct list_head agent_list;
+ struct ib_mad_agent agent;
+ struct ib_mad_reg_req *reg_req;
+ struct ib_mad_qp_info *qp_info;
+
+ spinlock_t lock;
+ struct list_head send_list;
+ struct list_head wait_list;
+ struct list_head done_list;
+ struct work_struct timeout_work;
+ struct timer_list timeout_timer;
+ unsigned long timeout;
+ struct list_head local_list;
+ struct work_struct local_work;
+ struct list_head rmpp_list;
+
+ atomic_t refcount;
+ struct completion comp;
+};
+
+struct ib_mad_snoop_private {
+ struct ib_mad_agent agent;
+ struct ib_mad_qp_info *qp_info;
+ int snoop_index;
+ int mad_snoop_flags;
+ atomic_t refcount;
+ struct completion comp;
+};
+
+struct ib_mad_send_wr_private {
+ struct ib_mad_list_head mad_list;
+ struct list_head agent_list;
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_buf send_buf;
+ u64 header_mapping;
+ u64 payload_mapping;
+ struct ib_send_wr send_wr;
+ struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
+ __be64 tid;
+ unsigned long timeout;
+ int max_retries;
+ int retries_left;
+ int retry;
+ int refcount;
+ enum ib_wc_status status;
+
+ /* RMPP control */
+ struct list_head rmpp_list;
+ struct ib_rmpp_segment *last_ack_seg;
+ struct ib_rmpp_segment *cur_seg;
+ int last_ack;
+ int seg_num;
+ int newwin;
+ int pad;
+};
+
+struct ib_mad_local_private {
+ struct list_head completion_list;
+ struct ib_mad_private *mad_priv;
+ struct ib_mad_agent_private *recv_mad_agent;
+ struct ib_mad_send_wr_private *mad_send_wr;
+};
+
+struct ib_mad_mgmt_method_table {
+ struct ib_mad_agent_private *agent[IB_MGMT_MAX_METHODS];
+};
+
+struct ib_mad_mgmt_class_table {
+ struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_CLASS];
+};
+
+struct ib_mad_mgmt_vendor_class {
+ u8 oui[MAX_MGMT_OUI][3];
+ struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_OUI];
+};
+
+struct ib_mad_mgmt_vendor_class_table {
+ struct ib_mad_mgmt_vendor_class *vendor_class[MAX_MGMT_VENDOR_RANGE2];
+};
+
+struct ib_mad_mgmt_version_table {
+ struct ib_mad_mgmt_class_table *class;
+ struct ib_mad_mgmt_vendor_class_table *vendor;
+};
+
+struct ib_mad_queue {
+ spinlock_t lock;
+ struct list_head list;
+ int count;
+ int max_active;
+ struct ib_mad_qp_info *qp_info;
+};
+
+struct ib_mad_qp_info {
+ struct ib_mad_port_private *port_priv;
+ struct ib_qp *qp;
+ struct ib_mad_queue send_queue;
+ struct ib_mad_queue recv_queue;
+ struct list_head overflow_list;
+ spinlock_t snoop_lock;
+ struct ib_mad_snoop_private **snoop_table;
+ int snoop_table_size;
+ atomic_t snoop_count;
+};
+
+struct ib_mad_port_private {
+ struct list_head port_list;
+ struct ib_device *device;
+ int port_num;
+ struct ib_cq *cq;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+
+ spinlock_t reg_lock;
+ struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
+ struct list_head agent_list;
+ struct workqueue_struct *wq;
+ struct work_struct work;
+ struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
+};
+
+int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);
+
+struct ib_mad_send_wr_private *
+ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_recv_wc *mad_recv_wc);
+
+void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_send_wc *mad_send_wc);
+
+void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
+
+void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
+ int timeout_ms);
+
+#endif /* __IB_MAD_PRIV_H__ */
Property changes on: trunk/sys/ofed/drivers/infiniband/core/mad_priv.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/mad_rmpp.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/mad_rmpp.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/mad_rmpp.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,951 @@
+/*
+ * Copyright (c) 2005 Intel Inc. All rights reserved.
+ * Copyright (c) 2005-2006 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mad_priv.h"
+#include "mad_rmpp.h"
+
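+/*
+ * Per-transaction receive state: ACTIVE while segments are arriving,
+ * TIMEOUT when the transfer stalls, COMPLETE once the last segment has
+ * been received, CANCELING while the owning agent is being torn down.
+ */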
+enum rmpp_state {
+ RMPP_STATE_ACTIVE,
+ RMPP_STATE_TIMEOUT,
+ RMPP_STATE_COMPLETE,
+ RMPP_STATE_CANCELING
+};
+
+struct mad_rmpp_recv {
+ struct ib_mad_agent_private *agent;
+ struct list_head list;
+ struct delayed_work timeout_work;
+ struct delayed_work cleanup_work;
+ struct completion comp;
+ enum rmpp_state state;
+ spinlock_t lock;
+ atomic_t refcount;
+
+ struct ib_ah *ah;
+ struct ib_mad_recv_wc *rmpp_wc;
+ struct ib_mad_recv_buf *cur_seg_buf;
+ int last_ack;
+ int seg_num;
+ int newwin;
+ int repwin;
+
+ __be64 tid;
+ u32 src_qp;
+ u16 slid;
+ u8 mgmt_class;
+ u8 class_version;
+ u8 method;
+};
+
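+/*
+ * A mad_rmpp_recv is reference counted; the final deref completes
+ * rmpp_recv->comp, which destroy_rmpp_recv() waits on before freeing,
+ * so the destroyer must hold the last reference.
+ */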
+static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
+{
+ if (atomic_dec_and_test(&rmpp_recv->refcount))
+ complete(&rmpp_recv->comp);
+}
+
+static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
+{
+ deref_rmpp_recv(rmpp_recv);
+ wait_for_completion(&rmpp_recv->comp);
+ ib_destroy_ah(rmpp_recv->ah);
+ kfree(rmpp_recv);
+}
+
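+/*
+ * Tear down all in-progress receives for an agent: free the work
+ * completions of unfinished transfers and mark every context CANCELING
+ * under the lock, cancel the delayed work items, flush the port
+ * workqueue so no handler is still running, then free the contexts.
+ */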
+void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
+{
+ struct mad_rmpp_recv *rmpp_recv, *temp_rmpp_recv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&agent->lock, flags);
+ list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+ if (rmpp_recv->state != RMPP_STATE_COMPLETE)
+ ib_free_recv_mad(rmpp_recv->rmpp_wc);
+ rmpp_recv->state = RMPP_STATE_CANCELING;
+ }
+ spin_unlock_irqrestore(&agent->lock, flags);
+
+ list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+ cancel_delayed_work(&rmpp_recv->timeout_work);
+ cancel_delayed_work(&rmpp_recv->cleanup_work);
+ }
+
+ flush_workqueue(agent->qp_info->port_priv->wq);
+
+ list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
+ &agent->rmpp_list, list) {
+ list_del(&rmpp_recv->list);
+ destroy_rmpp_recv(rmpp_recv);
+ }
+}
+
+static void format_ack(struct ib_mad_send_buf *msg,
+ struct ib_rmpp_mad *data,
+ struct mad_rmpp_recv *rmpp_recv)
+{
+ struct ib_rmpp_mad *ack = msg->mad;
+ unsigned long flags;
+
+ memcpy(ack, &data->mad_hdr, msg->hdr_len);
+
+ ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
+ ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK;
+ ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+
+ spin_lock_irqsave(&rmpp_recv->lock, flags);
+ rmpp_recv->last_ack = rmpp_recv->seg_num;
+ ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num);
+ ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin);
+ spin_unlock_irqrestore(&rmpp_recv->lock, flags);
+}
+
+static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
+ struct ib_mad_recv_wc *recv_wc)
+{
+ struct ib_mad_send_buf *msg;
+ int ret, hdr_len;
+
+ hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
+ msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
+ recv_wc->wc->pkey_index, 1, hdr_len,
+ 0, GFP_KERNEL);
+ if (IS_ERR(msg))
+ return;
+
+ format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
+ msg->ah = rmpp_recv->ah;
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ ib_free_send_mad(msg);
+}
+
+static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
+ struct ib_mad_recv_wc *recv_wc)
+{
+ struct ib_mad_send_buf *msg;
+ struct ib_ah *ah;
+ int hdr_len;
+
+ ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
+ recv_wc->recv_buf.grh, agent->port_num);
+ if (IS_ERR(ah))
+ return (void *) ah;
+
+ hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
+ msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
+ recv_wc->wc->pkey_index, 1,
+ hdr_len, 0, GFP_KERNEL);
+ if (IS_ERR(msg))
+ ib_destroy_ah(ah);
+ else {
+ msg->ah = ah;
+ msg->context[0] = ah;
+ }
+
+ return msg;
+}
+
+static void ack_ds_ack(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *recv_wc)
+{
+ struct ib_mad_send_buf *msg;
+ struct ib_rmpp_mad *rmpp_mad;
+ int ret;
+
+ msg = alloc_response_msg(&agent->agent, recv_wc);
+ if (IS_ERR(msg))
+ return;
+
+ rmpp_mad = msg->mad;
+ memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
+
+ rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
+ ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+ rmpp_mad->rmpp_hdr.seg_num = 0;
+ rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1);
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret) {
+ ib_destroy_ah(msg->ah);
+ ib_free_send_mad(msg);
+ }
+}
+
+void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
+{
+ if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah)
+ ib_destroy_ah(mad_send_wc->send_buf->ah);
+ ib_free_send_mad(mad_send_wc->send_buf);
+}
+
+static void nack_recv(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *recv_wc, u8 rmpp_status)
+{
+ struct ib_mad_send_buf *msg;
+ struct ib_rmpp_mad *rmpp_mad;
+ int ret;
+
+ msg = alloc_response_msg(&agent->agent, recv_wc);
+ if (IS_ERR(msg))
+ return;
+
+ rmpp_mad = msg->mad;
+ memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
+
+ rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
+ rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION;
+ rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ABORT;
+ ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+ rmpp_mad->rmpp_hdr.rmpp_status = rmpp_status;
+ rmpp_mad->rmpp_hdr.seg_num = 0;
+ rmpp_mad->rmpp_hdr.paylen_newwin = 0;
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret) {
+ ib_destroy_ah(msg->ah);
+ ib_free_send_mad(msg);
+ }
+}
+
+static void recv_timeout_handler(struct work_struct *work)
+{
+ struct mad_rmpp_recv *rmpp_recv =
+ container_of(work, struct mad_rmpp_recv, timeout_work.work);
+ struct ib_mad_recv_wc *rmpp_wc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
+ if (rmpp_recv->state != RMPP_STATE_ACTIVE) {
+ spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
+ return;
+ }
+ rmpp_recv->state = RMPP_STATE_TIMEOUT;
+ list_del(&rmpp_recv->list);
+ spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
+
+ rmpp_wc = rmpp_recv->rmpp_wc;
+ nack_recv(rmpp_recv->agent, rmpp_wc, IB_MGMT_RMPP_STATUS_T2L);
+ destroy_rmpp_recv(rmpp_recv);
+ ib_free_recv_mad(rmpp_wc);
+}
+
+static void recv_cleanup_handler(struct work_struct *work)
+{
+ struct mad_rmpp_recv *rmpp_recv =
+ container_of(work, struct mad_rmpp_recv, cleanup_work.work);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
+ if (rmpp_recv->state == RMPP_STATE_CANCELING) {
+ spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
+ return;
+ }
+ list_del(&rmpp_recv->list);
+ spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
+ destroy_rmpp_recv(rmpp_recv);
+}
+
+static struct mad_rmpp_recv *
+create_rmpp_recv(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct mad_rmpp_recv *rmpp_recv;
+ struct ib_mad_hdr *mad_hdr;
+
+ rmpp_recv = kmalloc(sizeof *rmpp_recv, GFP_KERNEL);
+ if (!rmpp_recv)
+ return NULL;
+
+ rmpp_recv->ah = ib_create_ah_from_wc(agent->agent.qp->pd,
+ mad_recv_wc->wc,
+ mad_recv_wc->recv_buf.grh,
+ agent->agent.port_num);
+ if (IS_ERR(rmpp_recv->ah))
+ goto error;
+
+ rmpp_recv->agent = agent;
+ init_completion(&rmpp_recv->comp);
+ INIT_DELAYED_WORK(&rmpp_recv->timeout_work, recv_timeout_handler);
+ INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler);
+ spin_lock_init(&rmpp_recv->lock);
+ rmpp_recv->state = RMPP_STATE_ACTIVE;
+ atomic_set(&rmpp_recv->refcount, 1);
+
+ rmpp_recv->rmpp_wc = mad_recv_wc;
+ rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf;
+ rmpp_recv->newwin = 1;
+ rmpp_recv->seg_num = 1;
+ rmpp_recv->last_ack = 0;
+ rmpp_recv->repwin = 1;
+
+ mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
+ rmpp_recv->tid = mad_hdr->tid;
+ rmpp_recv->src_qp = mad_recv_wc->wc->src_qp;
+ rmpp_recv->slid = mad_recv_wc->wc->slid;
+ rmpp_recv->mgmt_class = mad_hdr->mgmt_class;
+ rmpp_recv->class_version = mad_hdr->class_version;
+ rmpp_recv->method = mad_hdr->method;
+ return rmpp_recv;
+
+error: kfree(rmpp_recv);
+ return NULL;
+}
+
+static struct mad_rmpp_recv *
+find_rmpp_recv(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct mad_rmpp_recv *rmpp_recv;
+ struct ib_mad_hdr *mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
+
+ list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+ if (rmpp_recv->tid == mad_hdr->tid &&
+ rmpp_recv->src_qp == mad_recv_wc->wc->src_qp &&
+ rmpp_recv->slid == mad_recv_wc->wc->slid &&
+ rmpp_recv->mgmt_class == mad_hdr->mgmt_class &&
+ rmpp_recv->class_version == mad_hdr->class_version &&
+ rmpp_recv->method == mad_hdr->method)
+ return rmpp_recv;
+ }
+ return NULL;
+}
+
+static struct mad_rmpp_recv *
+acquire_rmpp_recv(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct mad_rmpp_recv *rmpp_recv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&agent->lock, flags);
+ rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
+ if (rmpp_recv)
+ atomic_inc(&rmpp_recv->refcount);
+ spin_unlock_irqrestore(&agent->lock, flags);
+ return rmpp_recv;
+}
+
+static struct mad_rmpp_recv *
+insert_rmpp_recv(struct ib_mad_agent_private *agent,
+ struct mad_rmpp_recv *rmpp_recv)
+{
+ struct mad_rmpp_recv *cur_rmpp_recv;
+
+ cur_rmpp_recv = find_rmpp_recv(agent, rmpp_recv->rmpp_wc);
+ if (!cur_rmpp_recv)
+ list_add_tail(&rmpp_recv->list, &agent->rmpp_list);
+
+ return cur_rmpp_recv;
+}
+
+static inline int get_last_flag(struct ib_mad_recv_buf *seg)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+
+ rmpp_mad = (struct ib_rmpp_mad *) seg->mad;
+ return ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_LAST;
+}
+
+static inline int get_seg_num(struct ib_mad_recv_buf *seg)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+
+ rmpp_mad = (struct ib_rmpp_mad *) seg->mad;
+ return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
+}
+
+static inline struct ib_mad_recv_buf * get_next_seg(struct list_head *rmpp_list,
+ struct ib_mad_recv_buf *seg)
+{
+ if (seg->list.next == rmpp_list)
+ return NULL;
+
+ return container_of(seg->list.next, struct ib_mad_recv_buf, list);
+}
+
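+/* RMPP receive window: an eighth of the receive queue, minimum of 1 */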
+static inline int window_size(struct ib_mad_agent_private *agent)
+{
+ return max(agent->qp_info->recv_queue.max_active >> 3, 1);
+}
+
+static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
+ int seg_num)
+{
+ struct ib_mad_recv_buf *seg_buf;
+ int cur_seg_num;
+
+ list_for_each_entry_reverse(seg_buf, rmpp_list, list) {
+ cur_seg_num = get_seg_num(seg_buf);
+ if (seg_num > cur_seg_num)
+ return seg_buf;
+ if (seg_num == cur_seg_num)
+ break;
+ }
+ return NULL;
+}
+
+static void update_seg_num(struct mad_rmpp_recv *rmpp_recv,
+ struct ib_mad_recv_buf *new_buf)
+{
+ struct list_head *rmpp_list = &rmpp_recv->rmpp_wc->rmpp_list;
+
+ while (new_buf && (get_seg_num(new_buf) == rmpp_recv->seg_num + 1)) {
+ rmpp_recv->cur_seg_buf = new_buf;
+ rmpp_recv->seg_num++;
+ new_buf = get_next_seg(rmpp_list, new_buf);
+ }
+}
+
+static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+ int hdr_size, data_size, pad;
+
+ rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad;
+
+ hdr_size = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
+ data_size = sizeof(struct ib_rmpp_mad) - hdr_size;
+ pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
+ if (pad > IB_MGMT_RMPP_DATA || pad < 0)
+ pad = 0;
+
+ return hdr_size + rmpp_recv->seg_num * data_size - pad;
+}
+
+static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
+{
+ struct ib_mad_recv_wc *rmpp_wc;
+
+ ack_recv(rmpp_recv, rmpp_recv->rmpp_wc);
+ if (rmpp_recv->seg_num > 1)
+ cancel_delayed_work(&rmpp_recv->timeout_work);
+
+ rmpp_wc = rmpp_recv->rmpp_wc;
+ rmpp_wc->mad_len = get_mad_len(rmpp_recv);
+ /*
+  * Hold the completed receive for 10 seconds, a placeholder until the
+  * actual packet lifetime can be determined.
+  */
+ queue_delayed_work(rmpp_recv->agent->qp_info->port_priv->wq,
+ &rmpp_recv->cleanup_work, msecs_to_jiffies(10000));
+ return rmpp_wc;
+}
+
+static struct ib_mad_recv_wc *
+continue_rmpp(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct mad_rmpp_recv *rmpp_recv;
+ struct ib_mad_recv_buf *prev_buf;
+ struct ib_mad_recv_wc *done_wc;
+ int seg_num;
+ unsigned long flags;
+
+ rmpp_recv = acquire_rmpp_recv(agent, mad_recv_wc);
+ if (!rmpp_recv)
+ goto drop1;
+
+ seg_num = get_seg_num(&mad_recv_wc->recv_buf);
+
+ spin_lock_irqsave(&rmpp_recv->lock, flags);
+ if ((rmpp_recv->state == RMPP_STATE_TIMEOUT) ||
+ (seg_num > rmpp_recv->newwin))
+ goto drop3;
+
+ if ((seg_num <= rmpp_recv->last_ack) ||
+ (rmpp_recv->state == RMPP_STATE_COMPLETE)) {
+ spin_unlock_irqrestore(&rmpp_recv->lock, flags);
+ ack_recv(rmpp_recv, mad_recv_wc);
+ goto drop2;
+ }
+
+ prev_buf = find_seg_location(&rmpp_recv->rmpp_wc->rmpp_list, seg_num);
+ if (!prev_buf)
+ goto drop3;
+
+ done_wc = NULL;
+ list_add(&mad_recv_wc->recv_buf.list, &prev_buf->list);
+ if (rmpp_recv->cur_seg_buf == prev_buf) {
+ update_seg_num(rmpp_recv, &mad_recv_wc->recv_buf);
+ if (get_last_flag(rmpp_recv->cur_seg_buf)) {
+ rmpp_recv->state = RMPP_STATE_COMPLETE;
+ spin_unlock_irqrestore(&rmpp_recv->lock, flags);
+ done_wc = complete_rmpp(rmpp_recv);
+ goto out;
+ } else if (rmpp_recv->seg_num == rmpp_recv->newwin) {
+ rmpp_recv->newwin += window_size(agent);
+ spin_unlock_irqrestore(&rmpp_recv->lock, flags);
+ ack_recv(rmpp_recv, mad_recv_wc);
+ goto out;
+ }
+ }
+ spin_unlock_irqrestore(&rmpp_recv->lock, flags);
+out:
+ deref_rmpp_recv(rmpp_recv);
+ return done_wc;
+
+drop3: spin_unlock_irqrestore(&rmpp_recv->lock, flags);
+drop2: deref_rmpp_recv(rmpp_recv);
+drop1: ib_free_recv_mad(mad_recv_wc);
+ return NULL;
+}
+
+static struct ib_mad_recv_wc *
+start_rmpp(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct mad_rmpp_recv *rmpp_recv;
+ unsigned long flags;
+
+ rmpp_recv = create_rmpp_recv(agent, mad_recv_wc);
+ if (!rmpp_recv) {
+ ib_free_recv_mad(mad_recv_wc);
+ return NULL;
+ }
+
+ spin_lock_irqsave(&agent->lock, flags);
+ if (insert_rmpp_recv(agent, rmpp_recv)) {
+ spin_unlock_irqrestore(&agent->lock, flags);
+ /* duplicate first MAD */
+ destroy_rmpp_recv(rmpp_recv);
+ return continue_rmpp(agent, mad_recv_wc);
+ }
+ atomic_inc(&rmpp_recv->refcount);
+
+ if (get_last_flag(&mad_recv_wc->recv_buf)) {
+ rmpp_recv->state = RMPP_STATE_COMPLETE;
+ spin_unlock_irqrestore(&agent->lock, flags);
+ complete_rmpp(rmpp_recv);
+ } else {
+ spin_unlock_irqrestore(&agent->lock, flags);
+ /*
+  * Time out the transfer after 40 seconds, a placeholder until the
+  * actual packet lifetimes can be determined.
+  */
+ queue_delayed_work(agent->qp_info->port_priv->wq,
+ &rmpp_recv->timeout_work,
+ msecs_to_jiffies(40000));
+ rmpp_recv->newwin += window_size(agent);
+ ack_recv(rmpp_recv, mad_recv_wc);
+ mad_recv_wc = NULL;
+ }
+ deref_rmpp_recv(rmpp_recv);
+ return mad_recv_wc;
+}
+
+static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+ int timeout;
+ u32 paylen = 0;
+
+ rmpp_mad = mad_send_wr->send_buf.mad;
+ ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+ rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(++mad_send_wr->seg_num);
+
+ if (mad_send_wr->seg_num == 1) {
+ rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST;
+ paylen = mad_send_wr->send_buf.seg_count * IB_MGMT_RMPP_DATA -
+ mad_send_wr->pad;
+ }
+
+ if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) {
+ rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST;
+ paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad;
+ }
+ rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
+
+ /*
+  * Allow 2 seconds for an ACK, a placeholder until the actual packet
+  * lifetime can be determined.
+  */
+ timeout = mad_send_wr->send_buf.timeout_ms;
+ if (!timeout || timeout > 2000)
+ mad_send_wr->timeout = msecs_to_jiffies(2000);
+
+ return ib_send_mad(mad_send_wr);
+}
+
+static void abort_send(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc, u8 rmpp_status)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_mad_send_wc wc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&agent->lock, flags);
+ mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
+ if (!mad_send_wr)
+ goto out; /* Unmatched send */
+
+ if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
+ (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
+ goto out; /* Send is already done */
+
+ ib_mark_mad_done(mad_send_wr);
+ spin_unlock_irqrestore(&agent->lock, flags);
+
+ wc.status = IB_WC_REM_ABORT_ERR;
+ wc.vendor_err = rmpp_status;
+ wc.send_buf = &mad_send_wr->send_buf;
+ ib_mad_complete_send_wr(mad_send_wr, &wc);
+ return;
+out:
+ spin_unlock_irqrestore(&agent->lock, flags);
+}
+
+static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
+ int seg_num)
+{
+ struct list_head *list;
+
+ wr->last_ack = seg_num;
+ list = &wr->last_ack_seg->list;
+ list_for_each_entry(wr->last_ack_seg, list, list)
+ if (wr->last_ack_seg->num == seg_num)
+ break;
+}
+
+static void process_ds_ack(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc, int newwin)
+{
+ struct mad_rmpp_recv *rmpp_recv;
+
+ rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
+ if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE)
+ rmpp_recv->repwin = newwin;
+}
+
+static void process_rmpp_ack(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_rmpp_mad *rmpp_mad;
+ unsigned long flags;
+ int seg_num, newwin, ret;
+
+ rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
+ if (rmpp_mad->rmpp_hdr.rmpp_status) {
+ abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
+ nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
+ return;
+ }
+
+ seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
+ newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
+ if (newwin < seg_num) {
+ abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
+ nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
+ return;
+ }
+
+ spin_lock_irqsave(&agent->lock, flags);
+ mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
+ if (!mad_send_wr) {
+ if (!seg_num)
+ process_ds_ack(agent, mad_recv_wc, newwin);
+ goto out; /* Unmatched or DS RMPP ACK */
+ }
+
+ if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) &&
+ (mad_send_wr->timeout)) {
+ spin_unlock_irqrestore(&agent->lock, flags);
+ ack_ds_ack(agent, mad_recv_wc);
+ return; /* Repeated ACK for DS RMPP transaction */
+ }
+
+ if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
+ (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
+ goto out; /* Send is already done */
+
+ if (seg_num > mad_send_wr->send_buf.seg_count ||
+ seg_num > mad_send_wr->newwin) {
+ spin_unlock_irqrestore(&agent->lock, flags);
+ abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
+ nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
+ return;
+ }
+
+ if (newwin < mad_send_wr->newwin || seg_num < mad_send_wr->last_ack)
+ goto out; /* Old ACK */
+
+ if (seg_num > mad_send_wr->last_ack) {
+ adjust_last_ack(mad_send_wr, seg_num);
+ mad_send_wr->retries_left = mad_send_wr->max_retries;
+ }
+ mad_send_wr->newwin = newwin;
+ if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
+ /* If no response is expected, the ACK completes the send */
+ if (!mad_send_wr->send_buf.timeout_ms) {
+ struct ib_mad_send_wc wc;
+
+ ib_mark_mad_done(mad_send_wr);
+ spin_unlock_irqrestore(&agent->lock, flags);
+
+ wc.status = IB_WC_SUCCESS;
+ wc.vendor_err = 0;
+ wc.send_buf = &mad_send_wr->send_buf;
+ ib_mad_complete_send_wr(mad_send_wr, &wc);
+ return;
+ }
+ if (mad_send_wr->refcount == 1)
+ ib_reset_mad_timeout(mad_send_wr,
+ mad_send_wr->send_buf.timeout_ms);
+ spin_unlock_irqrestore(&agent->lock, flags);
+ ack_ds_ack(agent, mad_recv_wc);
+ return;
+ } else if (mad_send_wr->refcount == 1 &&
+ mad_send_wr->seg_num < mad_send_wr->newwin &&
+ mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
+ /* Send failure will just result in a timeout/retry */
+ ret = send_next_seg(mad_send_wr);
+ if (ret)
+ goto out;
+
+ mad_send_wr->refcount++;
+ list_move_tail(&mad_send_wr->agent_list,
+ &mad_send_wr->mad_agent_priv->send_list);
+ }
+out:
+ spin_unlock_irqrestore(&agent->lock, flags);
+}
+
+static struct ib_mad_recv_wc *
+process_rmpp_data(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_rmpp_hdr *rmpp_hdr;
+ u8 rmpp_status;
+
+ rmpp_hdr = &((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr;
+
+ if (rmpp_hdr->rmpp_status) {
+ rmpp_status = IB_MGMT_RMPP_STATUS_BAD_STATUS;
+ goto bad;
+ }
+
+ if (rmpp_hdr->seg_num == cpu_to_be32(1)) {
+ if (!(ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST)) {
+ rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG;
+ goto bad;
+ }
+ return start_rmpp(agent, mad_recv_wc);
+ } else {
+ if (ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST) {
+ rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG;
+ goto bad;
+ }
+ return continue_rmpp(agent, mad_recv_wc);
+ }
+bad:
+ nack_recv(agent, mad_recv_wc, rmpp_status);
+ ib_free_recv_mad(mad_recv_wc);
+ return NULL;
+}
+
+static void process_rmpp_stop(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+
+ rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
+
+ if (rmpp_mad->rmpp_hdr.rmpp_status != IB_MGMT_RMPP_STATUS_RESX) {
+ abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
+ nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
+ } else
+ abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status);
+}
+
+static void process_rmpp_abort(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+
+ rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
+
+ if (rmpp_mad->rmpp_hdr.rmpp_status < IB_MGMT_RMPP_STATUS_ABORT_MIN ||
+ rmpp_mad->rmpp_hdr.rmpp_status > IB_MGMT_RMPP_STATUS_ABORT_MAX) {
+ abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
+ nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
+ } else
+ abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status);
+}
+
+struct ib_mad_recv_wc *
+ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+
+ rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
+ if (!(rmpp_mad->rmpp_hdr.rmpp_rtime_flags & IB_MGMT_RMPP_FLAG_ACTIVE))
+ return mad_recv_wc;
+
+ if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) {
+ abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
+ nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
+ goto out;
+ }
+
+ switch (rmpp_mad->rmpp_hdr.rmpp_type) {
+ case IB_MGMT_RMPP_TYPE_DATA:
+ return process_rmpp_data(agent, mad_recv_wc);
+ case IB_MGMT_RMPP_TYPE_ACK:
+ process_rmpp_ack(agent, mad_recv_wc);
+ break;
+ case IB_MGMT_RMPP_TYPE_STOP:
+ process_rmpp_stop(agent, mad_recv_wc);
+ break;
+ case IB_MGMT_RMPP_TYPE_ABORT:
+ process_rmpp_abort(agent, mad_recv_wc);
+ break;
+ default:
+ abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
+ nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
+ break;
+ }
+out:
+ ib_free_recv_mad(mad_recv_wc);
+ return NULL;
+}
+
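+/*
+ * For a response MAD, seed the send window from the reply window
+ * advertised by the matching receive context (double-sided RMPP);
+ * otherwise start with a window of one segment.
+ */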
+static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
+ struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
+ struct mad_rmpp_recv *rmpp_recv;
+ struct ib_ah_attr ah_attr;
+ unsigned long flags;
+ int newwin = 1;
+
+ if (!(mad_hdr->method & IB_MGMT_METHOD_RESP))
+ goto out;
+
+ spin_lock_irqsave(&agent->lock, flags);
+ list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+ if (rmpp_recv->tid != mad_hdr->tid ||
+ rmpp_recv->mgmt_class != mad_hdr->mgmt_class ||
+ rmpp_recv->class_version != mad_hdr->class_version ||
+ (rmpp_recv->method & IB_MGMT_METHOD_RESP))
+ continue;
+
+ if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+ continue;
+
+ if (rmpp_recv->slid == ah_attr.dlid) {
+ newwin = rmpp_recv->repwin;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&agent->lock, flags);
+out:
+ return newwin;
+}
+
+int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+ int ret;
+
+ rmpp_mad = mad_send_wr->send_buf.mad;
+ if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+ IB_MGMT_RMPP_FLAG_ACTIVE))
+ return IB_RMPP_RESULT_UNHANDLED;
+
+ if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
+ mad_send_wr->seg_num = 1;
+ return IB_RMPP_RESULT_INTERNAL;
+ }
+
+ mad_send_wr->newwin = init_newwin(mad_send_wr);
+
+ /* We need to wait for the final ACK even if there isn't a response */
+ mad_send_wr->refcount += (mad_send_wr->timeout == 0);
+ ret = send_next_seg(mad_send_wr);
+ if (!ret)
+ return IB_RMPP_RESULT_CONSUMED;
+ return ret;
+}
+
+int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+ int ret;
+
+ rmpp_mad = mad_send_wr->send_buf.mad;
+ if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+ IB_MGMT_RMPP_FLAG_ACTIVE))
+ return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
+
+ if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
+ return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */
+
+ if (mad_send_wc->status != IB_WC_SUCCESS ||
+ mad_send_wr->status != IB_WC_SUCCESS)
+ return IB_RMPP_RESULT_PROCESSED; /* Canceled or send error */
+
+ if (!mad_send_wr->timeout)
+ return IB_RMPP_RESULT_PROCESSED; /* Response received */
+
+ if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
+ mad_send_wr->timeout =
+ msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+ return IB_RMPP_RESULT_PROCESSED; /* Send done */
+ }
+
+ if (mad_send_wr->seg_num == mad_send_wr->newwin ||
+ mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count)
+ return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */
+
+ ret = send_next_seg(mad_send_wr);
+ if (ret) {
+ mad_send_wc->status = IB_WC_GENERAL_ERR;
+ return IB_RMPP_RESULT_PROCESSED;
+ }
+ return IB_RMPP_RESULT_CONSUMED;
+}
+
+int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+ int ret;
+
+ rmpp_mad = mad_send_wr->send_buf.mad;
+ if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+ IB_MGMT_RMPP_FLAG_ACTIVE))
+ return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
+
+ if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count)
+ return IB_RMPP_RESULT_PROCESSED;
+
+ mad_send_wr->seg_num = mad_send_wr->last_ack;
+ mad_send_wr->cur_seg = mad_send_wr->last_ack_seg;
+
+ ret = send_next_seg(mad_send_wr);
+ if (ret)
+ return IB_RMPP_RESULT_PROCESSED;
+
+ return IB_RMPP_RESULT_CONSUMED;
+}
Property changes on: trunk/sys/ofed/drivers/infiniband/core/mad_rmpp.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/mad_rmpp.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/mad_rmpp.h (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/mad_rmpp.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2005 Intel Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MAD_RMPP_H__
+#define __MAD_RMPP_H__
+
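+/*
+ * Results the RMPP entry points hand back to the core MAD code:
+ * PROCESSED - handled here; the caller continues its normal path,
+ * CONSUMED  - ownership taken; the caller must not touch the MAD again,
+ * INTERNAL  - an RMPP control MAD (ACK/STOP/ABORT) handled internally,
+ * UNHANDLED - RMPP is not active for this MAD.
+ */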
+enum {
+ IB_RMPP_RESULT_PROCESSED,
+ IB_RMPP_RESULT_CONSUMED,
+ IB_RMPP_RESULT_INTERNAL,
+ IB_RMPP_RESULT_UNHANDLED
+};
+
+int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr);
+
+struct ib_mad_recv_wc *
+ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
+ struct ib_mad_recv_wc *mad_recv_wc);
+
+int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_send_wc *mad_send_wc);
+
+void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc);
+
+void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent);
+
+int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr);
+
+#endif /* __MAD_RMPP_H__ */
Property changes on: trunk/sys/ofed/drivers/infiniband/core/mad_rmpp.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/multicast.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/multicast.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/multicast.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,868 @@
+/*
+ * Copyright (c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/bitops.h>
+#include <linux/random.h>
+
+#include <rdma/ib_cache.h>
+#include "sa.h"
+
+static void mcast_add_one(struct ib_device *device);
+static void mcast_remove_one(struct ib_device *device);
+
+static struct ib_client mcast_client = {
+ .name = "ib_multicast",
+ .add = mcast_add_one,
+ .remove = mcast_remove_one
+};
+
+static struct ib_sa_client sa_client;
+static struct workqueue_struct *mcast_wq;
+static union ib_gid mgid0;
+
+struct mcast_device;
+
+struct mcast_port {
+ struct mcast_device *dev;
+ spinlock_t lock;
+ struct rb_root table;
+ atomic_t refcount;
+ struct completion comp;
+ u8 port_num;
+};
+
+struct mcast_device {
+ struct ib_device *device;
+ struct ib_event_handler event_handler;
+ int start_port;
+ int end_port;
+ struct mcast_port port[0];
+};
+
+enum mcast_state {
+ MCAST_JOINING,
+ MCAST_MEMBER,
+ MCAST_ERROR,
+};
+
+enum mcast_group_state {
+ MCAST_IDLE,
+ MCAST_BUSY,
+ MCAST_GROUP_ERROR,
+ MCAST_PKEY_EVENT
+};
+
+enum {
+ MCAST_INVALID_PKEY_INDEX = 0xFFFF
+};
+
+struct mcast_member;
+
+struct mcast_group {
+ struct ib_sa_mcmember_rec rec;
+ struct rb_node node;
+ struct mcast_port *port;
+ spinlock_t lock;
+ struct work_struct work;
+ struct list_head pending_list;
+ struct list_head active_list;
+ struct mcast_member *last_join;
+ int members[3];
+ atomic_t refcount;
+ enum mcast_group_state state;
+ struct ib_sa_query *query;
+ int query_id;
+ u16 pkey_index;
+ u8 leave_state;
+ int retries;
+};
+
+struct mcast_member {
+ struct ib_sa_multicast multicast;
+ struct ib_sa_client *client;
+ struct mcast_group *group;
+ struct list_head list;
+ enum mcast_state state;
+ atomic_t refcount;
+ struct completion comp;
+};
+
+static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
+ void *context);
+static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
+ void *context);
+
+static struct mcast_group *mcast_find(struct mcast_port *port,
+ union ib_gid *mgid)
+{
+ struct rb_node *node = port->table.rb_node;
+ struct mcast_group *group;
+ int ret;
+
+ while (node) {
+ group = rb_entry(node, struct mcast_group, node);
+ ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
+ if (!ret)
+ return group;
+
+ if (ret < 0)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+ return NULL;
+}
+
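+/*
+ * Insert a group into the port's rb-tree, keyed by MGID.  If
+ * allow_duplicates is set, an equal key is linked to the left instead
+ * of returning the existing entry.
+ */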
+static struct mcast_group *mcast_insert(struct mcast_port *port,
+ struct mcast_group *group,
+ int allow_duplicates)
+{
+ struct rb_node **link = &port->table.rb_node;
+ struct rb_node *parent = NULL;
+ struct mcast_group *cur_group;
+ int ret;
+
+ while (*link) {
+ parent = *link;
+ cur_group = rb_entry(parent, struct mcast_group, node);
+
+ ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
+ sizeof group->rec.mgid);
+ if (ret < 0)
+ link = &(*link)->rb_left;
+ else if (ret > 0)
+ link = &(*link)->rb_right;
+ else if (allow_duplicates)
+ link = &(*link)->rb_left;
+ else
+ return cur_group;
+ }
+ rb_link_node(&group->node, parent, link);
+ rb_insert_color(&group->node, &port->table);
+ return NULL;
+}
+
+static void deref_port(struct mcast_port *port)
+{
+ if (atomic_dec_and_test(&port->refcount))
+ complete(&port->comp);
+}
+
+static void release_group(struct mcast_group *group)
+{
+ struct mcast_port *port = group->port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ if (atomic_dec_and_test(&group->refcount)) {
+ rb_erase(&group->node, &port->table);
+ spin_unlock_irqrestore(&port->lock, flags);
+ kfree(group);
+ deref_port(port);
+ } else
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void deref_member(struct mcast_member *member)
+{
+ if (atomic_dec_and_test(&member->refcount))
+ complete(&member->comp);
+}
+
+static void queue_join(struct mcast_member *member)
+{
+ struct mcast_group *group = member->group;
+ unsigned long flags;
+
+ spin_lock_irqsave(&group->lock, flags);
+ list_add_tail(&member->list, &group->pending_list);
+ if (group->state == MCAST_IDLE) {
+ group->state = MCAST_BUSY;
+ atomic_inc(&group->refcount);
+ queue_work(mcast_wq, &group->work);
+ }
+ spin_unlock_irqrestore(&group->lock, flags);
+}
+
+/*
+ * A multicast group has three types of members: full member, non member, and
+ * send only member. We need to keep track of the number of members of each
+ * type based on their join state. Adjust the number of members that belong
+ * to the specified join states.
+ */
+static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
+{
+ int i;
+
+ for (i = 0; i < 3; i++, join_state >>= 1)
+ if (join_state & 0x1)
+ group->members[i] += inc;
+}
+
+/*
+ * If a multicast group has zero members left for a particular join state, but
+ * the group is still joined with the SA, we need to leave that join state.
+ * Determine which join states we still belong to but which no longer have
+ * any active members.
+ */
+static u8 get_leave_state(struct mcast_group *group)
+{
+ u8 leave_state = 0;
+ int i;
+
+ for (i = 0; i < 3; i++)
+ if (!group->members[i])
+ leave_state |= (0x1 << i);
+
+ return leave_state & group->rec.join_state;
+}
+
+static int cmp_rec(struct ib_sa_mcmember_rec *src,
+ struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
+{
+ /* MGID must already match */
+
+ if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID &&
+ memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid))
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
+ return -EINVAL;
+ if (ib_sa_check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
+ IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
+ src->mtu, dst->mtu))
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
+ src->traffic_class != dst->traffic_class)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
+ return -EINVAL;
+ if (ib_sa_check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
+ IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
+ src->rate, dst->rate))
+ return -EINVAL;
+ if (ib_sa_check_selector(comp_mask,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
+ dst->packet_life_time_selector,
+ src->packet_life_time, dst->packet_life_time))
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
+ src->flow_label != dst->flow_label)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
+ src->hop_limit != dst->hop_limit)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope)
+ return -EINVAL;
+
+ /* join_state checked separately, proxy_join ignored */
+
+ return 0;
+}
+
+static int send_join(struct mcast_group *group, struct mcast_member *member)
+{
+ struct mcast_port *port = group->port;
+ int ret;
+
+ group->last_join = member;
+ ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
+ port->port_num, IB_MGMT_METHOD_SET,
+ &member->multicast.rec,
+ member->multicast.comp_mask,
+ 3000, GFP_KERNEL, join_handler, group,
+ &group->query);
+ if (ret >= 0) {
+ group->query_id = ret;
+ ret = 0;
+ }
+ return ret;
+}
+
+static int send_leave(struct mcast_group *group, u8 leave_state)
+{
+ struct mcast_port *port = group->port;
+ struct ib_sa_mcmember_rec rec;
+ int ret;
+
+ rec = group->rec;
+ rec.join_state = leave_state;
+ group->leave_state = leave_state;
+
+ ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
+ port->port_num, IB_SA_METHOD_DELETE, &rec,
+ IB_SA_MCMEMBER_REC_MGID |
+ IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_JOIN_STATE,
+ 3000, GFP_KERNEL, leave_handler,
+ group, &group->query);
+ if (ret >= 0) {
+ group->query_id = ret;
+ ret = 0;
+ }
+ return ret;
+}
+
+static void join_group(struct mcast_group *group, struct mcast_member *member,
+ u8 join_state)
+{
+ member->state = MCAST_MEMBER;
+ adjust_membership(group, join_state, 1);
+ group->rec.join_state |= join_state;
+ member->multicast.rec = group->rec;
+ member->multicast.rec.join_state = join_state;
+ list_move(&member->list, &group->active_list);
+}
+
+static int fail_join(struct mcast_group *group, struct mcast_member *member,
+ int status)
+{
+ spin_lock_irq(&group->lock);
+ list_del_init(&member->list);
+ spin_unlock_irq(&group->lock);
+ return member->multicast.callback(status, &member->multicast);
+}
+
+static void process_group_error(struct mcast_group *group)
+{
+ struct mcast_member *member;
+ int ret = 0;
+ u16 pkey_index;
+
+ if (group->state == MCAST_PKEY_EVENT)
+ ret = ib_find_pkey(group->port->dev->device,
+ group->port->port_num,
+ be16_to_cpu(group->rec.pkey), &pkey_index);
+
+ spin_lock_irq(&group->lock);
+ if (group->state == MCAST_PKEY_EVENT && !ret &&
+ group->pkey_index == pkey_index)
+ goto out;
+
+ while (!list_empty(&group->active_list)) {
+ member = list_entry(group->active_list.next,
+ struct mcast_member, list);
+ atomic_inc(&member->refcount);
+ list_del_init(&member->list);
+ adjust_membership(group, member->multicast.rec.join_state, -1);
+ member->state = MCAST_ERROR;
+ spin_unlock_irq(&group->lock);
+
+ ret = member->multicast.callback(-ENETRESET,
+ &member->multicast);
+ deref_member(member);
+ if (ret)
+ ib_sa_free_multicast(&member->multicast);
+ spin_lock_irq(&group->lock);
+ }
+
+ group->rec.join_state = 0;
+out:
+ group->state = MCAST_BUSY;
+ spin_unlock_irq(&group->lock);
+}
+
+static void mcast_work_handler(struct work_struct *work)
+{
+ struct mcast_group *group;
+ struct mcast_member *member;
+ struct ib_sa_multicast *multicast;
+ int status, ret;
+ u8 join_state;
+
+ group = container_of(work, typeof(*group), work);
+retest:
+ spin_lock_irq(&group->lock);
+ while (!list_empty(&group->pending_list) ||
+ (group->state != MCAST_BUSY)) {
+
+ if (group->state != MCAST_BUSY) {
+ spin_unlock_irq(&group->lock);
+ process_group_error(group);
+ goto retest;
+ }
+
+ member = list_entry(group->pending_list.next,
+ struct mcast_member, list);
+ multicast = &member->multicast;
+ join_state = multicast->rec.join_state;
+ atomic_inc(&member->refcount);
+
+ if (join_state == (group->rec.join_state & join_state)) {
+ status = cmp_rec(&group->rec, &multicast->rec,
+ multicast->comp_mask);
+ if (!status)
+ join_group(group, member, join_state);
+ else
+ list_del_init(&member->list);
+ spin_unlock_irq(&group->lock);
+ ret = multicast->callback(status, multicast);
+ } else {
+ spin_unlock_irq(&group->lock);
+ status = send_join(group, member);
+ if (!status) {
+ deref_member(member);
+ return;
+ }
+ ret = fail_join(group, member, status);
+ }
+
+ deref_member(member);
+ if (ret)
+ ib_sa_free_multicast(&member->multicast);
+ spin_lock_irq(&group->lock);
+ }
+
+ join_state = get_leave_state(group);
+ if (join_state) {
+ group->rec.join_state &= ~join_state;
+ spin_unlock_irq(&group->lock);
+ if (send_leave(group, join_state))
+ goto retest;
+ } else {
+ group->state = MCAST_IDLE;
+ spin_unlock_irq(&group->lock);
+ release_group(group);
+ }
+}
+
+/*
+ * Fail a join request if it is still active - at the head of the pending queue.
+ */
+static void process_join_error(struct mcast_group *group, int status)
+{
+ struct mcast_member *member;
+ int ret;
+
+ spin_lock_irq(&group->lock);
+ member = list_entry(group->pending_list.next,
+ struct mcast_member, list);
+ if (group->last_join == member) {
+ atomic_inc(&member->refcount);
+ list_del_init(&member->list);
+ spin_unlock_irq(&group->lock);
+ ret = member->multicast.callback(status, &member->multicast);
+ deref_member(member);
+ if (ret)
+ ib_sa_free_multicast(&member->multicast);
+ } else
+ spin_unlock_irq(&group->lock);
+}
+
+static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
+ void *context)
+{
+ struct mcast_group *group = context;
+ u16 pkey_index = MCAST_INVALID_PKEY_INDEX;
+
+ if (status)
+ process_join_error(group, status);
+ else {
+ ib_find_pkey(group->port->dev->device, group->port->port_num,
+ be16_to_cpu(rec->pkey), &pkey_index);
+
+ spin_lock_irq(&group->port->lock);
+ group->rec = *rec;
+ if (group->state == MCAST_BUSY &&
+ group->pkey_index == MCAST_INVALID_PKEY_INDEX)
+ group->pkey_index = pkey_index;
+ if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
+ rb_erase(&group->node, &group->port->table);
+ mcast_insert(group->port, group, 1);
+ }
+ spin_unlock_irq(&group->port->lock);
+ }
+ mcast_work_handler(&group->work);
+}
+
+static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
+ void *context)
+{
+ struct mcast_group *group = context;
+
+ if (status && (group->retries > 0) &&
+ !send_leave(group, group->leave_state))
+ group->retries--;
+ else
+ mcast_work_handler(&group->work);
+}
+
+static struct mcast_group *acquire_group(struct mcast_port *port,
+ union ib_gid *mgid, gfp_t gfp_mask)
+{
+ struct mcast_group *group, *cur_group;
+ unsigned long flags;
+ int is_mgid0;
+
+ is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
+ if (!is_mgid0) {
+ spin_lock_irqsave(&port->lock, flags);
+ group = mcast_find(port, mgid);
+ if (group)
+ goto found;
+ spin_unlock_irqrestore(&port->lock, flags);
+ }
+
+ group = kzalloc(sizeof *group, gfp_mask);
+ if (!group)
+ return NULL;
+
+ group->retries = 3;
+ group->port = port;
+ group->rec.mgid = *mgid;
+ group->pkey_index = MCAST_INVALID_PKEY_INDEX;
+ INIT_LIST_HEAD(&group->pending_list);
+ INIT_LIST_HEAD(&group->active_list);
+ INIT_WORK(&group->work, mcast_work_handler);
+ spin_lock_init(&group->lock);
+
+ spin_lock_irqsave(&port->lock, flags);
+ cur_group = mcast_insert(port, group, is_mgid0);
+ if (cur_group) {
+ kfree(group);
+ group = cur_group;
+ } else
+ atomic_inc(&port->refcount);
+found:
+ atomic_inc(&group->refcount);
+ spin_unlock_irqrestore(&port->lock, flags);
+ return group;
+}
+
+/*
+ * We serialize all join requests to a single group to make our lives much
+ * easier. Otherwise, two users could try to join the same group
+ * simultaneously, with different configurations, one could leave while the
+ * join is in progress, etc., which makes locking around error recovery
+ * difficult.
+ */
+struct ib_sa_multicast *
+ib_sa_join_multicast(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+ int (*callback)(int status,
+ struct ib_sa_multicast *multicast),
+ void *context)
+{
+ struct mcast_device *dev;
+ struct mcast_member *member;
+ struct ib_sa_multicast *multicast;
+ int ret;
+
+ dev = ib_get_client_data(device, &mcast_client);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ member = kmalloc(sizeof *member, gfp_mask);
+ if (!member)
+ return ERR_PTR(-ENOMEM);
+
+ ib_sa_client_get(client);
+ member->client = client;
+ member->multicast.rec = *rec;
+ member->multicast.comp_mask = comp_mask;
+ member->multicast.callback = callback;
+ member->multicast.context = context;
+ init_completion(&member->comp);
+ atomic_set(&member->refcount, 1);
+ member->state = MCAST_JOINING;
+
+ member->group = acquire_group(&dev->port[port_num - dev->start_port],
+ &rec->mgid, gfp_mask);
+ if (!member->group) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /*
+ * The user will get the multicast structure in their callback. They
+ * could then free the multicast structure before we can return from
+ * this routine. So we save the pointer to return before queuing
+ * any callback.
+ */
+ multicast = &member->multicast;
+ queue_join(member);
+ return multicast;
+
+err:
+ ib_sa_client_put(client);
+ kfree(member);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_sa_join_multicast);
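
   A minimal caller sketch for the API above; my_sa_client, device,
   port_num, mgid and port_gid are hypothetical and assumed to be set up
   elsewhere. Note that the callback may fire before
   ib_sa_join_multicast() returns:

	static int my_join_done(int status, struct ib_sa_multicast *mc)
	{
		if (status)
			printk(KERN_ERR "mcast join failed: %d\n", status);
		return 0;	/* non-zero would free the membership */
	}

	struct ib_sa_mcmember_rec rec = {};
	struct ib_sa_multicast *mc;

	rec.mgid = mgid;		/* group to join */
	rec.port_gid = port_gid;	/* our port GID */
	rec.join_state = 0x1;		/* full member */
	mc = ib_sa_join_multicast(&my_sa_client, device, port_num, &rec,
				  IB_SA_MCMEMBER_REC_MGID |
				  IB_SA_MCMEMBER_REC_PORT_GID |
				  IB_SA_MCMEMBER_REC_JOIN_STATE,
				  GFP_KERNEL, my_join_done, NULL);
	if (IS_ERR(mc))
		return PTR_ERR(mc);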
+
+void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
+{
+ struct mcast_member *member;
+ struct mcast_group *group;
+
+ member = container_of(multicast, struct mcast_member, multicast);
+ group = member->group;
+
+ spin_lock_irq(&group->lock);
+ if (member->state == MCAST_MEMBER)
+ adjust_membership(group, multicast->rec.join_state, -1);
+
+ list_del_init(&member->list);
+
+ if (group->state == MCAST_IDLE) {
+ group->state = MCAST_BUSY;
+ spin_unlock_irq(&group->lock);
+ /* Continue to hold reference on group until callback */
+ queue_work(mcast_wq, &group->work);
+ } else {
+ spin_unlock_irq(&group->lock);
+ release_group(group);
+ }
+
+ deref_member(member);
+ wait_for_completion(&member->comp);
+ ib_sa_client_put(member->client);
+ kfree(member);
+}
+EXPORT_SYMBOL(ib_sa_free_multicast);
+
+int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+ union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
+{
+ struct mcast_device *dev;
+ struct mcast_port *port;
+ struct mcast_group *group;
+ unsigned long flags;
+ int ret = 0;
+
+ dev = ib_get_client_data(device, &mcast_client);
+ if (!dev)
+ return -ENODEV;
+
+ port = &dev->port[port_num - dev->start_port];
+ spin_lock_irqsave(&port->lock, flags);
+ group = mcast_find(port, mgid);
+ if (group)
+ *rec = group->rec;
+ else
+ ret = -EADDRNOTAVAIL;
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
+
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ struct ib_ah_attr *ah_attr)
+{
+ int ret;
+ u16 gid_index;
+ u8 p;
+
+ ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
+ if (ret)
+ return ret;
+
+ memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->dlid = be16_to_cpu(rec->mlid);
+ ah_attr->sl = rec->sl;
+ ah_attr->port_num = port_num;
+ ah_attr->static_rate = rec->rate;
+
+ ah_attr->ah_flags = IB_AH_GRH;
+ ah_attr->grh.dgid = rec->mgid;
+
+ ah_attr->grh.sgid_index = (u8) gid_index;
+ ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
+ ah_attr->grh.hop_limit = rec->hop_limit;
+ ah_attr->grh.traffic_class = rec->traffic_class;
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_init_ah_from_mcmember);
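
   Once a join completes, the record can be turned into an address
   handle for sending to the group. A sketch, assuming pd and the joined
   multicast entry mc are in scope:

	struct ib_ah_attr ah_attr;
	struct ib_ah *ah;
	int ret;

	ret = ib_init_ah_from_mcmember(device, port_num, &mc->rec,
				       &ah_attr);
	if (ret)
		return ret;
	ah = ib_create_ah(pd, &ah_attr);
	if (IS_ERR(ah))
		return PTR_ERR(ah);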
+
+static void mcast_groups_event(struct mcast_port *port,
+ enum mcast_group_state state)
+{
+ struct mcast_group *group;
+ struct rb_node *node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ for (node = rb_first(&port->table); node; node = rb_next(node)) {
+ group = rb_entry(node, struct mcast_group, node);
+ spin_lock(&group->lock);
+ if (group->state == MCAST_IDLE) {
+ atomic_inc(&group->refcount);
+ queue_work(mcast_wq, &group->work);
+ }
+ if (group->state != MCAST_GROUP_ERROR)
+ group->state = state;
+ spin_unlock(&group->lock);
+ }
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void mcast_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ struct mcast_device *dev;
+ int index;
+
+ dev = container_of(handler, struct mcast_device, event_handler);
+ if (rdma_port_get_link_layer(dev->device, event->element.port_num) !=
+ IB_LINK_LAYER_INFINIBAND)
+ return;
+
+ index = event->element.port_num - dev->start_port;
+
+ switch (event->event) {
+ case IB_EVENT_PORT_ERR:
+ case IB_EVENT_LID_CHANGE:
+ case IB_EVENT_SM_CHANGE:
+ case IB_EVENT_CLIENT_REREGISTER:
+ mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
+ break;
+ case IB_EVENT_PKEY_CHANGE:
+ mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT);
+ break;
+ default:
+ break;
+ }
+}
+
+static void mcast_add_one(struct ib_device *device)
+{
+ struct mcast_device *dev;
+ struct mcast_port *port;
+ int i;
+ int count = 0;
+
+ if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
+ dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
+ GFP_KERNEL);
+ if (!dev)
+ return;
+
+ if (device->node_type == RDMA_NODE_IB_SWITCH)
+ dev->start_port = dev->end_port = 0;
+ else {
+ dev->start_port = 1;
+ dev->end_port = device->phys_port_cnt;
+ }
+
+ for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+ if (rdma_port_get_link_layer(device, dev->start_port + i) !=
+ IB_LINK_LAYER_INFINIBAND)
+ continue;
+ port = &dev->port[i];
+ port->dev = dev;
+ port->port_num = dev->start_port + i;
+ spin_lock_init(&port->lock);
+ port->table = RB_ROOT;
+ init_completion(&port->comp);
+ atomic_set(&port->refcount, 1);
+ ++count;
+ }
+
+ if (!count) {
+ kfree(dev);
+ return;
+ }
+
+ dev->device = device;
+ ib_set_client_data(device, &mcast_client, dev);
+
+ INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
+ ib_register_event_handler(&dev->event_handler);
+}
+
+static void mcast_remove_one(struct ib_device *device)
+{
+ struct mcast_device *dev;
+ struct mcast_port *port;
+ int i;
+
+ dev = ib_get_client_data(device, &mcast_client);
+ if (!dev)
+ return;
+
+ ib_unregister_event_handler(&dev->event_handler);
+ flush_workqueue(mcast_wq);
+
+ for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+ if (rdma_port_get_link_layer(device, dev->start_port + i) ==
+ IB_LINK_LAYER_INFINIBAND) {
+ port = &dev->port[i];
+ deref_port(port);
+ wait_for_completion(&port->comp);
+ }
+ }
+
+ kfree(dev);
+}
+
+int mcast_init(void)
+{
+ int ret;
+
+ mcast_wq = create_singlethread_workqueue("ib_mcast");
+ if (!mcast_wq)
+ return -ENOMEM;
+
+ ib_sa_register_client(&sa_client);
+
+ ret = ib_register_client(&mcast_client);
+ if (ret)
+ goto err;
+ return 0;
+
+err:
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(mcast_wq);
+ return ret;
+}
+
+void mcast_cleanup(void)
+{
+ ib_unregister_client(&mcast_client);
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(mcast_wq);
+}
Property changes on: trunk/sys/ofed/drivers/infiniband/core/multicast.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/notice.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/notice.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/notice.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,749 @@
+/*
+ * Copyright (c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/bitops.h>
+#include <linux/random.h>
+
+#include "sa.h"
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("InfiniBand InformInfo & Notice event handling");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static void inform_add_one(struct ib_device *device);
+static void inform_remove_one(struct ib_device *device);
+
+static struct ib_client inform_client = {
+ .name = "ib_notice",
+ .add = inform_add_one,
+ .remove = inform_remove_one
+};
+
+static struct ib_sa_client sa_client;
+static struct workqueue_struct *inform_wq;
+
+struct inform_device;
+
+struct inform_port {
+ struct inform_device *dev;
+ spinlock_t lock;
+ struct rb_root table;
+ atomic_t refcount;
+ struct completion comp;
+ u8 port_num;
+};
+
+struct inform_device {
+ struct ib_device *device;
+ struct ib_event_handler event_handler;
+ int start_port;
+ int end_port;
+ struct inform_port port[0];
+};
+
+enum inform_state {
+ INFORM_IDLE,
+ INFORM_REGISTERING,
+ INFORM_MEMBER,
+ INFORM_BUSY,
+ INFORM_ERROR
+};
+
+struct inform_member;
+
+struct inform_group {
+ u16 trap_number;
+ struct rb_node node;
+ struct inform_port *port;
+ spinlock_t lock;
+ struct work_struct work;
+ struct list_head pending_list;
+ struct list_head active_list;
+ struct list_head notice_list;
+ struct inform_member *last_join;
+ int members;
+ enum inform_state join_state; /* State relative to SA */
+ atomic_t refcount;
+ enum inform_state state;
+ struct ib_sa_query *query;
+ int query_id;
+};
+
+struct inform_member {
+ struct ib_inform_info info;
+ struct ib_sa_client *client;
+ struct inform_group *group;
+ struct list_head list;
+ enum inform_state state;
+ atomic_t refcount;
+ struct completion comp;
+};
+
+struct inform_notice {
+ struct list_head list;
+ struct ib_sa_notice notice;
+};
+
+static void reg_handler(int status, struct ib_sa_inform *inform,
+ void *context);
+static void unreg_handler(int status, struct ib_sa_inform *inform,
+ void *context);
+
+static struct inform_group *inform_find(struct inform_port *port,
+ u16 trap_number)
+{
+ struct rb_node *node = port->table.rb_node;
+ struct inform_group *group;
+
+ while (node) {
+ group = rb_entry(node, struct inform_group, node);
+ if (trap_number < group->trap_number)
+ node = node->rb_left;
+ else if (trap_number > group->trap_number)
+ node = node->rb_right;
+ else
+ return group;
+ }
+ return NULL;
+}
+
+static struct inform_group *inform_insert(struct inform_port *port,
+ struct inform_group *group)
+{
+ struct rb_node **link = &port->table.rb_node;
+ struct rb_node *parent = NULL;
+ struct inform_group *cur_group;
+
+ while (*link) {
+ parent = *link;
+ cur_group = rb_entry(parent, struct inform_group, node);
+ if (group->trap_number < cur_group->trap_number)
+ link = &(*link)->rb_left;
+ else if (group->trap_number > cur_group->trap_number)
+ link = &(*link)->rb_right;
+ else
+ return cur_group;
+ }
+ rb_link_node(&group->node, parent, link);
+ rb_insert_color(&group->node, &port->table);
+ return NULL;
+}
+
+static void deref_port(struct inform_port *port)
+{
+ if (atomic_dec_and_test(&port->refcount))
+ complete(&port->comp);
+}
+
+static void release_group(struct inform_group *group)
+{
+ struct inform_port *port = group->port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ if (atomic_dec_and_test(&group->refcount)) {
+ rb_erase(&group->node, &port->table);
+ spin_unlock_irqrestore(&port->lock, flags);
+ kfree(group);
+ deref_port(port);
+ } else
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void deref_member(struct inform_member *member)
+{
+ if (atomic_dec_and_test(&member->refcount))
+ complete(&member->comp);
+}
+
+static void queue_reg(struct inform_member *member)
+{
+ struct inform_group *group = member->group;
+ unsigned long flags;
+
+ spin_lock_irqsave(&group->lock, flags);
+ list_add(&member->list, &group->pending_list);
+ if (group->state == INFORM_IDLE) {
+ group->state = INFORM_BUSY;
+ atomic_inc(&group->refcount);
+ queue_work(inform_wq, &group->work);
+ }
+ spin_unlock_irqrestore(&group->lock, flags);
+}
+
+static int send_reg(struct inform_group *group, struct inform_member *member)
+{
+ struct inform_port *port = group->port;
+ struct ib_sa_inform inform;
+ int ret;
+
+ memset(&inform, 0, sizeof inform);
+ inform.lid_range_begin = cpu_to_be16(0xFFFF);
+ inform.is_generic = 1;
+ inform.subscribe = 1;
+ inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
+ inform.trap.generic.trap_num = cpu_to_be16(member->info.trap_number);
+ inform.trap.generic.resp_time = 19;
+ inform.trap.generic.producer_type =
+ cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);
+
+ group->last_join = member;
+ ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
+ port->port_num, &inform, 3000, GFP_KERNEL,
+ reg_handler, group, &group->query);
+ if (ret >= 0) {
+ group->query_id = ret;
+ ret = 0;
+ }
+ return ret;
+}
+
+static int send_unreg(struct inform_group *group)
+{
+ struct inform_port *port = group->port;
+ struct ib_sa_inform inform;
+ int ret;
+
+ memset(&inform, 0, sizeof inform);
+ inform.lid_range_begin = cpu_to_be16(0xFFFF);
+ inform.is_generic = 1;
+ inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
+ inform.trap.generic.trap_num = cpu_to_be16(group->trap_number);
+ inform.trap.generic.qpn = IB_QP1;
+ inform.trap.generic.resp_time = 19;
+ inform.trap.generic.producer_type =
+ cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);
+
+ ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
+ port->port_num, &inform, 3000, GFP_KERNEL,
+ unreg_handler, group, &group->query);
+ if (ret >= 0) {
+ group->query_id = ret;
+ ret = 0;
+ }
+ return ret;
+}
+
+static void join_group(struct inform_group *group, struct inform_member *member)
+{
+ member->state = INFORM_MEMBER;
+ group->members++;
+ list_move(&member->list, &group->active_list);
+}
+
+static int fail_join(struct inform_group *group, struct inform_member *member,
+ int status)
+{
+ spin_lock_irq(&group->lock);
+ list_del_init(&member->list);
+ spin_unlock_irq(&group->lock);
+ return member->info.callback(status, &member->info, NULL);
+}
+
+static void process_group_error(struct inform_group *group)
+{
+ struct inform_member *member;
+ int ret;
+
+ spin_lock_irq(&group->lock);
+ while (!list_empty(&group->active_list)) {
+ member = list_entry(group->active_list.next,
+ struct inform_member, list);
+ atomic_inc(&member->refcount);
+ list_del_init(&member->list);
+ group->members--;
+ member->state = INFORM_ERROR;
+ spin_unlock_irq(&group->lock);
+
+ ret = member->info.callback(-ENETRESET, &member->info, NULL);
+ deref_member(member);
+ if (ret)
+ ib_sa_unregister_inform_info(&member->info);
+ spin_lock_irq(&group->lock);
+ }
+
+ group->join_state = INFORM_IDLE;
+ group->state = INFORM_BUSY;
+ spin_unlock_irq(&group->lock);
+}
+
+/*
+ * Report a notice to all active subscribers. We use a temporary list to
+ * handle unsubscription requests while the notice is being reported, which
+ * avoids holding the group lock while in the user's callback.
+ */
+static void process_notice(struct inform_group *group,
+ struct inform_notice *info_notice)
+{
+ struct inform_member *member;
+ struct list_head list;
+ int ret;
+
+ INIT_LIST_HEAD(&list);
+
+ spin_lock_irq(&group->lock);
+ list_splice_init(&group->active_list, &list);
+ while (!list_empty(&list)) {
+
+ member = list_entry(list.next, struct inform_member, list);
+ atomic_inc(&member->refcount);
+ list_move(&member->list, &group->active_list);
+ spin_unlock_irq(&group->lock);
+
+ ret = member->info.callback(0, &member->info,
+ &info_notice->notice);
+ deref_member(member);
+ if (ret)
+ ib_sa_unregister_inform_info(&member->info);
+ spin_lock_irq(&group->lock);
+ }
+ spin_unlock_irq(&group->lock);
+}
+
+static void inform_work_handler(struct work_struct *work)
+{
+ struct inform_group *group;
+ struct inform_member *member;
+ struct ib_inform_info *info;
+ struct inform_notice *info_notice;
+ int status, ret;
+
+ group = container_of(work, typeof(*group), work);
+retest:
+ spin_lock_irq(&group->lock);
+ while (!list_empty(&group->pending_list) ||
+ !list_empty(&group->notice_list) ||
+ (group->state == INFORM_ERROR)) {
+
+ if (group->state == INFORM_ERROR) {
+ spin_unlock_irq(&group->lock);
+ process_group_error(group);
+ goto retest;
+ }
+
+ if (!list_empty(&group->notice_list)) {
+ info_notice = list_entry(group->notice_list.next,
+ struct inform_notice, list);
+ list_del(&info_notice->list);
+ spin_unlock_irq(&group->lock);
+ process_notice(group, info_notice);
+ kfree(info_notice);
+ goto retest;
+ }
+
+ member = list_entry(group->pending_list.next,
+ struct inform_member, list);
+ info = &member->info;
+ atomic_inc(&member->refcount);
+
+ if (group->join_state == INFORM_MEMBER) {
+ join_group(group, member);
+ spin_unlock_irq(&group->lock);
+ ret = info->callback(0, info, NULL);
+ } else {
+ spin_unlock_irq(&group->lock);
+ status = send_reg(group, member);
+ if (!status) {
+ deref_member(member);
+ return;
+ }
+ ret = fail_join(group, member, status);
+ }
+
+ deref_member(member);
+ if (ret)
+ ib_sa_unregister_inform_info(&member->info);
+ spin_lock_irq(&group->lock);
+ }
+
+ if (!group->members && (group->join_state == INFORM_MEMBER)) {
+ group->join_state = INFORM_IDLE;
+ spin_unlock_irq(&group->lock);
+ if (send_unreg(group))
+ goto retest;
+ } else {
+ group->state = INFORM_IDLE;
+ spin_unlock_irq(&group->lock);
+ release_group(group);
+ }
+}
+
+/*
+ * Fail a join request if it is still active - at the head of the pending queue.
+ */
+static void process_join_error(struct inform_group *group, int status)
+{
+ struct inform_member *member;
+ int ret;
+
+ spin_lock_irq(&group->lock);
+ member = list_entry(group->pending_list.next,
+ struct inform_member, list);
+ if (group->last_join == member) {
+ atomic_inc(&member->refcount);
+ list_del_init(&member->list);
+ spin_unlock_irq(&group->lock);
+ ret = member->info.callback(status, &member->info, NULL);
+ deref_member(member);
+ if (ret)
+ ib_sa_unregister_inform_info(&member->info);
+ } else
+ spin_unlock_irq(&group->lock);
+}
+
+static void reg_handler(int status, struct ib_sa_inform *inform, void *context)
+{
+ struct inform_group *group = context;
+
+ if (status)
+ process_join_error(group, status);
+ else
+ group->join_state = INFORM_MEMBER;
+
+ inform_work_handler(&group->work);
+}
+
+static void unreg_handler(int status, struct ib_sa_inform *rec, void *context)
+{
+ struct inform_group *group = context;
+
+ inform_work_handler(&group->work);
+}
+
+int notice_dispatch(struct ib_device *device, u8 port_num,
+ struct ib_sa_notice *notice)
+{
+ struct inform_device *dev;
+ struct inform_port *port;
+ struct inform_group *group;
+ struct inform_notice *info_notice;
+
+ dev = ib_get_client_data(device, &inform_client);
+ if (!dev)
+ return 0; /* No one to give notice to. */
+
+ port = &dev->port[port_num - dev->start_port];
+ spin_lock_irq(&port->lock);
+ group = inform_find(port,
+ __be16_to_cpu(notice->trap.generic.trap_num));
+ if (!group) {
+ spin_unlock_irq(&port->lock);
+ return 0;
+ }
+
+ atomic_inc(&group->refcount);
+ spin_unlock_irq(&port->lock);
+
+ info_notice = kmalloc(sizeof *info_notice, GFP_KERNEL);
+ if (!info_notice) {
+ release_group(group);
+ return -ENOMEM;
+ }
+
+ info_notice->notice = *notice;
+
+ spin_lock_irq(&group->lock);
+ list_add(&info_notice->list, &group->notice_list);
+ if (group->state == INFORM_IDLE) {
+ group->state = INFORM_BUSY;
+ spin_unlock_irq(&group->lock);
+ inform_work_handler(&group->work);
+ } else {
+ spin_unlock_irq(&group->lock);
+ release_group(group);
+ }
+
+ return 0;
+}
+
+static struct inform_group *acquire_group(struct inform_port *port,
+ u16 trap_number, gfp_t gfp_mask)
+{
+ struct inform_group *group, *cur_group;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ group = inform_find(port, trap_number);
+ if (group)
+ goto found;
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ group = kzalloc(sizeof *group, gfp_mask);
+ if (!group)
+ return NULL;
+
+ group->port = port;
+ group->trap_number = trap_number;
+ INIT_LIST_HEAD(&group->pending_list);
+ INIT_LIST_HEAD(&group->active_list);
+ INIT_LIST_HEAD(&group->notice_list);
+ INIT_WORK(&group->work, inform_work_handler);
+ spin_lock_init(&group->lock);
+
+ spin_lock_irqsave(&port->lock, flags);
+ cur_group = inform_insert(port, group);
+ if (cur_group) {
+ kfree(group);
+ group = cur_group;
+ } else
+ atomic_inc(&port->refcount);
+found:
+ atomic_inc(&group->refcount);
+ spin_unlock_irqrestore(&port->lock, flags);
+ return group;
+}
+
+/*
+ * We serialize all join requests to a single group to make our lives much
+ * easier. Otherwise, two users could try to join the same group
+ * simultaneously, with different configurations, one could leave while the
+ * join is in progress, etc., which makes locking around error recovery
+ * difficult.
+ */
+struct ib_inform_info *
+ib_sa_register_inform_info(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ u16 trap_number, gfp_t gfp_mask,
+ int (*callback)(int status,
+ struct ib_inform_info *info,
+ struct ib_sa_notice *notice),
+ void *context)
+{
+ struct inform_device *dev;
+ struct inform_member *member;
+ struct ib_inform_info *info;
+ int ret;
+
+ dev = ib_get_client_data(device, &inform_client);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ member = kzalloc(sizeof *member, gfp_mask);
+ if (!member)
+ return ERR_PTR(-ENOMEM);
+
+ ib_sa_client_get(client);
+ member->client = client;
+ member->info.trap_number = trap_number;
+ member->info.callback = callback;
+ member->info.context = context;
+ init_completion(&member->comp);
+ atomic_set(&member->refcount, 1);
+ member->state = INFORM_REGISTERING;
+
+ member->group = acquire_group(&dev->port[port_num - dev->start_port],
+ trap_number, gfp_mask);
+ if (!member->group) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /*
+ * The user will get the info structure in their callback. They
+ * could then free the info structure before we can return from
+ * this routine. So we save the pointer to return before queuing
+ * any callback.
+ */
+ info = &member->info;
+ queue_reg(member);
+ return info;
+
+err:
+ ib_sa_client_put(member->client);
+ kfree(member);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_sa_register_inform_info);
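
   A caller sketch for the registration API above; my_sa_client is
   hypothetical, and trap number 64 ("GID now in service") is used
   purely as an example. The same callback reports both the registration
   status and any received notices:

	static int my_notice_cb(int status, struct ib_inform_info *info,
				struct ib_sa_notice *notice)
	{
		if (status)		/* registration failed or lost */
			return 0;
		if (notice)		/* a trap was received */
			printk(KERN_INFO "trap %u from LID 0x%x\n",
			       info->trap_number,
			       be16_to_cpu(notice->issuer_lid));
		return 0;		/* non-zero unregisters */
	}

	info = ib_sa_register_inform_info(&my_sa_client, device, port_num,
					  64, GFP_KERNEL, my_notice_cb,
					  NULL);
	if (IS_ERR(info))
		return PTR_ERR(info);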
+
+void ib_sa_unregister_inform_info(struct ib_inform_info *info)
+{
+ struct inform_member *member;
+ struct inform_group *group;
+
+ member = container_of(info, struct inform_member, info);
+ group = member->group;
+
+ spin_lock_irq(&group->lock);
+ if (member->state == INFORM_MEMBER)
+ group->members--;
+
+ list_del_init(&member->list);
+
+ if (group->state == INFORM_IDLE) {
+ group->state = INFORM_BUSY;
+ spin_unlock_irq(&group->lock);
+ /* Continue to hold reference on group until callback */
+ queue_work(inform_wq, &group->work);
+ } else {
+ spin_unlock_irq(&group->lock);
+ release_group(group);
+ }
+
+ deref_member(member);
+ wait_for_completion(&member->comp);
+ ib_sa_client_put(member->client);
+ kfree(member);
+}
+EXPORT_SYMBOL(ib_sa_unregister_inform_info);
+
+static void inform_groups_lost(struct inform_port *port)
+{
+ struct inform_group *group;
+ struct rb_node *node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ for (node = rb_first(&port->table); node; node = rb_next(node)) {
+ group = rb_entry(node, struct inform_group, node);
+ spin_lock(&group->lock);
+ if (group->state == INFORM_IDLE) {
+ atomic_inc(&group->refcount);
+ queue_work(inform_wq, &group->work);
+ }
+ group->state = INFORM_ERROR;
+ spin_unlock(&group->lock);
+ }
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void inform_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ struct inform_device *dev;
+
+ dev = container_of(handler, struct inform_device, event_handler);
+
+ switch (event->event) {
+ case IB_EVENT_PORT_ERR:
+ case IB_EVENT_LID_CHANGE:
+ case IB_EVENT_SM_CHANGE:
+ case IB_EVENT_CLIENT_REREGISTER:
+ inform_groups_lost(&dev->port[event->element.port_num -
+ dev->start_port]);
+ break;
+ default:
+ break;
+ }
+}
+
+static void inform_add_one(struct ib_device *device)
+{
+ struct inform_device *dev;
+ struct inform_port *port;
+ int i;
+
+ if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
+ dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
+ GFP_KERNEL);
+ if (!dev)
+ return;
+
+ if (device->node_type == RDMA_NODE_IB_SWITCH)
+ dev->start_port = dev->end_port = 0;
+ else {
+ dev->start_port = 1;
+ dev->end_port = device->phys_port_cnt;
+ }
+
+ for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+ port = &dev->port[i];
+ port->dev = dev;
+ port->port_num = dev->start_port + i;
+ spin_lock_init(&port->lock);
+ port->table = RB_ROOT;
+ init_completion(&port->comp);
+ atomic_set(&port->refcount, 1);
+ }
+
+ dev->device = device;
+ ib_set_client_data(device, &inform_client, dev);
+
+ INIT_IB_EVENT_HANDLER(&dev->event_handler, device, inform_event_handler);
+ ib_register_event_handler(&dev->event_handler);
+}
+
+static void inform_remove_one(struct ib_device *device)
+{
+ struct inform_device *dev;
+ struct inform_port *port;
+ int i;
+
+ dev = ib_get_client_data(device, &inform_client);
+ if (!dev)
+ return;
+
+ ib_unregister_event_handler(&dev->event_handler);
+ flush_workqueue(inform_wq);
+
+ for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+ port = &dev->port[i];
+ deref_port(port);
+ wait_for_completion(&port->comp);
+ }
+
+ kfree(dev);
+}
+
+int notice_init(void)
+{
+ int ret;
+
+ inform_wq = create_singlethread_workqueue("ib_inform");
+ if (!inform_wq)
+ return -ENOMEM;
+
+ ib_sa_register_client(&sa_client);
+
+ ret = ib_register_client(&inform_client);
+ if (ret)
+ goto err;
+ return 0;
+
+err:
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(inform_wq);
+ return ret;
+}
+
+void notice_cleanup(void)
+{
+ ib_unregister_client(&inform_client);
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(inform_wq);
+}
Property changes on: trunk/sys/ofed/drivers/infiniband/core/notice.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/packer.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/packer.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/packer.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/string.h>
+
+#include <rdma/ib_pack.h>
+
+static u64 value_read(int offset, int size, void *structure)
+{
+ switch (size) {
+ case 1: return *(u8 *) (structure + offset);
+ case 2: return be16_to_cpup((__be16 *) (structure + offset));
+ case 4: return be32_to_cpup((__be32 *) (structure + offset));
+ case 8: return be64_to_cpup((__be64 *) (structure + offset));
+ default:
+ printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
+ return 0;
+ }
+}
+
+/**
+ * ib_pack - Pack a structure into a buffer
+ * @desc:Array of structure field descriptions
+ * @desc_len:Number of entries in @desc
+ * @structure:Structure to pack from
+ * @buf:Buffer to pack into
+ *
+ * ib_pack() packs a list of structure fields into a buffer,
+ * controlled by the array of fields in @desc.
+ */
+void ib_pack(const struct ib_field *desc,
+ int desc_len,
+ void *structure,
+ void *buf)
+{
+ int i;
+
+ for (i = 0; i < desc_len; ++i) {
+ if (desc[i].size_bits <= 32) {
+ int shift;
+ u32 val;
+ __be32 mask;
+ __be32 *addr;
+
+ shift = 32 - desc[i].offset_bits - desc[i].size_bits;
+ if (desc[i].struct_size_bytes)
+ val = value_read(desc[i].struct_offset_bytes,
+ desc[i].struct_size_bytes,
+ structure) << shift;
+ else
+ val = 0;
+
+ mask = cpu_to_be32(((1ull << desc[i].size_bits) - 1) << shift);
+ addr = (__be32 *) buf + desc[i].offset_words;
+ *addr = (*addr & ~mask) | (cpu_to_be32(val) & mask);
+ } else if (desc[i].size_bits <= 64) {
+ int shift;
+ u64 val;
+ __be64 mask;
+ __be64 *addr;
+
+ shift = 64 - desc[i].offset_bits - desc[i].size_bits;
+ if (desc[i].struct_size_bytes)
+ val = value_read(desc[i].struct_offset_bytes,
+ desc[i].struct_size_bytes,
+ structure) << shift;
+ else
+ val = 0;
+
+ mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift);
+ addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words);
+ *addr = (*addr & ~mask) | (cpu_to_be64(val) & mask);
+ } else {
+ if (desc[i].offset_bits % 8 ||
+ desc[i].size_bits % 8) {
+ printk(KERN_WARNING "Structure field %s of size %d "
+ "bits is not byte-aligned\n",
+ desc[i].field_name, desc[i].size_bits);
+ }
+
+ if (desc[i].struct_size_bytes)
+ memcpy(buf + desc[i].offset_words * 4 +
+ desc[i].offset_bits / 8,
+ structure + desc[i].struct_offset_bytes,
+ desc[i].size_bits / 8);
+ else
+ memset(buf + desc[i].offset_words * 4 +
+ desc[i].offset_bits / 8,
+ 0,
+ desc[i].size_bits / 8);
+ }
+ }
+}
+EXPORT_SYMBOL(ib_pack);
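
   A toy field description showing the packing convention (the struct
   and table here are hypothetical, not an IB attribute; real tables
   such as path_rec_table appear in sa_query.c below). Sub-word struct
   fields are stored big-endian, which is why value_read() uses
   be*_to_cpup():

	struct toy_rec {
		__be16 lid;
	};

	static const struct ib_field toy_table[] = {
		{ .struct_offset_bytes = offsetof(struct toy_rec, lid),
		  .struct_size_bytes = sizeof ((struct toy_rec *) 0)->lid,
		  .field_name = "toy_rec:lid",
		  .offset_words = 0,
		  .offset_bits = 0,
		  .size_bits = 16 },
	};

	__be32 buf[1] = { 0 };
	struct toy_rec in = { .lid = cpu_to_be16(0x1234) }, out;

	ib_pack(toy_table, ARRAY_SIZE(toy_table), &in, buf);
	/* The first two bytes of buf now hold 0x12 0x34. */
	ib_unpack(toy_table, ARRAY_SIZE(toy_table), buf, &out);
	/* out.lid == in.lid: pack and unpack round-trip. */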
+
+static void value_write(int offset, int size, u64 val, void *structure)
+{
+ switch (size * 8) {
+ case 8: *( u8 *) (structure + offset) = val; break;
+ case 16: *(__be16 *) (structure + offset) = cpu_to_be16(val); break;
+ case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break;
+ case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break;
+ default:
+ printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
+ }
+}
+
+/**
+ * ib_unpack - Unpack a buffer into a structure
+ * @desc:Array of structure field descriptions
+ * @desc_len:Number of entries in @desc
+ * @buf:Buffer to unpack from
+ * @structure:Structure to unpack into
+ *
+ * ib_unpack() unpacks a list of structure fields from a buffer,
+ * controlled by the array of fields in @desc.
+ */
+void ib_unpack(const struct ib_field *desc,
+ int desc_len,
+ void *buf,
+ void *structure)
+{
+ int i;
+
+ for (i = 0; i < desc_len; ++i) {
+ if (!desc[i].struct_size_bytes)
+ continue;
+
+ if (desc[i].size_bits <= 32) {
+ int shift;
+ u32 val;
+ u32 mask;
+ __be32 *addr;
+
+ shift = 32 - desc[i].offset_bits - desc[i].size_bits;
+ mask = ((1ull << desc[i].size_bits) - 1) << shift;
+ addr = (__be32 *) buf + desc[i].offset_words;
+ val = (be32_to_cpup(addr) & mask) >> shift;
+ value_write(desc[i].struct_offset_bytes,
+ desc[i].struct_size_bytes,
+ val,
+ structure);
+ } else if (desc[i].size_bits <= 64) {
+ int shift;
+ u64 val;
+ u64 mask;
+ __be64 *addr;
+
+ shift = 64 - desc[i].offset_bits - desc[i].size_bits;
+ mask = (~0ull >> (64 - desc[i].size_bits)) << shift;
+ addr = (__be64 *) buf + desc[i].offset_words;
+ val = (be64_to_cpup(addr) & mask) >> shift;
+ value_write(desc[i].struct_offset_bytes,
+ desc[i].struct_size_bytes,
+ val,
+ structure);
+ } else {
+ if (desc[i].offset_bits % 8 ||
+ desc[i].size_bits % 8) {
+ printk(KERN_WARNING "Structure field %s of size %d "
+ "bits is not byte-aligned\n",
+ desc[i].field_name, desc[i].size_bits);
+ }
+
+ memcpy(structure + desc[i].struct_offset_bytes,
+ buf + desc[i].offset_words * 4 +
+ desc[i].offset_bits / 8,
+ desc[i].size_bits / 8);
+ }
+ }
+}
+EXPORT_SYMBOL(ib_unpack);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/packer.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/sa.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/sa.h (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/sa.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef SA_H
+#define SA_H
+
+#include <rdma/ib_sa.h>
+
+static inline void ib_sa_client_get(struct ib_sa_client *client)
+{
+ atomic_inc(&client->users);
+}
+
+static inline void ib_sa_client_put(struct ib_sa_client *client)
+{
+ if (atomic_dec_and_test(&client->users))
+ complete(&client->comp);
+}
+
+int ib_sa_check_selector(ib_sa_comp_mask comp_mask,
+ ib_sa_comp_mask selector_mask,
+ ib_sa_comp_mask value_mask,
+ u8 selector, u8 src_value, u8 dst_value);
+
+int ib_sa_pack_attr(void *dst, void *src, int attr_id);
+
+int ib_sa_unpack_attr(void *dst, void *src, int attr_id);
+
+int ib_sa_path_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_path_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_path_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query);
+
+int sa_db_init(void);
+void sa_db_cleanup(void);
+
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ u8 method,
+ struct ib_sa_mcmember_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_mcmember_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query);
+
+int mcast_init(void);
+void mcast_cleanup(void);
+
+int ib_sa_informinfo_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_inform *rec,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_inform *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query);
+
+int notice_dispatch(struct ib_device *device, u8 port_num,
+ struct ib_sa_notice *notice);
+
+int notice_init(void);
+void notice_cleanup(void);
+
+#endif /* SA_H */
Property changes on: trunk/sys/ofed/drivers/infiniband/core/sa.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/sa_query.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/sa_query.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/sa_query.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,1500 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/random.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#include <linux/kref.h>
+#include <linux/idr.h>
+#include <linux/workqueue.h>
+
+#include <rdma/ib_pack.h>
+#include <rdma/ib_cache.h>
+#include "sa.h"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("InfiniBand subnet administration query support");
+MODULE_LICENSE("Dual BSD/GPL");
+
+struct ib_sa_sm_ah {
+ struct ib_ah *ah;
+ struct kref ref;
+ u16 pkey_index;
+ u8 src_path_mask;
+};
+
+struct ib_sa_port {
+ struct ib_mad_agent *agent;
+ struct ib_mad_agent *notice_agent;
+ struct ib_sa_sm_ah *sm_ah;
+ struct work_struct update_task;
+ spinlock_t ah_lock;
+ u8 port_num;
+ struct ib_device *device;
+};
+
+struct ib_sa_device {
+ int start_port, end_port;
+ struct ib_event_handler event_handler;
+ struct ib_sa_port port[0];
+};
+
+struct ib_sa_query {
+ void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
+ void (*release)(struct ib_sa_query *);
+ struct ib_sa_client *client;
+ struct ib_sa_port *port;
+ struct ib_mad_send_buf *mad_buf;
+ struct ib_sa_sm_ah *sm_ah;
+ int id;
+};
+
+struct ib_sa_service_query {
+ void (*callback)(int, struct ib_sa_service_rec *, void *);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
+struct ib_sa_path_query {
+ void (*callback)(int, struct ib_sa_path_rec *, void *);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
+struct ib_sa_mcmember_query {
+ void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
+struct ib_sa_inform_query {
+ void (*callback)(int, struct ib_sa_inform *, void *);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
+static void ib_sa_add_one(struct ib_device *device);
+static void ib_sa_remove_one(struct ib_device *device);
+
+static struct ib_client sa_client = {
+ .name = "sa",
+ .add = ib_sa_add_one,
+ .remove = ib_sa_remove_one
+};
+
+static spinlock_t idr_lock;
+static DEFINE_IDR(query_idr);
+
+static spinlock_t tid_lock;
+static u32 tid;
+
+#define PATH_REC_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \
+ .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \
+ .field_name = "sa_path_rec:" #field
+
+static const struct ib_field path_rec_table[] = {
+ { PATH_REC_FIELD(service_id),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 64 },
+ { PATH_REC_FIELD(dgid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { PATH_REC_FIELD(sgid),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { PATH_REC_FIELD(dlid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { PATH_REC_FIELD(slid),
+ .offset_words = 10,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { PATH_REC_FIELD(raw_traffic),
+ .offset_words = 11,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 11,
+ .offset_bits = 1,
+ .size_bits = 3 },
+ { PATH_REC_FIELD(flow_label),
+ .offset_words = 11,
+ .offset_bits = 4,
+ .size_bits = 20 },
+ { PATH_REC_FIELD(hop_limit),
+ .offset_words = 11,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { PATH_REC_FIELD(traffic_class),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { PATH_REC_FIELD(reversible),
+ .offset_words = 12,
+ .offset_bits = 8,
+ .size_bits = 1 },
+ { PATH_REC_FIELD(numb_path),
+ .offset_words = 12,
+ .offset_bits = 9,
+ .size_bits = 7 },
+ { PATH_REC_FIELD(pkey),
+ .offset_words = 12,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { PATH_REC_FIELD(qos_class),
+ .offset_words = 13,
+ .offset_bits = 0,
+ .size_bits = 12 },
+ { PATH_REC_FIELD(sl),
+ .offset_words = 13,
+ .offset_bits = 12,
+ .size_bits = 4 },
+ { PATH_REC_FIELD(mtu_selector),
+ .offset_words = 13,
+ .offset_bits = 16,
+ .size_bits = 2 },
+ { PATH_REC_FIELD(mtu),
+ .offset_words = 13,
+ .offset_bits = 18,
+ .size_bits = 6 },
+ { PATH_REC_FIELD(rate_selector),
+ .offset_words = 13,
+ .offset_bits = 24,
+ .size_bits = 2 },
+ { PATH_REC_FIELD(rate),
+ .offset_words = 13,
+ .offset_bits = 26,
+ .size_bits = 6 },
+ { PATH_REC_FIELD(packet_life_time_selector),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 2 },
+ { PATH_REC_FIELD(packet_life_time),
+ .offset_words = 14,
+ .offset_bits = 2,
+ .size_bits = 6 },
+ { PATH_REC_FIELD(preference),
+ .offset_words = 14,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 16,
+ .size_bits = 48 },
+};
+
+#define MCMEMBER_REC_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
+ .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
+ .field_name = "sa_mcmember_rec:" #field
+
+static const struct ib_field mcmember_rec_table[] = {
+ { MCMEMBER_REC_FIELD(mgid),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { MCMEMBER_REC_FIELD(port_gid),
+ .offset_words = 4,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { MCMEMBER_REC_FIELD(qkey),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { MCMEMBER_REC_FIELD(mlid),
+ .offset_words = 9,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { MCMEMBER_REC_FIELD(mtu_selector),
+ .offset_words = 9,
+ .offset_bits = 16,
+ .size_bits = 2 },
+ { MCMEMBER_REC_FIELD(mtu),
+ .offset_words = 9,
+ .offset_bits = 18,
+ .size_bits = 6 },
+ { MCMEMBER_REC_FIELD(traffic_class),
+ .offset_words = 9,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { MCMEMBER_REC_FIELD(pkey),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { MCMEMBER_REC_FIELD(rate_selector),
+ .offset_words = 10,
+ .offset_bits = 16,
+ .size_bits = 2 },
+ { MCMEMBER_REC_FIELD(rate),
+ .offset_words = 10,
+ .offset_bits = 18,
+ .size_bits = 6 },
+ { MCMEMBER_REC_FIELD(packet_life_time_selector),
+ .offset_words = 10,
+ .offset_bits = 24,
+ .size_bits = 2 },
+ { MCMEMBER_REC_FIELD(packet_life_time),
+ .offset_words = 10,
+ .offset_bits = 26,
+ .size_bits = 6 },
+ { MCMEMBER_REC_FIELD(sl),
+ .offset_words = 11,
+ .offset_bits = 0,
+ .size_bits = 4 },
+ { MCMEMBER_REC_FIELD(flow_label),
+ .offset_words = 11,
+ .offset_bits = 4,
+ .size_bits = 20 },
+ { MCMEMBER_REC_FIELD(hop_limit),
+ .offset_words = 11,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { MCMEMBER_REC_FIELD(scope),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 4 },
+ { MCMEMBER_REC_FIELD(join_state),
+ .offset_words = 12,
+ .offset_bits = 4,
+ .size_bits = 4 },
+ { MCMEMBER_REC_FIELD(proxy_join),
+ .offset_words = 12,
+ .offset_bits = 8,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 12,
+ .offset_bits = 9,
+ .size_bits = 23 },
+};
+
+#define SERVICE_REC_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \
+ .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \
+ .field_name = "sa_service_rec:" #field
+
+static const struct ib_field service_rec_table[] = {
+ { SERVICE_REC_FIELD(id),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 64 },
+ { SERVICE_REC_FIELD(gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(pkey),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { SERVICE_REC_FIELD(lease),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { SERVICE_REC_FIELD(key),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(name),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 64*8 },
+ { SERVICE_REC_FIELD(data8),
+ .offset_words = 28,
+ .offset_bits = 0,
+ .size_bits = 16*8 },
+ { SERVICE_REC_FIELD(data16),
+ .offset_words = 32,
+ .offset_bits = 0,
+ .size_bits = 8*16 },
+ { SERVICE_REC_FIELD(data32),
+ .offset_words = 36,
+ .offset_bits = 0,
+ .size_bits = 4*32 },
+ { SERVICE_REC_FIELD(data64),
+ .offset_words = 40,
+ .offset_bits = 0,
+ .size_bits = 2*64 },
+};
+
+#define INFORM_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_sa_inform, field), \
+ .struct_size_bytes = sizeof ((struct ib_sa_inform *) 0)->field, \
+ .field_name = "sa_inform:" #field
+
+static const struct ib_field inform_table[] = {
+ { INFORM_FIELD(gid),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { INFORM_FIELD(lid_range_begin),
+ .offset_words = 4,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { INFORM_FIELD(lid_range_end),
+ .offset_words = 4,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { RESERVED,
+ .offset_words = 5,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { INFORM_FIELD(is_generic),
+ .offset_words = 5,
+ .offset_bits = 16,
+ .size_bits = 8 },
+ { INFORM_FIELD(subscribe),
+ .offset_words = 5,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { INFORM_FIELD(type),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { INFORM_FIELD(trap.generic.trap_num),
+ .offset_words = 6,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { INFORM_FIELD(trap.generic.qpn),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 24 },
+ { RESERVED,
+ .offset_words = 7,
+ .offset_bits = 24,
+ .size_bits = 3 },
+ { INFORM_FIELD(trap.generic.resp_time),
+ .offset_words = 7,
+ .offset_bits = 27,
+ .size_bits = 5 },
+ { RESERVED,
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { INFORM_FIELD(trap.generic.producer_type),
+ .offset_words = 8,
+ .offset_bits = 8,
+ .size_bits = 24 },
+};
+
+#define NOTICE_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_sa_notice, field), \
+ .struct_size_bytes = sizeof ((struct ib_sa_notice *) 0)->field, \
+ .field_name = "sa_notice:" #field
+
+static const struct ib_field notice_table[] = {
+ { NOTICE_FIELD(is_generic),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { NOTICE_FIELD(type),
+ .offset_words = 0,
+ .offset_bits = 1,
+ .size_bits = 7 },
+ { NOTICE_FIELD(trap.generic.producer_type),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 24 },
+ { NOTICE_FIELD(trap.generic.trap_num),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { NOTICE_FIELD(issuer_lid),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { NOTICE_FIELD(notice_toggle),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { NOTICE_FIELD(notice_count),
+ .offset_words = 2,
+ .offset_bits = 1,
+ .size_bits = 15 },
+ { NOTICE_FIELD(data_details),
+ .offset_words = 2,
+ .offset_bits = 16,
+ .size_bits = 432 },
+ { NOTICE_FIELD(issuer_gid),
+ .offset_words = 16,
+ .offset_bits = 0,
+ .size_bits = 128 },
+};
+
+int ib_sa_check_selector(ib_sa_comp_mask comp_mask,
+ ib_sa_comp_mask selector_mask,
+ ib_sa_comp_mask value_mask,
+ u8 selector, u8 src_value, u8 dst_value)
+{
+ int err;
+
+ if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
+ return 0;
+
+ switch (selector) {
+ case IB_SA_GT:
+ err = (src_value <= dst_value);
+ break;
+ case IB_SA_LT:
+ err = (src_value >= dst_value);
+ break;
+ case IB_SA_EQ:
+ err = (src_value != dst_value);
+ break;
+ default:
+ err = 0;
+ break;
+ }
+
+ return err;
+}
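+
+/*
+ * Comparison semantics, by example: a non-zero return means the value
+ * pair fails the requested relation.  IB_SA_GT requires
+ * src_value > dst_value, IB_SA_LT requires src_value < dst_value, and
+ * IB_SA_EQ requires equality, so with selector == IB_SA_GT,
+ * src_value = 2 and dst_value = 4 the function returns 1.  If either
+ * the selector bit or the value bit is clear in comp_mask, the field
+ * is not checked and 0 is returned.
+ */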
+
+int ib_sa_pack_attr(void *dst, void *src, int attr_id)
+{
+ switch (attr_id) {
+ case IB_SA_ATTR_PATH_REC:
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), src, dst);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int ib_sa_unpack_attr(void *dst, void *src, int attr_id)
+{
+ switch (attr_id) {
+ case IB_SA_ATTR_PATH_REC:
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), src, dst);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
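+
+/*
+ * Round-trip sketch for the two helpers above; the buffer size is an
+ * assumption (a packed PathRecord fits in the SA MAD data area) and
+ * rec is a caller-initialized record:
+ *
+ *	u8 wire[IB_MGMT_SA_DATA];
+ *	struct ib_sa_path_rec copy;
+ *
+ *	if (!ib_sa_pack_attr(wire, &rec, IB_SA_ATTR_PATH_REC))
+ *		ib_sa_unpack_attr(&copy, wire, IB_SA_ATTR_PATH_REC);
+ *
+ * After a successful round trip, copy holds the same field values as
+ * rec, modulo reserved bits.
+ */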
+
+static void free_sm_ah(struct kref *kref)
+{
+ struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
+
+ ib_destroy_ah(sm_ah->ah);
+ kfree(sm_ah);
+}
+
+static void update_sm_ah(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, update_task);
+ struct ib_sa_sm_ah *new_ah;
+ struct ib_port_attr port_attr;
+ struct ib_ah_attr ah_attr;
+
+ if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
+ printk(KERN_WARNING "Couldn't query port\n");
+ return;
+ }
+
+ new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
+ if (!new_ah) {
+ printk(KERN_WARNING "Couldn't allocate new SM AH\n");
+ return;
+ }
+
+ kref_init(&new_ah->ref);
+ new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
+
+ new_ah->pkey_index = 0;
+ if (ib_find_pkey(port->agent->device, port->port_num,
+ IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
+ printk(KERN_ERR "Couldn't find index for default PKey\n");
+
+ memset(&ah_attr, 0, sizeof ah_attr);
+ ah_attr.dlid = port_attr.sm_lid;
+ ah_attr.sl = port_attr.sm_sl;
+ ah_attr.port_num = port->port_num;
+
+ new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
+ if (IS_ERR(new_ah->ah)) {
+ printk(KERN_WARNING "Couldn't create new SM AH\n");
+ kfree(new_ah);
+ return;
+ }
+
+ spin_lock_irq(&port->ah_lock);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = new_ah;
+ spin_unlock_irq(&port->ah_lock);
+}
+
+static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
+{
+ if (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER) {
+ unsigned long flags;
+ struct ib_sa_device *sa_dev =
+ container_of(handler, typeof(*sa_dev), event_handler);
+ struct ib_sa_port *port =
+ &sa_dev->port[event->element.port_num - sa_dev->start_port];
+
+ if (rdma_port_get_link_layer(handler->device, port->port_num) != IB_LINK_LAYER_INFINIBAND)
+ return;
+
+ spin_lock_irqsave(&port->ah_lock, flags);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = NULL;
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+
+ schedule_work(&sa_dev->port[event->element.port_num -
+ sa_dev->start_port].update_task);
+ }
+}
+
+void ib_sa_register_client(struct ib_sa_client *client)
+{
+ atomic_set(&client->users, 1);
+ init_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_register_client);
+
+void ib_sa_unregister_client(struct ib_sa_client *client)
+{
+ ib_sa_client_put(client);
+ wait_for_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_unregister_client);
+
+/**
+ * ib_sa_cancel_query - try to cancel an SA query
+ * @id:ID of query to cancel
+ * @query:query pointer to cancel
+ *
+ * Try to cancel an SA query. If the id and query don't match up or
+ * the query has already completed, nothing is done. Otherwise the
+ * query is canceled and will complete with a status of -EINTR.
+ */
+void ib_sa_cancel_query(int id, struct ib_sa_query *query)
+{
+ unsigned long flags;
+ struct ib_mad_agent *agent;
+ struct ib_mad_send_buf *mad_buf;
+
+ spin_lock_irqsave(&idr_lock, flags);
+ if (idr_find(&query_idr, id) != query) {
+ spin_unlock_irqrestore(&idr_lock, flags);
+ return;
+ }
+ agent = query->port->agent;
+ mad_buf = query->mad_buf;
+ spin_unlock_irqrestore(&idr_lock, flags);
+
+ ib_cancel_mad(agent, mad_buf);
+}
+EXPORT_SYMBOL(ib_sa_cancel_query);
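+
+/*
+ * Cancellation sketch: a query function's non-negative return value is
+ * the id to pass here, together with the query pointer returned through
+ * its sa_query out-parameter (the names below are illustrative):
+ *
+ *	struct ib_sa_query *query;
+ *	int id;
+ *
+ *	id = ib_sa_path_rec_query(..., &query);
+ *	if (id >= 0)
+ *		ib_sa_cancel_query(id, query);
+ */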
+
+static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
+{
+ struct ib_sa_device *sa_dev;
+ struct ib_sa_port *port;
+ unsigned long flags;
+ u8 src_path_mask;
+
+ sa_dev = ib_get_client_data(device, &sa_client);
+ if (!sa_dev)
+ return 0x7f;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ spin_lock_irqsave(&port->ah_lock, flags);
+ src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+
+ return src_path_mask;
+}
+
+int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
+ struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
+{
+ int ret;
+ u16 gid_index;
+ int force_grh;
+
+ memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->dlid = be16_to_cpu(rec->dlid);
+ ah_attr->sl = rec->sl;
+ ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
+ get_src_path_mask(device, port_num);
+ ah_attr->port_num = port_num;
+ ah_attr->static_rate = rec->rate;
+
+ force_grh = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_ETHERNET;
+
+ if (rec->hop_limit > 1 || force_grh) {
+ ah_attr->ah_flags = IB_AH_GRH;
+ ah_attr->grh.dgid = rec->dgid;
+
+ ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
+ &gid_index);
+ if (ret)
+ return ret;
+
+ ah_attr->grh.sgid_index = gid_index;
+ ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
+ ah_attr->grh.hop_limit = rec->hop_limit;
+ ah_attr->grh.traffic_class = rec->traffic_class;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ib_init_ah_from_path);
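+
+/*
+ * Typical use, sketched: once a path record query has completed with
+ * status 0, convert the record into an address handle (pd and the
+ * surrounding error handling are the caller's responsibility):
+ *
+ *	struct ib_ah_attr ah_attr;
+ *	struct ib_ah *ah;
+ *
+ *	if (!ib_init_ah_from_path(device, port_num, rec, &ah_attr))
+ *		ah = ib_create_ah(pd, &ah_attr);
+ */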
+
+static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&query->port->ah_lock, flags);
+ if (!query->port->sm_ah) {
+ spin_unlock_irqrestore(&query->port->ah_lock, flags);
+ return -EAGAIN;
+ }
+ kref_get(&query->port->sm_ah->ref);
+ query->sm_ah = query->port->sm_ah;
+ spin_unlock_irqrestore(&query->port->ah_lock, flags);
+
+ query->mad_buf = ib_create_send_mad(query->port->agent, 1,
+ query->sm_ah->pkey_index,
+ 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
+ gfp_mask);
+ if (IS_ERR(query->mad_buf)) {
+ kref_put(&query->sm_ah->ref, free_sm_ah);
+ return -ENOMEM;
+ }
+
+ query->mad_buf->ah = query->sm_ah->ah;
+
+ return 0;
+}
+
+static void free_mad(struct ib_sa_query *query)
+{
+ ib_free_send_mad(query->mad_buf);
+ kref_put(&query->sm_ah->ref, free_sm_ah);
+}
+
+static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
+{
+ unsigned long flags;
+
+ memset(mad, 0, sizeof *mad);
+
+ mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+
+ spin_lock_irqsave(&tid_lock, flags);
+ mad->mad_hdr.tid =
+ cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
+ spin_unlock_irqrestore(&tid_lock, flags);
+}
+
+static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
+{
+ unsigned long flags;
+ int ret, id;
+
+retry:
+ if (!idr_pre_get(&query_idr, gfp_mask))
+ return -ENOMEM;
+ spin_lock_irqsave(&idr_lock, flags);
+ ret = idr_get_new(&query_idr, query, &id);
+ spin_unlock_irqrestore(&idr_lock, flags);
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ return ret;
+
+ query->mad_buf->timeout_ms = timeout_ms;
+ query->mad_buf->context[0] = query;
+ query->id = id;
+
+ ret = ib_post_send_mad(query->mad_buf, NULL);
+ if (ret) {
+ spin_lock_irqsave(&idr_lock, flags);
+ idr_remove(&query_idr, id);
+ spin_unlock_irqrestore(&idr_lock, flags);
+ }
+
+ /*
+ * It's not safe to dereference query any more, because the
+ * send may already have completed and freed the query in
+ * another context.
+ */
+ return ret ? ret : id;
+}
+
+void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
+{
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
+}
+EXPORT_SYMBOL(ib_sa_unpack_path);
+
+static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
+ int status,
+ struct ib_sa_mad *mad)
+{
+ struct ib_sa_path_query *query =
+ container_of(sa_query, struct ib_sa_path_query, sa_query);
+
+ if (mad) {
+ struct ib_sa_path_rec rec;
+
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ mad->data, &rec);
+ query->callback(status, &rec, query->context);
+ } else
+ query->callback(status, NULL, query->context);
+}
+
+static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
+{
+ kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
+}
+
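+/**
+ * ib_sa_path_rec_query - Start a Path record query
+ * @client:SA client
+ * @device:device to send query on
+ * @port_num: port number to send query on
+ * @rec:Path Record to send in query
+ * @comp_mask:component mask to send in query
+ * @timeout_ms:time to wait for response
+ * @gfp_mask:GFP mask to use for internal allocations
+ * @callback:function called when query completes, times out or is
+ * canceled
+ * @context:opaque user context passed to callback
+ * @sa_query:query context, used to cancel query
+ *
+ * Send a Path Record Get query to the SA to look up a path.  The
+ * callback function will be called when the query completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
+ * occurred sending the query.  The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_path_rec_query() is negative, it is an
+ * error code.  Otherwise it is a request ID that can be used to cancel
+ * the query.
+ */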
+int ib_sa_path_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_path_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_path_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ struct ib_sa_path_query *query;
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
+ int ret;
+
+ if (!sa_dev)
+ return -ENODEV;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
+ query = kmalloc(sizeof *query, gfp_mask);
+ if (!query)
+ return -ENOMEM;
+
+ query->sa_query.port = port;
+ ret = alloc_mad(&query->sa_query, gfp_mask);
+ if (ret)
+ goto err1;
+
+ ib_sa_client_get(client);
+ query->sa_query.client = client;
+ query->callback = callback;
+ query->context = context;
+
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
+
+ query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
+ query->sa_query.release = ib_sa_path_rec_release;
+ mad->mad_hdr.method = IB_MGMT_METHOD_GET;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
+
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
+
+ *sa_query = &query->sa_query;
+
+ ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+
+err2:
+ *sa_query = NULL;
+ ib_sa_client_put(query->sa_query.client);
+ free_mad(&query->sa_query);
+
+err1:
+ kfree(query);
+ return ret;
+}
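+
+/*
+ * Usage sketch for ib_sa_path_rec_query(); my_client, my_path_done and
+ * done are hypothetical caller-side names, and the comp_mask bits are
+ * those defined in rdma/ib_sa.h:
+ *
+ *	static void my_path_done(int status, struct ib_sa_path_rec *resp,
+ *				 void *context)
+ *	{
+ *		if (!status)
+ *			complete(context);
+ *	}
+ *
+ *	struct ib_sa_path_rec rec = {
+ *		.sgid = sgid,
+ *		.dgid = dgid,
+ *		.numb_path = 1,
+ *	};
+ *	struct ib_sa_query *query;
+ *	int id;
+ *
+ *	id = ib_sa_path_rec_query(&my_client, device, port_num, &rec,
+ *				  IB_SA_PATH_REC_DGID |
+ *				  IB_SA_PATH_REC_SGID |
+ *				  IB_SA_PATH_REC_NUMB_PATH,
+ *				  1000, GFP_KERNEL, my_path_done,
+ *				  &done, &query);
+ *
+ * A negative id is an error code; otherwise the callback eventually
+ * runs with the unpacked record (status 0), or with -EINTR, -ETIMEDOUT
+ * or -EIO and a NULL resp.
+ */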
+
+static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
+ int status,
+ struct ib_sa_mad *mad)
+{
+ struct ib_sa_service_query *query =
+ container_of(sa_query, struct ib_sa_service_query, sa_query);
+
+ if (mad) {
+ struct ib_sa_service_rec rec;
+
+ ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
+ mad->data, &rec);
+ query->callback(status, &rec, query->context);
+ } else
+ query->callback(status, NULL, query->context);
+}
+
+static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
+{
+ kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
+}
+
+/**
+ * ib_sa_service_rec_query - Start Service Record operation
+ * @client:SA client
+ * @device:device to send request on
+ * @port_num: port number to send request on
+ * @method:SA method - should be get, set, or delete
+ * @rec:Service Record to send in request
+ * @comp_mask:component mask to send in request
+ * @timeout_ms:time to wait for response
+ * @gfp_mask:GFP mask to use for internal allocations
+ * @callback:function called when request completes, times out or is
+ * canceled
+ * @context:opaque user context passed to callback
+ * @sa_query:request context, used to cancel request
+ *
+ * Send a Service Record set/get/delete to the SA to register,
+ * unregister or query a service record.
+ * The callback function will be called when the request completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
+ * occurred sending the query. The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_service_rec_query() is negative, it is an
+ * error code. Otherwise it is a request ID that can be used to cancel
+ * the query.
+ */
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num, u8 method,
+ struct ib_sa_service_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_service_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ struct ib_sa_service_query *query;
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
+ int ret;
+
+ if (!sa_dev)
+ return -ENODEV;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
+ if (method != IB_MGMT_METHOD_GET &&
+ method != IB_MGMT_METHOD_SET &&
+ method != IB_SA_METHOD_DELETE)
+ return -EINVAL;
+
+ query = kmalloc(sizeof *query, gfp_mask);
+ if (!query)
+ return -ENOMEM;
+
+ query->sa_query.port = port;
+ ret = alloc_mad(&query->sa_query, gfp_mask);
+ if (ret)
+ goto err1;
+
+ ib_sa_client_get(client);
+ query->sa_query.client = client;
+ query->callback = callback;
+ query->context = context;
+
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
+
+ query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
+ query->sa_query.release = ib_sa_service_rec_release;
+ mad->mad_hdr.method = method;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
+
+ ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
+ rec, mad->data);
+
+ *sa_query = &query->sa_query;
+
+ ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+
+err2:
+ *sa_query = NULL;
+ ib_sa_client_put(query->sa_query.client);
+ free_mad(&query->sa_query);
+
+err1:
+ kfree(query);
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_service_rec_query);
+
+static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
+ int status,
+ struct ib_sa_mad *mad)
+{
+ struct ib_sa_mcmember_query *query =
+ container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
+
+ if (mad) {
+ struct ib_sa_mcmember_rec rec;
+
+ ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
+ mad->data, &rec);
+ query->callback(status, &rec, query->context);
+ } else
+ query->callback(status, NULL, query->context);
+}
+
+static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
+{
+ kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
+}
+
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ u8 method,
+ struct ib_sa_mcmember_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_mcmember_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ struct ib_sa_mcmember_query *query;
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
+ int ret;
+
+ if (!sa_dev)
+ return -ENODEV;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
+ query = kmalloc(sizeof *query, gfp_mask);
+ if (!query)
+ return -ENOMEM;
+
+ query->sa_query.port = port;
+ ret = alloc_mad(&query->sa_query, gfp_mask);
+ if (ret)
+ goto err1;
+
+ ib_sa_client_get(client);
+ query->sa_query.client = client;
+ query->callback = callback;
+ query->context = context;
+
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
+
+ query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
+ query->sa_query.release = ib_sa_mcmember_rec_release;
+ mad->mad_hdr.method = method;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
+
+ ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
+ rec, mad->data);
+
+ *sa_query = &query->sa_query;
+
+ ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+
+err2:
+ *sa_query = NULL;
+ ib_sa_client_put(query->sa_query.client);
+ free_mad(&query->sa_query);
+
+err1:
+ kfree(query);
+ return ret;
+}
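+
+/*
+ * Sketch of a full-member multicast join through
+ * ib_sa_mcmember_rec_query(); the comp_mask bits are those defined in
+ * rdma/ib_sa.h, and my_mc_done/ctx follow the same callback pattern as
+ * the path record example above:
+ *
+ *	struct ib_sa_mcmember_rec mrec = {
+ *		.mgid = mgid,
+ *		.port_gid = port_gid,
+ *		.join_state = 1,
+ *	};
+ *
+ *	id = ib_sa_mcmember_rec_query(&my_client, device, port_num,
+ *				      IB_MGMT_METHOD_SET, &mrec,
+ *				      IB_SA_MCMEMBER_REC_MGID |
+ *				      IB_SA_MCMEMBER_REC_PORT_GID |
+ *				      IB_SA_MCMEMBER_REC_JOIN_STATE,
+ *				      1000, GFP_KERNEL, my_mc_done,
+ *				      ctx, &query);
+ *
+ * join_state 1 requests full membership; the SA's response carries the
+ * MLID and QKey needed to attach the QP.
+ */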
+
+static void ib_sa_inform_callback(struct ib_sa_query *sa_query,
+ int status,
+ struct ib_sa_mad *mad)
+{
+ struct ib_sa_inform_query *query =
+ container_of(sa_query, struct ib_sa_inform_query, sa_query);
+
+ if (mad) {
+ struct ib_sa_inform rec;
+
+ ib_unpack(inform_table, ARRAY_SIZE(inform_table),
+ mad->data, &rec);
+ query->callback(status, &rec, query->context);
+ } else
+ query->callback(status, NULL, query->context);
+}
+
+static void ib_sa_inform_release(struct ib_sa_query *sa_query)
+{
+ kfree(container_of(sa_query, struct ib_sa_inform_query, sa_query));
+}
+
+int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_guidinfo_rec *rec,
+ ib_sa_comp_mask comp_mask, u8 method,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_guidinfo_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ /*
+ * Stub: originally reached from mad.c under mlx4_ib_init_sriov(),
+ * which calls mlx4_ib_init_alias_guid_service() in alias_GUID.c,
+ * which comes down to this function.
+ */
+
+ printk(KERN_ERR "ib_sa_guid_info_rec_query: should only be called in SRIOV flow\n");
+
+ return 0;
+}
+
+/**
+ * ib_sa_informinfo_query - Start an InformInfo registration.
+ * @client:SA client
+ * @device:device to send query on
+ * @port_num: port number to send query on
+ * @rec:Inform record to send in query
+ * @timeout_ms:time to wait for response
+ * @gfp_mask:GFP mask to use for internal allocations
+ * @callback:function called when notice handler registration completes,
+ * times out or is canceled
+ * @context:opaque user context passed to callback
+ * @sa_query:query context, used to cancel query
+ *
+ * This function sends an InformInfo record to register with the SA to
+ * receive in-service notices.
+ * The callback function will be called when the query completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
+ * occurred sending the query. The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_informinfo_query() is negative, it is an
+ * error code. Otherwise it is a query ID that can be used to cancel
+ * the query.
+ */
+int ib_sa_informinfo_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_inform *rec,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_inform *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ struct ib_sa_inform_query *query;
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
+ int ret;
+
+ if (!sa_dev)
+ return -ENODEV;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
+ query = kmalloc(sizeof *query, gfp_mask);
+ if (!query)
+ return -ENOMEM;
+
+ query->sa_query.port = port;
+ ret = alloc_mad(&query->sa_query, gfp_mask);
+ if (ret)
+ goto err1;
+
+ ib_sa_client_get(client);
+ query->sa_query.client = client;
+ query->callback = callback;
+ query->context = context;
+
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
+
+ query->sa_query.callback = callback ? ib_sa_inform_callback : NULL;
+ query->sa_query.release = ib_sa_inform_release;
+ mad->mad_hdr.method = IB_MGMT_METHOD_SET;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_INFORM_INFO);
+
+ ib_pack(inform_table, ARRAY_SIZE(inform_table), rec, mad->data);
+
+ *sa_query = &query->sa_query;
+ ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+
+err2:
+ *sa_query = NULL;
+ ib_sa_client_put(query->sa_query.client);
+ free_mad(&query->sa_query);
+err1:
+ kfree(query);
+ return ret;
+}
+
+static void ib_sa_notice_resp(struct ib_sa_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_mad_send_buf *mad_buf;
+ struct ib_sa_mad *mad;
+ int ret;
+ unsigned long flags;
+
+ mad_buf = ib_create_send_mad(port->notice_agent, 1, 0, 0,
+ IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
+ GFP_KERNEL);
+ if (IS_ERR(mad_buf))
+ return;
+
+ mad = mad_buf->mad;
+ memcpy(mad, mad_recv_wc->recv_buf.mad, sizeof *mad);
+ mad->mad_hdr.method = IB_MGMT_METHOD_REPORT_RESP;
+
+ spin_lock_irqsave(&port->ah_lock, flags);
+ if (!port->sm_ah) {
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+ ib_free_send_mad(mad_buf);
+ return;
+ }
+ kref_get(&port->sm_ah->ref);
+ mad_buf->context[0] = &port->sm_ah->ref;
+ mad_buf->ah = port->sm_ah->ah;
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+
+ ret = ib_post_send_mad(mad_buf, NULL);
+ if (ret)
+ goto err;
+
+ return;
+err:
+ kref_put(mad_buf->context[0], free_sm_ah);
+ ib_free_send_mad(mad_buf);
+}
+
+static void send_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
+ unsigned long flags;
+
+ if (query->callback)
+ switch (mad_send_wc->status) {
+ case IB_WC_SUCCESS:
+ /* No callback -- already got recv */
+ break;
+ case IB_WC_RESP_TIMEOUT_ERR:
+ query->callback(query, -ETIMEDOUT, NULL);
+ break;
+ case IB_WC_WR_FLUSH_ERR:
+ query->callback(query, -EINTR, NULL);
+ break;
+ default:
+ query->callback(query, -EIO, NULL);
+ break;
+ }
+
+ spin_lock_irqsave(&idr_lock, flags);
+ idr_remove(&query_idr, query->id);
+ spin_unlock_irqrestore(&idr_lock, flags);
+
+ free_mad(query);
+ ib_sa_client_put(query->client);
+ query->release(query);
+}
+
+static void recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_sa_query *query;
+ struct ib_mad_send_buf *mad_buf;
+
+ mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
+ query = mad_buf->context[0];
+
+ if (query->callback) {
+ if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
+ query->callback(query,
+ mad_recv_wc->recv_buf.mad->mad_hdr.status ?
+ -EINVAL : 0,
+ (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
+ else
+ query->callback(query, -EIO, NULL);
+ }
+
+ ib_free_recv_mad(mad_recv_wc);
+}
+
+static void notice_resp_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ kref_put(mad_send_wc->send_buf->context[0], free_sm_ah);
+ ib_free_send_mad(mad_send_wc->send_buf);
+}
+
+static void notice_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_sa_port *port;
+ struct ib_sa_mad *mad;
+ struct ib_sa_notice notice;
+
+ port = mad_agent->context;
+ mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad;
+ ib_unpack(notice_table, ARRAY_SIZE(notice_table), mad->data, &notice);
+
+ if (!notice_dispatch(port->device, port->port_num, &notice))
+ ib_sa_notice_resp(port, mad_recv_wc);
+ ib_free_recv_mad(mad_recv_wc);
+}
+
+static void ib_sa_add_one(struct ib_device *device)
+{
+ struct ib_sa_device *sa_dev;
+ struct ib_mad_reg_req reg_req = {
+ .mgmt_class = IB_MGMT_CLASS_SUBN_ADM,
+ .mgmt_class_version = 2
+ };
+ int s, e, i;
+
+ if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
+ if (device->node_type == RDMA_NODE_IB_SWITCH)
+ s = e = 0;
+ else {
+ s = 1;
+ e = device->phys_port_cnt;
+ }
+
+ sa_dev = kzalloc(sizeof *sa_dev +
+ (e - s + 1) * sizeof (struct ib_sa_port),
+ GFP_KERNEL);
+ if (!sa_dev)
+ return;
+
+ sa_dev->start_port = s;
+ sa_dev->end_port = e;
+
+ for (i = 0; i <= e - s; ++i) {
+ spin_lock_init(&sa_dev->port[i].ah_lock);
+ if (rdma_port_get_link_layer(device, i + 1) != IB_LINK_LAYER_INFINIBAND)
+ continue;
+
+ sa_dev->port[i].sm_ah = NULL;
+ sa_dev->port[i].port_num = i + s;
+
+ sa_dev->port[i].agent =
+ ib_register_mad_agent(device, i + s, IB_QPT_GSI,
+ NULL, 0, send_handler,
+ recv_handler, sa_dev);
+ if (IS_ERR(sa_dev->port[i].agent))
+ goto err;
+
+ sa_dev->port[i].device = device;
+ set_bit(IB_MGMT_METHOD_REPORT, reg_req.method_mask);
+ sa_dev->port[i].notice_agent =
+ ib_register_mad_agent(device, i + s, IB_QPT_GSI,
+ &reg_req, 0, notice_resp_handler,
+ notice_handler, &sa_dev->port[i]);
+
+ if (IS_ERR(sa_dev->port[i].notice_agent))
+ goto err;
+
+ INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
+ }
+
+ ib_set_client_data(device, &sa_client, sa_dev);
+
+ /*
+ * We register our event handler after everything is set up,
+ * and then update our cached info after the event handler is
+ * registered to avoid any problems if a port changes state
+ * during our initialization.
+ */
+
+ INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
+ if (ib_register_event_handler(&sa_dev->event_handler))
+ goto err;
+
+ for (i = 0; i <= e - s; ++i)
+ if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND)
+ update_sm_ah(&sa_dev->port[i].update_task);
+
+ return;
+
+err:
+ while (--i >= 0)
+ if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) {
+ if (!IS_ERR(sa_dev->port[i].notice_agent))
+ ib_unregister_mad_agent(sa_dev->port[i].notice_agent);
+ if (!IS_ERR(sa_dev->port[i].agent))
+ ib_unregister_mad_agent(sa_dev->port[i].agent);
+ }
+
+ kfree(sa_dev);
+
+ return;
+}
+
+static void ib_sa_remove_one(struct ib_device *device)
+{
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ int i;
+
+ if (!sa_dev)
+ return;
+
+ ib_unregister_event_handler(&sa_dev->event_handler);
+
+ flush_scheduled_work();
+
+ for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
+ if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) {
+ ib_unregister_mad_agent(sa_dev->port[i].notice_agent);
+ ib_unregister_mad_agent(sa_dev->port[i].agent);
+ if (sa_dev->port[i].sm_ah)
+ kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
+ }
+ }
+
+ kfree(sa_dev);
+}
+
+static int __init ib_sa_init(void)
+{
+ int ret;
+
+ spin_lock_init(&idr_lock);
+ spin_lock_init(&tid_lock);
+
+ get_random_bytes(&tid, sizeof tid);
+
+ ret = ib_register_client(&sa_client);
+ if (ret) {
+ printk(KERN_ERR "Couldn't register ib_sa client\n");
+ goto err1;
+ }
+
+ ret = mcast_init();
+ if (ret) {
+ printk(KERN_ERR "Couldn't initialize multicast handling\n");
+ goto err2;
+ }
+
+ ret = notice_init();
+ if (ret) {
+ printk(KERN_ERR "Couldn't initialize notice handling\n");
+ goto err3;
+ }
+
+ ret = sa_db_init();
+ if (ret) {
+ printk(KERN_ERR "Couldn't initialize local SA\n");
+ goto err4;
+ }
+
+ return 0;
+err4:
+ notice_cleanup();
+err3:
+ mcast_cleanup();
+err2:
+ ib_unregister_client(&sa_client);
+err1:
+ return ret;
+}
+
+static void __exit ib_sa_cleanup(void)
+{
+ sa_db_cleanup();
+ mcast_cleanup();
+ notice_cleanup();
+ ib_unregister_client(&sa_client);
+ idr_destroy(&query_idr);
+}
+
+module_init_order(ib_sa_init, SI_ORDER_SECOND);
+module_exit(ib_sa_cleanup);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/sa_query.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/smi.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/smi.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/smi.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <rdma/ib_smi.h>
+#include "smi.h"
+
+/*
+ * Fix up a directed route SMP for sending.
+ * Return IB_SMI_DISCARD if the SMP should be discarded.
+ */
+enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
+ u8 node_type, int port_num)
+{
+ u8 hop_ptr, hop_cnt;
+
+ hop_ptr = smp->hop_ptr;
+ hop_cnt = smp->hop_cnt;
+
+ /* See section 14.2.2.2, Vol 1 IB spec */
+ if (!ib_get_smp_direction(smp)) {
+ /* C14-9:1 */
+ if (hop_cnt && hop_ptr == 0) {
+ smp->hop_ptr++;
+ return (smp->initial_path[smp->hop_ptr] ==
+ port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
+ }
+
+ /* C14-9:2 */
+ if (hop_ptr && hop_ptr < hop_cnt) {
+ if (node_type != RDMA_NODE_IB_SWITCH)
+ return IB_SMI_DISCARD;
+
+ /* smp->return_path set when received */
+ smp->hop_ptr++;
+ return (smp->initial_path[smp->hop_ptr] ==
+ port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
+ }
+
+ /* C14-9:3 -- We're at the end of the DR segment of path */
+ if (hop_ptr == hop_cnt) {
+ /* smp->return_path set when received */
+ smp->hop_ptr++;
+ return (node_type == RDMA_NODE_IB_SWITCH ||
+ smp->dr_dlid == IB_LID_PERMISSIVE ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
+ }
+
+ /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
+ /* C14-9:5 -- Fail unreasonable hop pointer */
+ return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
+
+ } else {
+ /* C14-13:1 */
+ if (hop_cnt && hop_ptr == hop_cnt + 1) {
+ smp->hop_ptr--;
+ return (smp->return_path[smp->hop_ptr] ==
+ port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
+ }
+
+ /* C14-13:2 */
+ if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
+ if (node_type != RDMA_NODE_IB_SWITCH)
+ return IB_SMI_DISCARD;
+
+ smp->hop_ptr--;
+ return (smp->return_path[smp->hop_ptr] ==
+ port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
+ }
+
+ /* C14-13:3 -- at the end of the DR segment of path */
+ if (hop_ptr == 1) {
+ smp->hop_ptr--;
+ /* C14-13:3 -- SMPs destined for SM shouldn't be here */
+ return (node_type == RDMA_NODE_IB_SWITCH ||
+ smp->dr_slid == IB_LID_PERMISSIVE ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
+ }
+
+ /* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */
+ if (hop_ptr == 0)
+ return IB_SMI_HANDLE;
+
+ /* C14-13:5 -- Check for unreasonable hop pointer */
+ return IB_SMI_DISCARD;
+ }
+}
+
+/*
+ * Adjust information for a received SMP.
+ * Return IB_SMI_DISCARD if the SMP should be dropped.
+ */
+enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
+ int port_num, int phys_port_cnt)
+{
+ u8 hop_ptr, hop_cnt;
+
+ hop_ptr = smp->hop_ptr;
+ hop_cnt = smp->hop_cnt;
+
+ /* See section 14.2.2.2, Vol 1 IB spec */
+ if (!ib_get_smp_direction(smp)) {
+ /* C14-9:1 -- sender should have incremented hop_ptr */
+ if (hop_cnt && hop_ptr == 0)
+ return IB_SMI_DISCARD;
+
+ /* C14-9:2 -- intermediate hop */
+ if (hop_ptr && hop_ptr < hop_cnt) {
+ if (node_type != RDMA_NODE_IB_SWITCH)
+ return IB_SMI_DISCARD;
+
+ smp->return_path[hop_ptr] = port_num;
+ /* smp->hop_ptr updated when sending */
+ return (smp->initial_path[hop_ptr+1] <= phys_port_cnt ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
+ }
+
+ /* C14-9:3 -- We're at the end of the DR segment of path */
+ if (hop_ptr == hop_cnt) {
+ if (hop_cnt)
+ smp->return_path[hop_ptr] = port_num;
+ /* smp->hop_ptr updated when sending */
+
+ return (node_type == RDMA_NODE_IB_SWITCH ||
+ smp->dr_dlid == IB_LID_PERMISSIVE ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
+ }
+
+ /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
+ /* C14-9:5 -- fail unreasonable hop pointer */
+ return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
+
+ } else {
+
+ /* C14-13:1 */
+ if (hop_cnt && hop_ptr == hop_cnt + 1) {
+ smp->hop_ptr--;
+ return (smp->return_path[smp->hop_ptr] ==
+ port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
+ }
+
+ /* C14-13:2 */
+ if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
+ if (node_type != RDMA_NODE_IB_SWITCH)
+ return IB_SMI_DISCARD;
+
+ /* smp->hop_ptr updated when sending */
+ return (smp->return_path[hop_ptr-1] <= phys_port_cnt ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
+ }
+
+ /* C14-13:3 -- We're at the end of the DR segment of path */
+ if (hop_ptr == 1) {
+ if (smp->dr_slid == IB_LID_PERMISSIVE) {
+ /* giving SMP to SM - update hop_ptr */
+ smp->hop_ptr--;
+ return IB_SMI_HANDLE;
+ }
+ /* smp->hop_ptr updated when sending */
+ return (node_type == RDMA_NODE_IB_SWITCH ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
+ }
+
+ /* C14-13:4 -- hop_ptr = 0 -> give to SM */
+ /* C14-13:5 -- Check for unreasonable hop pointer */
+ return (hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
+ }
+}
+
+enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
+{
+ u8 hop_ptr, hop_cnt;
+
+ hop_ptr = smp->hop_ptr;
+ hop_cnt = smp->hop_cnt;
+
+ if (!ib_get_smp_direction(smp)) {
+ /* C14-9:2 -- intermediate hop */
+ if (hop_ptr && hop_ptr < hop_cnt)
+ return IB_SMI_FORWARD;
+
+ /* C14-9:3 -- at the end of the DR segment of path */
+ if (hop_ptr == hop_cnt)
+ return (smp->dr_dlid == IB_LID_PERMISSIVE ?
+ IB_SMI_SEND : IB_SMI_LOCAL);
+
+ /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
+ if (hop_ptr == hop_cnt + 1)
+ return IB_SMI_SEND;
+ } else {
+ /* C14-13:2 -- intermediate hop */
+ if (2 <= hop_ptr && hop_ptr <= hop_cnt)
+ return IB_SMI_FORWARD;
+
+ /* C14-13:3 -- at the end of the DR segment of path */
+ if (hop_ptr == 1)
+ return (smp->dr_slid != IB_LID_PERMISSIVE ?
+ IB_SMI_SEND : IB_SMI_LOCAL);
+ }
+ return IB_SMI_LOCAL;
+}
+
+/*
+ * Return the forwarding port number from initial_path for outgoing SMP and
+ * from return_path for returning SMP
+ */
+int smi_get_fwd_port(struct ib_smp *smp)
+{
+ return (!ib_get_smp_direction(smp) ? smp->initial_path[smp->hop_ptr+1] :
+ smp->return_path[smp->hop_ptr-1]);
+}
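+
+/*
+ * Worked example of the hop-pointer walk, for an outgoing SMP
+ * (direction bit clear) with hop_cnt = 2: the sender bumps hop_ptr
+ * from 0 to 1 and transmits on initial_path[1]; an intermediate switch
+ * sees 0 < hop_ptr < hop_cnt and forwards it, with smi_get_fwd_port()
+ * supplying initial_path[hop_ptr + 1] as the egress port.  Once
+ * hop_ptr reaches hop_cnt the SMP is at the end of the directed-route
+ * segment, and on the reply the same walk runs in reverse over
+ * return_path[] with the direction bit set.
+ */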
Property changes on: trunk/sys/ofed/drivers/infiniband/core/smi.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/smi.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/smi.h (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/smi.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __SMI_H_
+#define __SMI_H_
+
+#include <rdma/ib_smi.h>
+
+enum smi_action {
+ IB_SMI_DISCARD,
+ IB_SMI_HANDLE
+};
+
+enum smi_forward_action {
+ IB_SMI_LOCAL, /* SMP should be completed up the stack */
+ IB_SMI_SEND, /* received DR SMP should be forwarded to the send queue */
+ IB_SMI_FORWARD /* SMP should be forwarded (for switches only) */
+};
+
+enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
+ int port_num, int phys_port_cnt);
+int smi_get_fwd_port(struct ib_smp *smp);
+extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp);
+extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
+ u8 node_type, int port_num);
+
+/*
+ * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
+ * via process_mad
+ */
+static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
+ struct ib_device *device)
+{
+ /* C14-9:3 -- We're at the end of the DR segment of path */
+ /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
+ return ((device->process_mad &&
+ !ib_get_smp_direction(smp) &&
+ (smp->hop_ptr == smp->hop_cnt + 1)) ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
+}
+
+/*
+ * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
+ * via process_mad
+ */
+static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp,
+ struct ib_device *device)
+{
+ /* C14-13:3 -- We're at the end of the DR segment of path */
+ /* C14-13:4 -- Hop Pointer == 0 -> give to SM */
+ return ((device->process_mad &&
+ ib_get_smp_direction(smp) &&
+ !smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD);
+}
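+
+/*
+ * Example: an outbound-direction SMP that arrives with
+ * hop_ptr == hop_cnt + 1 has traversed its whole directed route, so
+ * smi_check_local_smp() returns IB_SMI_HANDLE (provided the device
+ * implements process_mad) and the SMP is handed to the local SMA.  A
+ * returning SMP (direction bit set) becomes local once hop_ptr has
+ * been walked back down to 0, which is what
+ * smi_check_local_returning_smp() tests.
+ */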
+
+#endif /* __SMI_H_ */
Property changes on: trunk/sys/ofed/drivers/infiniband/core/smi.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/sysfs.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/sysfs.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/sysfs.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,981 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "core_priv.h"
+
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
+
+struct ib_port {
+ struct kobject kobj;
+ struct ib_device *ibdev;
+ struct attribute_group gid_group;
+ struct attribute_group pkey_group;
+ u8 port_num;
+};
+
+struct port_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf);
+ ssize_t (*store)(struct ib_port *, struct port_attribute *,
+ const char *buf, size_t count);
+};
+
+#define PORT_ATTR(_name, _mode, _show, _store) \
+struct port_attribute port_attr_##_name = __ATTR(_name, _mode, _show, _store)
+
+#define PORT_ATTR_RO(_name) \
+struct port_attribute port_attr_##_name = __ATTR_RO(_name)
+
+struct port_table_attribute {
+ struct port_attribute attr;
+ char name[8];
+ int index;
+};
+
+static ssize_t port_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct port_attribute *port_attr =
+ container_of(attr, struct port_attribute, attr);
+ struct ib_port *p = container_of(kobj, struct ib_port, kobj);
+
+ if (!port_attr->show)
+ return -EIO;
+
+ return port_attr->show(p, port_attr, buf);
+}
+
+static const struct sysfs_ops port_sysfs_ops = {
+ .show = port_attr_show
+};
+
+static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ ssize_t ret;
+
+ static const char *state_name[] = {
+ [IB_PORT_NOP] = "NOP",
+ [IB_PORT_DOWN] = "DOWN",
+ [IB_PORT_INIT] = "INIT",
+ [IB_PORT_ARMED] = "ARMED",
+ [IB_PORT_ACTIVE] = "ACTIVE",
+ [IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER"
+ };
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%d: %s\n", attr.state,
+ attr.state < ARRAY_SIZE(state_name) ?
+ state_name[attr.state] : "UNKNOWN");
+}
+
+static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "0x%x\n", attr.lid);
+}
+
+static ssize_t lid_mask_count_show(struct ib_port *p,
+ struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%d\n", attr.lmc);
+}
+
+static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "0x%x\n", attr.sm_lid);
+}
+
+static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%d\n", attr.sm_sl);
+}
+
+static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "0x%08x\n", attr.port_cap_flags);
+}
+
+static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ char *speed = "";
+ int rate;
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ switch (attr.active_speed) {
+ case 2: speed = " DDR"; break;
+ case 4: speed = " QDR"; break;
+ }
+
+ rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed;
+ if (rate < 0)
+ return -EINVAL;
+
+ return sprintf(buf, "%d%s Gb/sec (%dX%s)\n",
+ rate / 10, rate % 10 ? ".5" : "",
+ ib_width_enum_to_int(attr.active_width), speed);
+}
+
+static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ switch (attr.phys_state) {
+ case 1: return sprintf(buf, "1: Sleep\n");
+ case 2: return sprintf(buf, "2: Polling\n");
+ case 3: return sprintf(buf, "3: Disabled\n");
+ case 4: return sprintf(buf, "4: PortConfigurationTraining\n");
+ case 5: return sprintf(buf, "5: LinkUp\n");
+ case 6: return sprintf(buf, "6: LinkErrorRecovery\n");
+ case 7: return sprintf(buf, "7: Phy Test\n");
+ default: return sprintf(buf, "%d: <unknown>\n", attr.phys_state);
+ }
+}
+
+static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ return sprintf(buf, "%s\n", "IB");
+ case IB_LINK_LAYER_ETHERNET:
+ return sprintf(buf, "%s\n", "Ethernet");
+ default:
+ return sprintf(buf, "%s\n", "Unknown");
+ }
+}
+
+static PORT_ATTR_RO(state);
+static PORT_ATTR_RO(lid);
+static PORT_ATTR_RO(lid_mask_count);
+static PORT_ATTR_RO(sm_lid);
+static PORT_ATTR_RO(sm_sl);
+static PORT_ATTR_RO(cap_mask);
+static PORT_ATTR_RO(rate);
+static PORT_ATTR_RO(phys_state);
+static PORT_ATTR_RO(link_layer);
+
+static struct attribute *port_default_attrs[] = {
+ &port_attr_state.attr,
+ &port_attr_lid.attr,
+ &port_attr_lid_mask_count.attr,
+ &port_attr_sm_lid.attr,
+ &port_attr_sm_sl.attr,
+ &port_attr_cap_mask.attr,
+ &port_attr_rate.attr,
+ &port_attr_phys_state.attr,
+ &port_attr_link_layer.attr,
+ NULL
+};
+
+static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ union ib_gid gid;
+ ssize_t ret;
+ u16 *raw;
+
+ ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid);
+ if (ret)
+ return ret;
+
+ raw = (u16 *)gid.raw;
+ return sprintf(buf, "%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x\n",
+ htons(raw[0]), htons(raw[1]), htons(raw[2]), htons(raw[3]),
+ htons(raw[4]), htons(raw[5]), htons(raw[6]), htons(raw[7]));
+}
+
+static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ u16 pkey;
+ ssize_t ret;
+
+ ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "0x%04x\n", pkey);
+}
+
+static ssize_t get_pma_counters(struct ib_port *p, struct port_attribute *attr,
+ char *buf, int c_ext)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ int offset = tab_attr->index & 0xffff;
+ int width = (tab_attr->index >> 16) & 0xff;
+ struct ib_mad *in_mad = NULL;
+ struct ib_mad *out_mad = NULL;
+ ssize_t ret;
+
+ if (!p->ibdev->process_mad)
+ return -ENXIO;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ in_mad->mad_hdr.base_version = 1;
+ in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
+ in_mad->mad_hdr.class_version = 1;
+ in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
+ if (c_ext)
+ in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS_EXT;
+ else
+ in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS;
+
+ in_mad->data[41] = p->port_num; /* PortSelect field */
+
+ if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
+ p->port_num, NULL, NULL, in_mad, out_mad) &
+ (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
+ (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ switch (width) {
+ case 4:
+ ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
+ (4 - (offset % 8))) & 0xf);
+ break;
+ case 8:
+ ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
+ break;
+ case 16:
+ ret = sprintf(buf, "%u\n",
+ be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ case 32:
+ ret = sprintf(buf, "%u\n",
+ be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ case 64:
+ ret = sprintf(buf, "%llu\n", (unsigned long long)
+ be64_to_cpup((__be64 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ default:
+ ret = 0;
+ }
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+
+ return ret;
+}
+
+#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
+struct port_table_attribute port_pma_attr_##_name = { \
+ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
+ .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+}
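+
+/*
+ * get_pma_counters() decodes the packed index as
+ * offset = index & 0xffff and width = (index >> 16) & 0xff.  For
+ * example, PORT_PMA_ATTR(symbol_error, 0, 16, 32) below packs
+ * index = 32 | (16 << 16): a 16-bit big-endian counter read at bit
+ * offset 32 into the counter block, i.e. at byte 44 of the reply MAD,
+ * since the block starts at data[40].
+ */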
+
+static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ return get_pma_counters(p, attr, buf, 0);
+}
+
+static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
+static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
+static PORT_PMA_ATTR(link_downed , 2, 8, 56);
+static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
+static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80);
+static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96);
+static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
+static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128);
+static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
+static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152);
+static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156);
+static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
+static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
+static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
+static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
+static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
+
+static struct attribute *pma_attrs[] = {
+ &port_pma_attr_symbol_error.attr.attr,
+ &port_pma_attr_link_error_recovery.attr.attr,
+ &port_pma_attr_link_downed.attr.attr,
+ &port_pma_attr_port_rcv_errors.attr.attr,
+ &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+ &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+ &port_pma_attr_port_xmit_discards.attr.attr,
+ &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+ &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+ &port_pma_attr_local_link_integrity_errors.attr.attr,
+ &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+ &port_pma_attr_VL15_dropped.attr.attr,
+ &port_pma_attr_port_xmit_data.attr.attr,
+ &port_pma_attr_port_rcv_data.attr.attr,
+ &port_pma_attr_port_xmit_packets.attr.attr,
+ &port_pma_attr_port_rcv_packets.attr.attr,
+ NULL
+};
+
+static struct attribute_group pma_group = {
+ .name = "counters",
+ .attrs = pma_attrs
+};
+
+#define PORT_PMA_ATTR_EXT(_name, _counter, _width, _offset) \
+struct port_table_attribute port_pma_attr_ext_##_name = { \
+ .attr = __ATTR(_name, S_IRUGO, show_pma_counter_ext, NULL), \
+ .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+}
+
+static ssize_t show_pma_counter_ext(struct ib_port *p,
+ struct port_attribute *attr, char *buf)
+{
+ return get_pma_counters(p, attr, buf, 1);
+}
+
+static PORT_PMA_ATTR_EXT(port_xmit_data_64 , 0, 64, 64);
+static PORT_PMA_ATTR_EXT(port_rcv_data_64 , 0, 64, 128);
+static PORT_PMA_ATTR_EXT(port_xmit_packets_64 , 0, 64, 192);
+static PORT_PMA_ATTR_EXT(port_rcv_packets_64 , 0, 64, 256);
+static PORT_PMA_ATTR_EXT(port_unicast_xmit_packets , 0, 64, 320);
+static PORT_PMA_ATTR_EXT(port_unicast_rcv_packets , 0, 64, 384);
+static PORT_PMA_ATTR_EXT(port_multicast_xmit_packets , 0, 64, 448);
+static PORT_PMA_ATTR_EXT(port_multicast_rcv_packets , 0, 64, 512);
+
+static struct attribute *pma_attrs_ext[] = {
+ &port_pma_attr_ext_port_xmit_data_64.attr.attr,
+ &port_pma_attr_ext_port_rcv_data_64.attr.attr,
+ &port_pma_attr_ext_port_xmit_packets_64.attr.attr,
+ &port_pma_attr_ext_port_rcv_packets_64.attr.attr,
+ &port_pma_attr_ext_port_unicast_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_unicast_rcv_packets.attr.attr,
+ &port_pma_attr_ext_port_multicast_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_multicast_rcv_packets.attr.attr,
+ NULL
+};
+
+static struct attribute_group pma_ext_group = {
+ .name = "counters_ext",
+ .attrs = pma_attrs_ext
+};
+
+static void ib_port_release(struct kobject *kobj)
+{
+ struct ib_port *p = container_of(kobj, struct ib_port, kobj);
+ struct attribute *a;
+ int i;
+
+ for (i = 0; (a = p->gid_group.attrs[i]); ++i)
+ kfree(a);
+
+ kfree(p->gid_group.attrs);
+
+ for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
+ kfree(a);
+
+ kfree(p->pkey_group.attrs);
+
+ kfree(p);
+}
+
+static struct kobj_type port_type = {
+ .release = ib_port_release,
+ .sysfs_ops = &port_sysfs_ops,
+ .default_attrs = port_default_attrs
+};
+
+static void ib_device_release(struct device *device)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+
+ kfree(dev);
+}
+
+#ifdef __linux__
+/* BSD supports this through devfs(5) and devd(8). */
+static int ib_device_uevent(struct device *device,
+ struct kobj_uevent_env *env)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+
+ if (add_uevent_var(env, "NAME=%s", dev->name))
+ return -ENOMEM;
+
+ /*
+ * It would be nice to pass the node GUID with the event...
+ */
+
+ return 0;
+}
+#endif
+
+static struct attribute **
+alloc_group_attrs(ssize_t (*show)(struct ib_port *,
+ struct port_attribute *, char *buf),
+ int len)
+{
+ struct attribute **tab_attr;
+ struct port_table_attribute *element;
+ int i;
+
+ tab_attr = kcalloc(1 + len, sizeof(struct attribute *), GFP_KERNEL);
+ if (!tab_attr)
+ return NULL;
+
+ for (i = 0; i < len; i++) {
+ element = kzalloc(sizeof(struct port_table_attribute),
+ GFP_KERNEL);
+ if (!element)
+ goto err;
+
+ if (snprintf(element->name, sizeof(element->name),
+ "%d", i) >= sizeof(element->name)) {
+ kfree(element);
+ goto err;
+ }
+
+ element->attr.attr.name = element->name;
+ element->attr.attr.mode = S_IRUGO;
+ element->attr.show = show;
+ element->index = i;
+
+ tab_attr[i] = &element->attr.attr;
+ }
+
+ return tab_attr;
+
+err:
+ while (--i >= 0)
+ kfree(tab_attr[i]);
+ kfree(tab_attr);
+ return NULL;
+}
+
+static int add_port(struct ib_device *device, int port_num,
+ int (*port_callback)(struct ib_device *,
+ u8, struct kobject *))
+{
+ struct ib_port *p;
+ struct ib_port_attr attr;
+ int i;
+ int ret;
+
+ ret = ib_query_port(device, port_num, &attr);
+ if (ret)
+ return ret;
+
+ p = kzalloc(sizeof *p, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ p->ibdev = device;
+ p->port_num = port_num;
+
+ ret = kobject_init_and_add(&p->kobj, &port_type,
+ kobject_get(device->ports_parent),
+ "%d", port_num);
+ if (ret)
+ goto err_put;
+
+ ret = sysfs_create_group(&p->kobj, &pma_group);
+ if (ret)
+ goto err_put;
+
+ ret = sysfs_create_group(&p->kobj, &pma_ext_group);
+ if (ret)
+ goto err_remove_pma;
+
+ p->gid_group.name = "gids";
+ p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
+ if (!p->gid_group.attrs)
+ goto err_remove_pma_ext;
+
+ ret = sysfs_create_group(&p->kobj, &p->gid_group);
+ if (ret)
+ goto err_free_gid;
+
+ p->pkey_group.name = "pkeys";
+ p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
+ attr.pkey_tbl_len);
+ if (!p->pkey_group.attrs)
+ goto err_remove_gid;
+
+ ret = sysfs_create_group(&p->kobj, &p->pkey_group);
+ if (ret)
+ goto err_free_pkey;
+
+ if (port_callback) {
+ ret = port_callback(device, port_num, &p->kobj);
+ if (ret)
+ goto err_remove_pkey;
+ }
+
+ list_add_tail(&p->kobj.entry, &device->port_list);
+
+#ifdef __linux__
+ kobject_uevent(&p->kobj, KOBJ_ADD);
+#endif
+ return 0;
+
+err_remove_pkey:
+ sysfs_remove_group(&p->kobj, &p->pkey_group);
+
+err_free_pkey:
+ for (i = 0; i < attr.pkey_tbl_len; ++i)
+ kfree(p->pkey_group.attrs[i]);
+
+ kfree(p->pkey_group.attrs);
+
+err_remove_gid:
+ sysfs_remove_group(&p->kobj, &p->gid_group);
+
+err_free_gid:
+ for (i = 0; i < attr.gid_tbl_len; ++i)
+ kfree(p->gid_group.attrs[i]);
+
+ kfree(p->gid_group.attrs);
+
+err_remove_pma_ext:
+ sysfs_remove_group(&p->kobj, &pma_ext_group);
+
+err_remove_pma:
+ sysfs_remove_group(&p->kobj, &pma_group);
+
+err_put:
+ kobject_put(device->ports_parent);
+ kfree(p);
+ return ret;
+}
+
+static ssize_t show_node_type(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+
+ switch (dev->node_type) {
+ case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
+ case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
+ case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
+ case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
+ default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
+ }
+}
+
+static ssize_t show_sys_image_guid(struct device *device,
+ struct device_attribute *dev_attr, char *buf)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+ struct ib_device_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_device(dev, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]),
+ be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]),
+ be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]),
+ be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3]));
+}
+
+static ssize_t show_node_guid(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+
+ return sprintf(buf, "%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(((__be16 *) &dev->node_guid)[0]),
+ be16_to_cpu(((__be16 *) &dev->node_guid)[1]),
+ be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
+ be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
+}
+
+static ssize_t show_node_desc(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+
+ return sprintf(buf, "%.64s\n", dev->node_desc);
+}
+
+static ssize_t set_node_desc(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+ struct ib_device_modify desc = {};
+ int ret;
+
+ if (!dev->modify_device)
+ return -EIO;
+
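+ /* The IB node description is at most 64 bytes; longer input is
+ * silently truncated. */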
+ memcpy(desc.node_desc, buf, min_t(int, count, 64));
+ ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
+static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
+static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
+static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
+
+static struct device_attribute *ib_class_attributes[] = {
+ &dev_attr_node_type,
+ &dev_attr_sys_image_guid,
+ &dev_attr_node_guid,
+ &dev_attr_node_desc
+};
+
+static struct class ib_class = {
+ .name = "infiniband",
+ .dev_release = ib_device_release,
+#ifdef __linux__
+ .dev_uevent = ib_device_uevent,
+#endif
+};
+
+/* Show a given attribute in the statistics group */
+static ssize_t show_protocol_stat(const struct device *device,
+ struct device_attribute *attr, char *buf,
+ unsigned offset)
+{
+ struct ib_device *dev = container_of(__DECONST(struct device *, device), struct ib_device, dev);
+ union rdma_protocol_stats stats;
+ ssize_t ret;
+
+ ret = dev->get_protocol_stats(dev, &stats);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%llu\n",
+ (unsigned long long) ((u64 *) &stats)[offset]);
+}
+
+/* Generate a read-only iWARP statistics attribute; the show routine
+ * indexes the stats union as an array of u64 via offsetof(). */
+#define IW_STATS_ENTRY(name) \
+static ssize_t show_##name(struct device *device, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ return show_protocol_stat(device, attr, buf, \
+ offsetof(struct iw_protocol_stats, name) / \
+ sizeof (u64)); \
+} \
+static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+IW_STATS_ENTRY(ipInReceives);
+IW_STATS_ENTRY(ipInHdrErrors);
+IW_STATS_ENTRY(ipInTooBigErrors);
+IW_STATS_ENTRY(ipInNoRoutes);
+IW_STATS_ENTRY(ipInAddrErrors);
+IW_STATS_ENTRY(ipInUnknownProtos);
+IW_STATS_ENTRY(ipInTruncatedPkts);
+IW_STATS_ENTRY(ipInDiscards);
+IW_STATS_ENTRY(ipInDelivers);
+IW_STATS_ENTRY(ipOutForwDatagrams);
+IW_STATS_ENTRY(ipOutRequests);
+IW_STATS_ENTRY(ipOutDiscards);
+IW_STATS_ENTRY(ipOutNoRoutes);
+IW_STATS_ENTRY(ipReasmTimeout);
+IW_STATS_ENTRY(ipReasmReqds);
+IW_STATS_ENTRY(ipReasmOKs);
+IW_STATS_ENTRY(ipReasmFails);
+IW_STATS_ENTRY(ipFragOKs);
+IW_STATS_ENTRY(ipFragFails);
+IW_STATS_ENTRY(ipFragCreates);
+IW_STATS_ENTRY(ipInMcastPkts);
+IW_STATS_ENTRY(ipOutMcastPkts);
+IW_STATS_ENTRY(ipInBcastPkts);
+IW_STATS_ENTRY(ipOutBcastPkts);
+IW_STATS_ENTRY(tcpRtoAlgorithm);
+IW_STATS_ENTRY(tcpRtoMin);
+IW_STATS_ENTRY(tcpRtoMax);
+IW_STATS_ENTRY(tcpMaxConn);
+IW_STATS_ENTRY(tcpActiveOpens);
+IW_STATS_ENTRY(tcpPassiveOpens);
+IW_STATS_ENTRY(tcpAttemptFails);
+IW_STATS_ENTRY(tcpEstabResets);
+IW_STATS_ENTRY(tcpCurrEstab);
+IW_STATS_ENTRY(tcpInSegs);
+IW_STATS_ENTRY(tcpOutSegs);
+IW_STATS_ENTRY(tcpRetransSegs);
+IW_STATS_ENTRY(tcpInErrs);
+IW_STATS_ENTRY(tcpOutRsts);
+
+static struct attribute *iw_proto_stats_attrs[] = {
+ &dev_attr_ipInReceives.attr,
+ &dev_attr_ipInHdrErrors.attr,
+ &dev_attr_ipInTooBigErrors.attr,
+ &dev_attr_ipInNoRoutes.attr,
+ &dev_attr_ipInAddrErrors.attr,
+ &dev_attr_ipInUnknownProtos.attr,
+ &dev_attr_ipInTruncatedPkts.attr,
+ &dev_attr_ipInDiscards.attr,
+ &dev_attr_ipInDelivers.attr,
+ &dev_attr_ipOutForwDatagrams.attr,
+ &dev_attr_ipOutRequests.attr,
+ &dev_attr_ipOutDiscards.attr,
+ &dev_attr_ipOutNoRoutes.attr,
+ &dev_attr_ipReasmTimeout.attr,
+ &dev_attr_ipReasmReqds.attr,
+ &dev_attr_ipReasmOKs.attr,
+ &dev_attr_ipReasmFails.attr,
+ &dev_attr_ipFragOKs.attr,
+ &dev_attr_ipFragFails.attr,
+ &dev_attr_ipFragCreates.attr,
+ &dev_attr_ipInMcastPkts.attr,
+ &dev_attr_ipOutMcastPkts.attr,
+ &dev_attr_ipInBcastPkts.attr,
+ &dev_attr_ipOutBcastPkts.attr,
+ &dev_attr_tcpRtoAlgorithm.attr,
+ &dev_attr_tcpRtoMin.attr,
+ &dev_attr_tcpRtoMax.attr,
+ &dev_attr_tcpMaxConn.attr,
+ &dev_attr_tcpActiveOpens.attr,
+ &dev_attr_tcpPassiveOpens.attr,
+ &dev_attr_tcpAttemptFails.attr,
+ &dev_attr_tcpEstabResets.attr,
+ &dev_attr_tcpCurrEstab.attr,
+ &dev_attr_tcpInSegs.attr,
+ &dev_attr_tcpOutSegs.attr,
+ &dev_attr_tcpRetransSegs.attr,
+ &dev_attr_tcpInErrs.attr,
+ &dev_attr_tcpOutRsts.attr,
+ NULL
+};
+
+static struct attribute_group iw_stats_group = {
+ .name = "proto_stats",
+ .attrs = iw_proto_stats_attrs,
+};
+
+int ib_device_register_sysfs(struct ib_device *device,
+ int (*port_callback)(struct ib_device *, u8, struct kobject *))
+{
+ struct device *class_dev = &device->dev;
+ int ret;
+ int i;
+
+ class_dev->class = &ib_class;
+ class_dev->parent = device->dma_device;
+ dev_set_name(class_dev, "%s", device->name);
+ dev_set_drvdata(class_dev, device);
+
+ INIT_LIST_HEAD(&device->port_list);
+
+ ret = device_register(class_dev);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
+ ret = device_create_file(class_dev, ib_class_attributes[i]);
+ if (ret)
+ goto err_unregister;
+ }
+
+ device->ports_parent = kobject_create_and_add("ports",
+ kobject_get(&class_dev->kobj));
+ if (!device->ports_parent) {
+ ret = -ENOMEM;
+ goto err_put;
+ }
+
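+ /*
+ * Switches expose a single management port numbered 0; all other
+ * node types number their physical ports 1..phys_port_cnt.
+ */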
+ if (device->node_type == RDMA_NODE_IB_SWITCH) {
+ ret = add_port(device, 0, port_callback);
+ if (ret)
+ goto err_put;
+ } else {
+ for (i = 1; i <= device->phys_port_cnt; ++i) {
+ ret = add_port(device, i, port_callback);
+ if (ret)
+ goto err_put;
+ }
+ }
+
+ if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) {
+ ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group);
+ if (ret)
+ goto err_put;
+ }
+
+ return 0;
+
+err_put:
+ {
+ struct kobject *p, *t;
+ struct ib_port *port;
+
+ list_for_each_entry_safe(p, t, &device->port_list, entry) {
+ list_del(&p->entry);
+ port = container_of(p, struct ib_port, kobj);
+ sysfs_remove_group(p, &pma_group);
+ sysfs_remove_group(p, &port->pkey_group);
+ sysfs_remove_group(p, &port->gid_group);
+ kobject_put(p);
+ }
+ }
+
+ kobject_put(&class_dev->kobj);
+
+err_unregister:
+ device_unregister(class_dev);
+
+err:
+ return ret;
+}
+
+void ib_device_unregister_sysfs(struct ib_device *device)
+{
+ struct kobject *p, *t;
+ struct ib_port *port;
+ int i;
+
+ /* Hold kobject until ib_dealloc_device() */
+ kobject_get(&device->dev.kobj);
+
+ for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
+ device_remove_file(&device->dev, ib_class_attributes[i]);
+ }
+
+ list_for_each_entry_safe(p, t, &device->port_list, entry) {
+ list_del(&p->entry);
+ port = container_of(p, struct ib_port, kobj);
+ sysfs_remove_group(p, &pma_group);
+ sysfs_remove_group(p, &port->pkey_group);
+ sysfs_remove_group(p, &port->gid_group);
+ kobject_put(p);
+ }
+
+ kobject_put(device->ports_parent);
+ device_unregister(&device->dev);
+}
+
+int ib_sysfs_setup(void)
+{
+ return class_register(&ib_class);
+}
+
+void ib_sysfs_cleanup(void)
+{
+ class_unregister(&ib_class);
+}
+
+/*int ib_sysfs_create_port_files(struct ib_device *device,
+ int (*create)(struct ib_device *dev, u8 port_num,
+ struct kobject *kobj))
+{
+ struct kobject *p;
+ struct ib_port *port;
+ int ret = 0;
+
+ list_for_each_entry(p, &device->port_list, entry) {
+ port = container_of(p, struct ib_port, kobj);
+ ret = create(device, port->port_num, &port->kobj);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_sysfs_create_port_files);*/
Property changes on: trunk/sys/ofed/drivers/infiniband/core/sysfs.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/ucm.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/ucm.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/ucm.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,1344 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/cdev.h>
+#include <linux/idr.h>
+#include <linux/mutex.h>
+#include <linux/string.h>
+
+#include <asm/uaccess.h>
+
+#include <rdma/ib_cm.h>
+#include <rdma/ib_user_cm.h>
+#include <rdma/ib_marshall.h>
+
+MODULE_AUTHOR("Libor Michalek");
+MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
+MODULE_LICENSE("Dual BSD/GPL");
+
+struct ib_ucm_device {
+ int devnum;
+ struct cdev cdev;
+ struct device dev;
+ struct ib_device *ib_dev;
+};
+
+struct ib_ucm_file {
+ struct mutex file_mutex;
+ struct file *filp;
+ struct ib_ucm_device *device;
+
+ struct list_head ctxs;
+ struct list_head events;
+ wait_queue_head_t poll_wait;
+};
+
+struct ib_ucm_context {
+ int id;
+ struct completion comp;
+ atomic_t ref;
+ int events_reported;
+
+ struct ib_ucm_file *file;
+ struct ib_cm_id *cm_id;
+ __u64 uid;
+
+ struct list_head events; /* list of pending events. */
+ struct list_head file_list; /* member in file ctx list */
+};
+
+struct ib_ucm_event {
+ struct ib_ucm_context *ctx;
+ struct list_head file_list; /* member in file event list */
+ struct list_head ctx_list; /* member in ctx event list */
+
+ struct ib_cm_id *cm_id;
+ struct ib_ucm_event_resp resp;
+ void *data;
+ void *info;
+ int data_len;
+ int info_len;
+};
+
+enum {
+ IB_UCM_MAJOR = 231,
+ IB_UCM_BASE_MINOR = 224,
+ IB_UCM_MAX_DEVICES = 32
+};
+
+#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
+
+static void ib_ucm_add_one(struct ib_device *device);
+static void ib_ucm_remove_one(struct ib_device *device);
+
+static struct ib_client ucm_client = {
+ .name = "ucm",
+ .add = ib_ucm_add_one,
+ .remove = ib_ucm_remove_one
+};
+
+static DEFINE_MUTEX(ctx_id_mutex);
+static DEFINE_IDR(ctx_id_table);
+static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES);
+
+static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
+{
+ struct ib_ucm_context *ctx;
+
+ mutex_lock(&ctx_id_mutex);
+ ctx = idr_find(&ctx_id_table, id);
+ if (!ctx)
+ ctx = ERR_PTR(-ENOENT);
+ else if (ctx->file != file)
+ ctx = ERR_PTR(-EINVAL);
+ else
+ atomic_inc(&ctx->ref);
+ mutex_unlock(&ctx_id_mutex);
+
+ return ctx;
+}
+
+static void ib_ucm_ctx_put(struct ib_ucm_context *ctx)
+{
+ if (atomic_dec_and_test(&ctx->ref))
+ complete(&ctx->comp);
+}
+
+static inline int ib_ucm_new_cm_id(int event)
+{
+ return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED;
+}
+
+static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx)
+{
+ struct ib_ucm_event *uevent;
+
+ mutex_lock(&ctx->file->file_mutex);
+ list_del(&ctx->file_list);
+ while (!list_empty(&ctx->events)) {
+
+ uevent = list_entry(ctx->events.next,
+ struct ib_ucm_event, ctx_list);
+ list_del(&uevent->file_list);
+ list_del(&uevent->ctx_list);
+ mutex_unlock(&ctx->file->file_mutex);
+
+ /* clear incoming connections. */
+ if (ib_ucm_new_cm_id(uevent->resp.event))
+ ib_destroy_cm_id(uevent->cm_id);
+
+ kfree(uevent);
+ mutex_lock(&ctx->file->file_mutex);
+ }
+ mutex_unlock(&ctx->file->file_mutex);
+}
+
+static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
+{
+ struct ib_ucm_context *ctx;
+ int result;
+
+ ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
+ if (!ctx)
+ return NULL;
+
+ atomic_set(&ctx->ref, 1);
+ init_completion(&ctx->comp);
+ ctx->file = file;
+ INIT_LIST_HEAD(&ctx->events);
+
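+ /*
+ * Legacy two-step IDR allocation: preload memory, then retry the
+ * insert for as long as concurrent allocators make it fail with
+ * -EAGAIN.
+ */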
+ do {
+ result = idr_pre_get(&ctx_id_table, GFP_KERNEL);
+ if (!result)
+ goto error;
+
+ mutex_lock(&ctx_id_mutex);
+ result = idr_get_new(&ctx_id_table, ctx, &ctx->id);
+ mutex_unlock(&ctx_id_mutex);
+ } while (result == -EAGAIN);
+
+ if (result)
+ goto error;
+
+ list_add_tail(&ctx->file_list, &file->ctxs);
+ return ctx;
+
+error:
+ kfree(ctx);
+ return NULL;
+}
+
+static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
+ struct ib_cm_req_event_param *kreq)
+{
+ ureq->remote_ca_guid = kreq->remote_ca_guid;
+ ureq->remote_qkey = kreq->remote_qkey;
+ ureq->remote_qpn = kreq->remote_qpn;
+ ureq->qp_type = kreq->qp_type;
+ ureq->starting_psn = kreq->starting_psn;
+ ureq->responder_resources = kreq->responder_resources;
+ ureq->initiator_depth = kreq->initiator_depth;
+ ureq->local_cm_response_timeout = kreq->local_cm_response_timeout;
+ ureq->flow_control = kreq->flow_control;
+ ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout;
+ ureq->retry_count = kreq->retry_count;
+ ureq->rnr_retry_count = kreq->rnr_retry_count;
+ ureq->srq = kreq->srq;
+ ureq->port = kreq->port;
+
+ ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path);
+ if (kreq->alternate_path)
+ ib_copy_path_rec_to_user(&ureq->alternate_path,
+ kreq->alternate_path);
+}
+
+static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
+ struct ib_cm_rep_event_param *krep)
+{
+ urep->remote_ca_guid = krep->remote_ca_guid;
+ urep->remote_qkey = krep->remote_qkey;
+ urep->remote_qpn = krep->remote_qpn;
+ urep->starting_psn = krep->starting_psn;
+ urep->responder_resources = krep->responder_resources;
+ urep->initiator_depth = krep->initiator_depth;
+ urep->target_ack_delay = krep->target_ack_delay;
+ urep->failover_accepted = krep->failover_accepted;
+ urep->flow_control = krep->flow_control;
+ urep->rnr_retry_count = krep->rnr_retry_count;
+ urep->srq = krep->srq;
+}
+
+static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep,
+ struct ib_cm_sidr_rep_event_param *krep)
+{
+ urep->status = krep->status;
+ urep->qkey = krep->qkey;
+ urep->qpn = krep->qpn;
+}
+
+static int ib_ucm_event_process(struct ib_cm_event *evt,
+ struct ib_ucm_event *uvt)
+{
+ void *info = NULL;
+
+ switch (evt->event) {
+ case IB_CM_REQ_RECEIVED:
+ ib_ucm_event_req_get(&uvt->resp.u.req_resp,
+ &evt->param.req_rcvd);
+ uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE;
+ uvt->resp.present = IB_UCM_PRES_PRIMARY;
+ uvt->resp.present |= (evt->param.req_rcvd.alternate_path ?
+ IB_UCM_PRES_ALTERNATE : 0);
+ break;
+ case IB_CM_REP_RECEIVED:
+ ib_ucm_event_rep_get(&uvt->resp.u.rep_resp,
+ &evt->param.rep_rcvd);
+ uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
+ break;
+ case IB_CM_RTU_RECEIVED:
+ uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE;
+ uvt->resp.u.send_status = evt->param.send_status;
+ break;
+ case IB_CM_DREQ_RECEIVED:
+ uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE;
+ uvt->resp.u.send_status = evt->param.send_status;
+ break;
+ case IB_CM_DREP_RECEIVED:
+ uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE;
+ uvt->resp.u.send_status = evt->param.send_status;
+ break;
+ case IB_CM_MRA_RECEIVED:
+ uvt->resp.u.mra_resp.timeout =
+ evt->param.mra_rcvd.service_timeout;
+ uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE;
+ break;
+ case IB_CM_REJ_RECEIVED:
+ uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason;
+ uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
+ uvt->info_len = evt->param.rej_rcvd.ari_length;
+ info = evt->param.rej_rcvd.ari;
+ break;
+ case IB_CM_LAP_RECEIVED:
+ ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path,
+ evt->param.lap_rcvd.alternate_path);
+ uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE;
+ uvt->resp.present = IB_UCM_PRES_ALTERNATE;
+ break;
+ case IB_CM_APR_RECEIVED:
+ uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status;
+ uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE;
+ uvt->info_len = evt->param.apr_rcvd.info_len;
+ info = evt->param.apr_rcvd.apr_info;
+ break;
+ case IB_CM_SIDR_REQ_RECEIVED:
+ uvt->resp.u.sidr_req_resp.pkey =
+ evt->param.sidr_req_rcvd.pkey;
+ uvt->resp.u.sidr_req_resp.port =
+ evt->param.sidr_req_rcvd.port;
+ uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
+ break;
+ case IB_CM_SIDR_REP_RECEIVED:
+ ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp,
+ &evt->param.sidr_rep_rcvd);
+ uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
+ uvt->info_len = evt->param.sidr_rep_rcvd.info_len;
+ info = evt->param.sidr_rep_rcvd.info;
+ break;
+ default:
+ uvt->resp.u.send_status = evt->param.send_status;
+ break;
+ }
+
+ if (uvt->data_len) {
+ uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL);
+ if (!uvt->data)
+ goto err1;
+
+ uvt->resp.present |= IB_UCM_PRES_DATA;
+ }
+
+ if (uvt->info_len) {
+ uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL);
+ if (!uvt->info)
+ goto err2;
+
+ uvt->resp.present |= IB_UCM_PRES_INFO;
+ }
+ return 0;
+
+err2:
+ kfree(uvt->data);
+err1:
+ return -ENOMEM;
+}
+
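+/*
+ * CM callback: queue a copy of the event for userspace and wake any
+ * pollers. A nonzero return tells the CM to destroy the cm_id, which
+ * is requested only for new-connection events that could not be
+ * queued.
+ */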
+static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
+ struct ib_cm_event *event)
+{
+ struct ib_ucm_event *uevent;
+ struct ib_ucm_context *ctx;
+ int result = 0;
+
+ ctx = cm_id->context;
+
+ uevent = kzalloc(sizeof *uevent, GFP_KERNEL);
+ if (!uevent)
+ goto err1;
+
+ uevent->ctx = ctx;
+ uevent->cm_id = cm_id;
+ uevent->resp.uid = ctx->uid;
+ uevent->resp.id = ctx->id;
+ uevent->resp.event = event->event;
+
+ result = ib_ucm_event_process(event, uevent);
+ if (result)
+ goto err2;
+
+ mutex_lock(&ctx->file->file_mutex);
+ list_add_tail(&uevent->file_list, &ctx->file->events);
+ list_add_tail(&uevent->ctx_list, &ctx->events);
+ wake_up_interruptible(&ctx->file->poll_wait);
+ if (ctx->file->filp)
+ selwakeup(&ctx->file->filp->f_selinfo);
+ mutex_unlock(&ctx->file->file_mutex);
+ return 0;
+
+err2:
+ kfree(uevent);
+err1:
+ /* Have the CM destroy any new cm_id we failed to report. */
+ return ib_ucm_new_cm_id(event->event);
+}
+
+static ssize_t ib_ucm_event(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_ucm_context *ctx;
+ struct ib_ucm_event_get cmd;
+ struct ib_ucm_event *uevent;
+ int result = 0;
+ DEFINE_WAIT(wait);
+
+ if (out_len < sizeof(struct ib_ucm_event_resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ mutex_lock(&file->file_mutex);
+ while (list_empty(&file->events)) {
+ mutex_unlock(&file->file_mutex);
+
+ if (file->filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(file->poll_wait,
+ !list_empty(&file->events)))
+ return -ERESTARTSYS;
+
+ mutex_lock(&file->file_mutex);
+ }
+
+ uevent = list_entry(file->events.next, struct ib_ucm_event, file_list);
+
+ if (ib_ucm_new_cm_id(uevent->resp.event)) {
+ ctx = ib_ucm_ctx_alloc(file);
+ if (!ctx) {
+ result = -ENOMEM;
+ goto done;
+ }
+
+ ctx->cm_id = uevent->cm_id;
+ ctx->cm_id->context = ctx;
+ uevent->resp.id = ctx->id;
+ }
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &uevent->resp, sizeof(uevent->resp))) {
+ result = -EFAULT;
+ goto done;
+ }
+
+ if (uevent->data) {
+ if (cmd.data_len < uevent->data_len) {
+ result = -ENOMEM;
+ goto done;
+ }
+ if (copy_to_user((void __user *)(unsigned long)cmd.data,
+ uevent->data, uevent->data_len)) {
+ result = -EFAULT;
+ goto done;
+ }
+ }
+
+ if (uevent->info) {
+ if (cmd.info_len < uevent->info_len) {
+ result = -ENOMEM;
+ goto done;
+ }
+ if (copy_to_user((void __user *)(unsigned long)cmd.info,
+ uevent->info, uevent->info_len)) {
+ result = -EFAULT;
+ goto done;
+ }
+ }
+
+ list_del(&uevent->file_list);
+ list_del(&uevent->ctx_list);
+ uevent->ctx->events_reported++;
+
+ kfree(uevent->data);
+ kfree(uevent->info);
+ kfree(uevent);
+done:
+ mutex_unlock(&file->file_mutex);
+ return result;
+}
+
+static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_ucm_create_id cmd;
+ struct ib_ucm_create_id_resp resp;
+ struct ib_ucm_context *ctx;
+ int result;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ mutex_lock(&file->file_mutex);
+ ctx = ib_ucm_ctx_alloc(file);
+ mutex_unlock(&file->file_mutex);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->uid = cmd.uid;
+ ctx->cm_id = ib_create_cm_id(file->device->ib_dev,
+ ib_ucm_event_handler, ctx);
+ if (IS_ERR(ctx->cm_id)) {
+ result = PTR_ERR(ctx->cm_id);
+ goto err1;
+ }
+
+ resp.id = ctx->id;
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp))) {
+ result = -EFAULT;
+ goto err2;
+ }
+ return 0;
+
+err2:
+ ib_destroy_cm_id(ctx->cm_id);
+err1:
+ mutex_lock(&ctx_id_mutex);
+ idr_remove(&ctx_id_table, ctx->id);
+ mutex_unlock(&ctx_id_mutex);
+ kfree(ctx);
+ return result;
+}
+
+static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_ucm_destroy_id cmd;
+ struct ib_ucm_destroy_id_resp resp;
+ struct ib_ucm_context *ctx;
+ int result = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ mutex_lock(&ctx_id_mutex);
+ ctx = idr_find(&ctx_id_table, cmd.id);
+ if (!ctx)
+ ctx = ERR_PTR(-ENOENT);
+ else if (ctx->file != file)
+ ctx = ERR_PTR(-EINVAL);
+ else
+ idr_remove(&ctx_id_table, ctx->id);
+ mutex_unlock(&ctx_id_mutex);
+
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
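+ /*
+ * Drop our reference and wait until all concurrent users (event
+ * handlers still holding the context) release theirs before
+ * tearing the cm_id down.
+ */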
+ ib_ucm_ctx_put(ctx);
+ wait_for_completion(&ctx->comp);
+
+ /* No new events will be generated after destroying the cm_id. */
+ ib_destroy_cm_id(ctx->cm_id);
+ /* Cleanup events not yet reported to the user. */
+ ib_ucm_cleanup_events(ctx);
+
+ resp.events_reported = ctx->events_reported;
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ result = -EFAULT;
+
+ kfree(ctx);
+ return result;
+}
+
+static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_ucm_attr_id_resp resp;
+ struct ib_ucm_attr_id cmd;
+ struct ib_ucm_context *ctx;
+ int result = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ib_ucm_ctx_get(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ resp.service_id = ctx->cm_id->service_id;
+ resp.service_mask = ctx->cm_id->service_mask;
+ resp.local_id = ctx->cm_id->local_id;
+ resp.remote_id = ctx->cm_id->remote_id;
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ result = -EFAULT;
+
+ ib_ucm_ctx_put(ctx);
+ return result;
+}
+
+static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_qp_attr resp;
+ struct ib_ucm_init_qp_attr cmd;
+ struct ib_ucm_context *ctx;
+ struct ib_qp_attr qp_attr;
+ int result = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ib_ucm_ctx_get(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ resp.qp_attr_mask = 0;
+ memset(&qp_attr, 0, sizeof qp_attr);
+ qp_attr.qp_state = cmd.qp_state;
+ result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
+ if (result)
+ goto out;
+
+ ib_copy_qp_attr_to_user(&resp, &qp_attr);
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ result = -EFAULT;
+
+out:
+ ib_ucm_ctx_put(ctx);
+ return result;
+}
+
+static int ucm_validate_listen(__be64 service_id, __be64 service_mask)
+{
+ service_id &= service_mask;
+
+ if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) ||
+ ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID))
+ return -EINVAL;
+
+ return 0;
+}
+
+static ssize_t ib_ucm_listen(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_ucm_listen cmd;
+ struct ib_ucm_context *ctx;
+ int result;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ib_ucm_ctx_get(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ result = ucm_validate_listen(cmd.service_id, cmd.service_mask);
+ if (result)
+ goto out;
+
+ result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask,
+ NULL);
+out:
+ ib_ucm_ctx_put(ctx);
+ return result;
+}
+
+static ssize_t ib_ucm_notify(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_ucm_notify cmd;
+ struct ib_ucm_context *ctx;
+ int result;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ib_ucm_ctx_get(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
+ ib_ucm_ctx_put(ctx);
+ return result;
+}
+
+static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
+{
+ void *data;
+
+ *dest = NULL;
+
+ if (!len)
+ return 0;
+
+ data = kmalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ if (copy_from_user(data, (void __user *)(unsigned long)src, len)) {
+ kfree(data);
+ return -EFAULT;
+ }
+
+ *dest = data;
+ return 0;
+}
+
+static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src)
+{
+ struct ib_user_path_rec upath;
+ struct ib_sa_path_rec *sa_path;
+
+ *path = NULL;
+
+ if (!src)
+ return 0;
+
+ sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL);
+ if (!sa_path)
+ return -ENOMEM;
+
+ if (copy_from_user(&upath, (void __user *)(unsigned long)src,
+ sizeof(upath))) {
+
+ kfree(sa_path);
+ return -EFAULT;
+ }
+
+ ib_copy_path_rec_from_user(sa_path, &upath);
+ *path = sa_path;
+ return 0;
+}
+
+static ssize_t ib_ucm_send_req(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_cm_req_param param;
+ struct ib_ucm_context *ctx;
+ struct ib_ucm_req cmd;
+ int result;
+
+ param.private_data = NULL;
+ param.primary_path = NULL;
+ param.alternate_path = NULL;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
+ if (result)
+ goto done;
+
+ result = ib_ucm_path_get(&param.primary_path, cmd.primary_path);
+ if (result)
+ goto done;
+
+ result = ib_ucm_path_get(&param.alternate_path, cmd.alternate_path);
+ if (result)
+ goto done;
+
+ param.private_data_len = cmd.len;
+ param.service_id = cmd.sid;
+ param.qp_num = cmd.qpn;
+ param.qp_type = cmd.qp_type;
+ param.starting_psn = cmd.psn;
+ param.peer_to_peer = cmd.peer_to_peer;
+ param.responder_resources = cmd.responder_resources;
+ param.initiator_depth = cmd.initiator_depth;
+ param.remote_cm_response_timeout = cmd.remote_cm_response_timeout;
+ param.flow_control = cmd.flow_control;
+ param.local_cm_response_timeout = cmd.local_cm_response_timeout;
+ param.retry_count = cmd.retry_count;
+ param.rnr_retry_count = cmd.rnr_retry_count;
+ param.max_cm_retries = cmd.max_cm_retries;
+ param.srq = cmd.srq;
+
+ ctx = ib_ucm_ctx_get(file, cmd.id);
+ if (!IS_ERR(ctx)) {
+ result = ib_send_cm_req(ctx->cm_id, &param);
+ ib_ucm_ctx_put(ctx);
+ } else
+ result = PTR_ERR(ctx);
+
+done:
+ kfree(param.private_data);
+ kfree(param.primary_path);
+ kfree(param.alternate_path);
+ return result;
+}
+
+static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_cm_rep_param param;
+ struct ib_ucm_context *ctx;
+ struct ib_ucm_rep cmd;
+ int result;
+
+ param.private_data = NULL;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
+ if (result)
+ return result;
+
+ param.qp_num = cmd.qpn;
+ param.starting_psn = cmd.psn;
+ param.private_data_len = cmd.len;
+ param.responder_resources = cmd.responder_resources;
+ param.initiator_depth = cmd.initiator_depth;
+ param.failover_accepted = cmd.failover_accepted;
+ param.flow_control = cmd.flow_control;
+ param.rnr_retry_count = cmd.rnr_retry_count;
+ param.srq = cmd.srq;
+
+ ctx = ib_ucm_ctx_get(file, cmd.id);
+ if (!IS_ERR(ctx)) {
+ ctx->uid = cmd.uid;
+ result = ib_send_cm_rep(ctx->cm_id, &param);
+ ib_ucm_ctx_put(ctx);
+ } else
+ result = PTR_ERR(ctx);
+
+ kfree(param.private_data);
+ return result;
+}
+
+static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file,
+ const char __user *inbuf, int in_len,
+ int (*func)(struct ib_cm_id *cm_id,
+ const void *private_data,
+ u8 private_data_len))
+{
+ struct ib_ucm_private_data cmd;
+ struct ib_ucm_context *ctx;
+ const void *private_data = NULL;
+ int result;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len);
+ if (result)
+ return result;
+
+ ctx = ib_ucm_ctx_get(file, cmd.id);
+ if (!IS_ERR(ctx)) {
+ result = func(ctx->cm_id, private_data, cmd.len);
+ ib_ucm_ctx_put(ctx);
+ } else
+ result = PTR_ERR(ctx);
+
+ kfree(private_data);
+ return result;
+}
+
+static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu);
+}
+
+static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq);
+}
+
+static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep);
+}
+
+static ssize_t ib_ucm_send_info(struct ib_ucm_file *file,
+ const char __user *inbuf, int in_len,
+ int (*func)(struct ib_cm_id *cm_id,
+ int status,
+ const void *info,
+ u8 info_len,
+ const void *data,
+ u8 data_len))
+{
+ struct ib_ucm_context *ctx;
+ struct ib_ucm_info cmd;
+ const void *data = NULL;
+ const void *info = NULL;
+ int result;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len);
+ if (result)
+ goto done;
+
+ result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len);
+ if (result)
+ goto done;
+
+ ctx = ib_ucm_ctx_get(file, cmd.id);
+ if (!IS_ERR(ctx)) {
+ result = func(ctx->cm_id, cmd.status, info, cmd.info_len,
+ data, cmd.data_len);
+ ib_ucm_ctx_put(ctx);
+ } else
+ result = PTR_ERR(ctx);
+
+done:
+ kfree(data);
+ kfree(info);
+ return result;
+}
+
+static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej);
+}
+
+static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr);
+}
+
+static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_ucm_context *ctx;
+ struct ib_ucm_mra cmd;
+ const void *data = NULL;
+ int result;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ result = ib_ucm_alloc_data(&data, cmd.data, cmd.len);
+ if (result)
+ return result;
+
+ ctx = ib_ucm_ctx_get(file, cmd.id);
+ if (!IS_ERR(ctx)) {
+ result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len);
+ ib_ucm_ctx_put(ctx);
+ } else
+ result = PTR_ERR(ctx);
+
+ kfree(data);
+ return result;
+}
+
+static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_ucm_context *ctx;
+ struct ib_sa_path_rec *path = NULL;
+ struct ib_ucm_lap cmd;
+ const void *data = NULL;
+ int result;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ result = ib_ucm_alloc_data(&data, cmd.data, cmd.len);
+ if (result)
+ goto done;
+
+ result = ib_ucm_path_get(&path, cmd.path);
+ if (result)
+ goto done;
+
+ ctx = ib_ucm_ctx_get(file, cmd.id);
+ if (!IS_ERR(ctx)) {
+ result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len);
+ ib_ucm_ctx_put(ctx);
+ } else
+ result = PTR_ERR(ctx);
+
+done:
+ kfree(data);
+ kfree(path);
+ return result;
+}
+
+static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_cm_sidr_req_param param;
+ struct ib_ucm_context *ctx;
+ struct ib_ucm_sidr_req cmd;
+ int result;
+
+ param.private_data = NULL;
+ param.path = NULL;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
+ if (result)
+ goto done;
+
+ result = ib_ucm_path_get(&param.path, cmd.path);
+ if (result)
+ goto done;
+
+ param.private_data_len = cmd.len;
+ param.service_id = cmd.sid;
+ param.timeout_ms = cmd.timeout;
+ param.max_cm_retries = cmd.max_cm_retries;
+
+ ctx = ib_ucm_ctx_get(file, cmd.id);
+ if (!IS_ERR(ctx)) {
+ result = ib_send_cm_sidr_req(ctx->cm_id, &param);
+ ib_ucm_ctx_put(ctx);
+ } else
+ result = PTR_ERR(ctx);
+
+done:
+ kfree(param.private_data);
+ kfree(param.path);
+ return result;
+}
+
+static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ib_cm_sidr_rep_param param;
+ struct ib_ucm_sidr_rep cmd;
+ struct ib_ucm_context *ctx;
+ int result;
+
+ param.info = NULL;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ result = ib_ucm_alloc_data(&param.private_data,
+ cmd.data, cmd.data_len);
+ if (result)
+ goto done;
+
+ result = ib_ucm_alloc_data(&param.info, cmd.info, cmd.info_len);
+ if (result)
+ goto done;
+
+ param.qp_num = cmd.qpn;
+ param.qkey = cmd.qkey;
+ param.status = cmd.status;
+ param.info_length = cmd.info_len;
+ param.private_data_len = cmd.data_len;
+
+ ctx = ib_ucm_ctx_get(file, cmd.id);
+ if (!IS_ERR(ctx)) {
+ result = ib_send_cm_sidr_rep(ctx->cm_id, &param);
+ ib_ucm_ctx_put(ctx);
+ } else
+ result = PTR_ERR(ctx);
+
+done:
+ kfree(param.private_data);
+ kfree(param.info);
+ return result;
+}
+
+static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len) = {
+ [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id,
+ [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id,
+ [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id,
+ [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen,
+ [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify,
+ [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req,
+ [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep,
+ [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu,
+ [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq,
+ [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep,
+ [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej,
+ [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra,
+ [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap,
+ [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr,
+ [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req,
+ [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep,
+ [IB_USER_CM_CMD_EVENT] = ib_ucm_event,
+ [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr,
+};
+
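+/*
+ * Command ABI: userspace write()s a struct ib_ucm_cmd_hdr followed by
+ * the command payload; hdr.cmd indexes ucm_cmd_table above, while
+ * hdr.in and hdr.out give the payload and response sizes.
+ */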
+static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct ib_ucm_file *file = filp->private_data;
+ struct ib_ucm_cmd_hdr hdr;
+ ssize_t result;
+
+ if (len < sizeof(hdr))
+ return -EINVAL;
+
+ if (copy_from_user(&hdr, buf, sizeof(hdr)))
+ return -EFAULT;
+
+ if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
+ return -EINVAL;
+
+ if (hdr.in + sizeof(hdr) > len)
+ return -EINVAL;
+
+ result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr),
+ hdr.in, hdr.out);
+ if (!result)
+ result = len;
+
+ return result;
+}
+
+static unsigned int ib_ucm_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct ib_ucm_file *file = filp->private_data;
+ unsigned int mask = 0;
+
+ poll_wait(filp, &file->poll_wait, wait);
+
+ if (!list_empty(&file->events))
+ mask = POLLIN | POLLRDNORM;
+
+ return mask;
+}
+
+/*
+ * ib_ucm_open() does not need the BKL:
+ *
+ * - no global state is referred to;
+ * - there is no ioctl method to race against;
+ * - no further module initialization is required for open to work
+ * after the device is registered.
+ */
+static int ib_ucm_open(struct inode *inode, struct file *filp)
+{
+ struct ib_ucm_file *file;
+
+ file = kzalloc(sizeof(*file), GFP_KERNEL);
+ if (!file)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&file->events);
+ INIT_LIST_HEAD(&file->ctxs);
+ init_waitqueue_head(&file->poll_wait);
+
+ mutex_init(&file->file_mutex);
+
+ filp->private_data = file;
+ file->filp = filp;
+ file->device = container_of(inode->i_cdev->si_drv1, struct ib_ucm_device, cdev);
+
+ return 0;
+}
+
+static int ib_ucm_close(struct inode *inode, struct file *filp)
+{
+ struct ib_ucm_file *file = filp->private_data;
+ struct ib_ucm_context *ctx;
+
+ mutex_lock(&file->file_mutex);
+ while (!list_empty(&file->ctxs)) {
+ ctx = list_entry(file->ctxs.next,
+ struct ib_ucm_context, file_list);
+ mutex_unlock(&file->file_mutex);
+
+ mutex_lock(&ctx_id_mutex);
+ idr_remove(&ctx_id_table, ctx->id);
+ mutex_unlock(&ctx_id_mutex);
+
+ ib_destroy_cm_id(ctx->cm_id);
+ ib_ucm_cleanup_events(ctx);
+ kfree(ctx);
+
+ mutex_lock(&file->file_mutex);
+ }
+ mutex_unlock(&file->file_mutex);
+ kfree(file);
+ return 0;
+}
+
+static void ib_ucm_release_dev(struct device *dev)
+{
+ struct ib_ucm_device *ucm_dev;
+
+ ucm_dev = container_of(dev, struct ib_ucm_device, dev);
+ cdev_del(&ucm_dev->cdev);
+ clear_bit(ucm_dev->devnum, dev_map);
+ kfree(ucm_dev);
+}
+
+static const struct file_operations ucm_fops = {
+ .owner = THIS_MODULE,
+ .open = ib_ucm_open,
+ .release = ib_ucm_close,
+ .write = ib_ucm_write,
+ .poll = ib_ucm_poll,
+};
+
+static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct ib_ucm_device *ucm_dev;
+
+ ucm_dev = container_of(dev, struct ib_ucm_device, dev);
+ return sprintf(buf, "%s\n", ucm_dev->ib_dev->name);
+}
+static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+
+static void ib_ucm_add_one(struct ib_device *device)
+{
+ struct ib_ucm_device *ucm_dev;
+
+ if (!device->alloc_ucontext ||
+ rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
+ ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
+ if (!ucm_dev)
+ return;
+
+ ucm_dev->ib_dev = device;
+
+ ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
+ if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES)
+ goto err;
+
+ set_bit(ucm_dev->devnum, dev_map);
+
+ cdev_init(&ucm_dev->cdev, &ucm_fops);
+ ucm_dev->cdev.owner = THIS_MODULE;
+ kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
+ if (cdev_add(&ucm_dev->cdev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
+ goto err;
+
+ ucm_dev->dev.class = &cm_class;
+ ucm_dev->dev.parent = device->dma_device;
+ ucm_dev->dev.devt = ucm_dev->cdev.dev;
+ ucm_dev->dev.release = ib_ucm_release_dev;
+ dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum);
+ if (device_register(&ucm_dev->dev))
+ goto err_cdev;
+
+ if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev))
+ goto err_dev;
+
+ ib_set_client_data(device, &ucm_client, ucm_dev);
+ return;
+
+err_dev:
+ device_unregister(&ucm_dev->dev);
+err_cdev:
+ cdev_del(&ucm_dev->cdev);
+ clear_bit(ucm_dev->devnum, dev_map);
+err:
+ kfree(ucm_dev);
+ return;
+}
+
+static void ib_ucm_remove_one(struct ib_device *device)
+{
+ struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client);
+
+ if (!ucm_dev)
+ return;
+
+ device_unregister(&ucm_dev->dev);
+}
+
+static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
+}
+static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static int __init ib_ucm_init(void)
+{
+ int ret;
+
+ ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
+ "infiniband_cm");
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't register device number\n");
+ goto error1;
+ }
+
+ ret = class_create_file(&cm_class, &class_attr_abi_version);
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't create abi_version attribute\n");
+ goto error2;
+ }
+
+ ret = ib_register_client(&ucm_client);
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't register client\n");
+ goto error3;
+ }
+ return 0;
+
+error3:
+ class_remove_file(&cm_class, &class_attr_abi_version);
+error2:
+ unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+error1:
+ return ret;
+}
+
+static void __exit ib_ucm_cleanup(void)
+{
+ ib_unregister_client(&ucm_client);
+ class_remove_file(&cm_class, &class_attr_abi_version);
+ unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+ idr_destroy(&ctx_id_table);
+}
+
+module_init_order(ib_ucm_init, SI_ORDER_THIRD);
+module_exit(ib_ucm_cleanup);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/ucm.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/ucma.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/ucma.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/ucma.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,1353 @@
+/*
+ * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/idr.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/miscdevice.h>
+
+#include <sys/filio.h>
+
+#include <rdma/rdma_user_cm.h>
+#include <rdma/ib_marshall.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/rdma_cm_ib.h>
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
+MODULE_LICENSE("Dual BSD/GPL");
+
+enum {
+ UCMA_MAX_BACKLOG = 1024
+};
+
+struct ucma_file {
+ struct mutex mut;
+ struct file *filp;
+ struct list_head ctx_list;
+ struct list_head event_list;
+ wait_queue_head_t poll_wait;
+};
+
+struct ucma_context {
+ int id;
+ struct completion comp;
+ atomic_t ref;
+ int events_reported;
+ int backlog;
+
+ struct ucma_file *file;
+ struct rdma_cm_id *cm_id;
+ u64 uid;
+
+ struct list_head list;
+ struct list_head mc_list;
+};
+
+struct ucma_multicast {
+ struct ucma_context *ctx;
+ int id;
+ int events_reported;
+
+ u64 uid;
+ struct list_head list;
+ struct sockaddr_storage addr;
+};
+
+struct ucma_event {
+ struct ucma_context *ctx;
+ struct ucma_multicast *mc;
+ struct list_head list;
+ struct rdma_cm_id *cm_id;
+ struct rdma_ucm_event_resp resp;
+};
+
+static DEFINE_MUTEX(mut);
+static DEFINE_IDR(ctx_idr);
+static DEFINE_IDR(multicast_idr);
+
+static inline struct ucma_context *_ucma_find_context(int id,
+ struct ucma_file *file)
+{
+ struct ucma_context *ctx;
+
+ ctx = idr_find(&ctx_idr, id);
+ if (!ctx)
+ ctx = ERR_PTR(-ENOENT);
+ else if (ctx->file != file)
+ ctx = ERR_PTR(-EINVAL);
+ return ctx;
+}
+
+static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
+{
+ struct ucma_context *ctx;
+
+ mutex_lock(&mut);
+ ctx = _ucma_find_context(id, file);
+ if (!IS_ERR(ctx))
+ atomic_inc(&ctx->ref);
+ mutex_unlock(&mut);
+ return ctx;
+}
+
+static void ucma_put_ctx(struct ucma_context *ctx)
+{
+ if (atomic_dec_and_test(&ctx->ref))
+ complete(&ctx->comp);
+}
+
+static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
+{
+ struct ucma_context *ctx;
+ int ret;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return NULL;
+
+ atomic_set(&ctx->ref, 1);
+ init_completion(&ctx->comp);
+ INIT_LIST_HEAD(&ctx->mc_list);
+ ctx->file = file;
+
+ do {
+ ret = idr_pre_get(&ctx_idr, GFP_KERNEL);
+ if (!ret)
+ goto error;
+
+ mutex_lock(&mut);
+ ret = idr_get_new(&ctx_idr, ctx, &ctx->id);
+ mutex_unlock(&mut);
+ } while (ret == -EAGAIN);
+
+ if (ret)
+ goto error;
+
+ list_add_tail(&ctx->list, &file->ctx_list);
+ return ctx;
+
+error:
+ kfree(ctx);
+ return NULL;
+}
+
+static struct ucma_multicast *ucma_alloc_multicast(struct ucma_context *ctx)
+{
+ struct ucma_multicast *mc;
+ int ret;
+
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
+ if (!mc)
+ return NULL;
+
+ do {
+ ret = idr_pre_get(&multicast_idr, GFP_KERNEL);
+ if (!ret)
+ goto error;
+
+ mutex_lock(&mut);
+ ret = idr_get_new(&multicast_idr, mc, &mc->id);
+ mutex_unlock(&mut);
+ } while (ret == -EAGAIN);
+
+ if (ret)
+ goto error;
+
+ mc->ctx = ctx;
+ list_add_tail(&mc->list, &ctx->mc_list);
+ return mc;
+
+error:
+ kfree(mc);
+ return NULL;
+}
+
+static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
+ struct rdma_conn_param *src)
+{
+ if (src->private_data_len)
+ memcpy(dst->private_data, src->private_data,
+ src->private_data_len);
+ dst->private_data_len = src->private_data_len;
+ dst->responder_resources = src->responder_resources;
+ dst->initiator_depth = src->initiator_depth;
+ dst->flow_control = src->flow_control;
+ dst->retry_count = src->retry_count;
+ dst->rnr_retry_count = src->rnr_retry_count;
+ dst->srq = src->srq;
+ dst->qp_num = src->qp_num;
+}
+
+static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst,
+ struct rdma_ud_param *src)
+{
+ if (src->private_data_len)
+ memcpy(dst->private_data, src->private_data,
+ src->private_data_len);
+ dst->private_data_len = src->private_data_len;
+ ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
+ dst->qp_num = src->qp_num;
+ dst->qkey = src->qkey;
+}
+
+static void ucma_set_event_context(struct ucma_context *ctx,
+ struct rdma_cm_event *event,
+ struct ucma_event *uevent)
+{
+ uevent->ctx = ctx;
+ switch (event->event) {
+ case RDMA_CM_EVENT_MULTICAST_JOIN:
+ case RDMA_CM_EVENT_MULTICAST_ERROR:
+ uevent->mc = (struct ucma_multicast *)
+ event->param.ud.private_data;
+ uevent->resp.uid = uevent->mc->uid;
+ uevent->resp.id = uevent->mc->id;
+ break;
+ default:
+ uevent->resp.uid = ctx->uid;
+ uevent->resp.id = ctx->id;
+ break;
+ }
+}
+
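+/*
+ * rdma_cm callback: queue the event for delivery through
+ * ucma_get_event() and wake any pollers. A nonzero return tells the
+ * rdma_cm to destroy the cm_id, which happens when a connect request
+ * cannot be queued.
+ */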
+static int ucma_event_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
+{
+ struct ucma_event *uevent;
+ struct ucma_context *ctx = cm_id->context;
+ int ret = 0;
+
+ uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
+ if (!uevent)
+ return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
+
+ uevent->cm_id = cm_id;
+ ucma_set_event_context(ctx, event, uevent);
+ uevent->resp.event = event->event;
+ uevent->resp.status = event->status;
+ if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
+ ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
+ else
+ ucma_copy_conn_event(&uevent->resp.param.conn,
+ &event->param.conn);
+
+ mutex_lock(&ctx->file->mut);
+ if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
+ if (!ctx->backlog) {
+ ret = -ENOMEM;
+ kfree(uevent);
+ goto out;
+ }
+ ctx->backlog--;
+ } else if (!ctx->uid) {
+ /*
+ * We ignore events for new connections until userspace has set
+ * their context. This can only happen if an error occurs on a
+ * new connection before the user accepts it. This is okay,
+ * since the accept will just fail later.
+ */
+ kfree(uevent);
+ goto out;
+ }
+
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ wake_up_interruptible(&ctx->file->poll_wait);
+ if (ctx->file->filp)
+ selwakeup(&ctx->file->filp->f_selinfo);
+out:
+ mutex_unlock(&ctx->file->mut);
+ return ret;
+}
+
+static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct ucma_context *ctx;
+ struct rdma_ucm_get_event cmd;
+ struct ucma_event *uevent;
+ int ret = 0;
+ DEFINE_WAIT(wait);
+
+ if (out_len < sizeof uevent->resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ mutex_lock(&file->mut);
+ while (list_empty(&file->event_list)) {
+ mutex_unlock(&file->mut);
+
+ if (file->filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(file->poll_wait,
+ !list_empty(&file->event_list)))
+ return -ERESTARTSYS;
+
+ mutex_lock(&file->mut);
+ }
+
+ uevent = list_entry(file->event_list.next, struct ucma_event, list);
+
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
+ ctx = ucma_alloc_ctx(file);
+ if (!ctx) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ uevent->ctx->backlog++;
+ ctx->cm_id = uevent->cm_id;
+ ctx->cm_id->context = ctx;
+ uevent->resp.id = ctx->id;
+ }
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &uevent->resp, sizeof uevent->resp)) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ list_del(&uevent->list);
+ uevent->ctx->events_reported++;
+ if (uevent->mc)
+ uevent->mc->events_reported++;
+ kfree(uevent);
+done:
+ mutex_unlock(&file->mut);
+ return ret;
+}
+
+static ssize_t ucma_create_id(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_create_id cmd;
+ struct rdma_ucm_create_id_resp resp;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ mutex_lock(&file->mut);
+ ctx = ucma_alloc_ctx(file);
+ mutex_unlock(&file->mut);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->uid = cmd.uid;
+ ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps);
+ if (IS_ERR(ctx->cm_id)) {
+ ret = PTR_ERR(ctx->cm_id);
+ goto err1;
+ }
+
+ resp.id = ctx->id;
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp))) {
+ ret = -EFAULT;
+ goto err2;
+ }
+ return 0;
+
+err2:
+ rdma_destroy_id(ctx->cm_id);
+err1:
+ mutex_lock(&mut);
+ idr_remove(&ctx_idr, ctx->id);
+ mutex_unlock(&mut);
+ kfree(ctx);
+ return ret;
+}
+
+static void ucma_cleanup_multicast(struct ucma_context *ctx)
+{
+ struct ucma_multicast *mc, *tmp;
+
+ mutex_lock(&mut);
+ list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
+ list_del(&mc->list);
+ idr_remove(&multicast_idr, mc->id);
+ kfree(mc);
+ }
+ mutex_unlock(&mut);
+}
+
+static void ucma_cleanup_events(struct ucma_context *ctx)
+{
+ struct ucma_event *uevent, *tmp;
+
+ list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
+ if (uevent->ctx != ctx)
+ continue;
+
+ list_del(&uevent->list);
+
+ /* clear incoming connections. */
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ rdma_destroy_id(uevent->cm_id);
+
+ kfree(uevent);
+ }
+}
+
+static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
+{
+ struct ucma_event *uevent, *tmp;
+
+ list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
+ if (uevent->mc != mc)
+ continue;
+
+ list_del(&uevent->list);
+ kfree(uevent);
+ }
+}
+
+static int ucma_free_ctx(struct ucma_context *ctx)
+{
+ int events_reported;
+
+ /* No new events will be generated after destroying the id. */
+ rdma_destroy_id(ctx->cm_id);
+
+ ucma_cleanup_multicast(ctx);
+
+ /* Cleanup events not yet reported to the user. */
+ mutex_lock(&ctx->file->mut);
+ ucma_cleanup_events(ctx);
+ list_del(&ctx->list);
+ mutex_unlock(&ctx->file->mut);
+
+ events_reported = ctx->events_reported;
+ kfree(ctx);
+ return events_reported;
+}
+
+static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_destroy_id cmd;
+ struct rdma_ucm_destroy_id_resp resp;
+ struct ucma_context *ctx;
+ int ret = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ mutex_lock(&mut);
+ ctx = _ucma_find_context(cmd.id, file);
+ if (!IS_ERR(ctx))
+ idr_remove(&ctx_idr, ctx->id);
+ mutex_unlock(&mut);
+
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ucma_put_ctx(ctx);
+ wait_for_completion(&ctx->comp);
+ resp.events_reported = ucma_free_ctx(ctx);
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_bind_addr cmd;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_resolve_addr(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_resolve_addr cmd;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
+ (struct sockaddr *) &cmd.dst_addr,
+ cmd.timeout_ms);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_resolve_route(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_resolve_route cmd;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
+ struct rdma_route *route)
+{
+ struct rdma_dev_addr *dev_addr;
+
+ resp->num_paths = route->num_paths;
+ switch (route->num_paths) {
+ case 0:
+ dev_addr = &route->addr.dev_addr;
+ rdma_addr_get_dgid(dev_addr,
+ (union ib_gid *) &resp->ib_route[0].dgid);
+ rdma_addr_get_sgid(dev_addr,
+ (union ib_gid *) &resp->ib_route[0].sgid);
+ resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
+ break;
+ case 2:
+ ib_copy_path_rec_to_user(&resp->ib_route[1],
+ &route->path_rec[1]);
+ /* fall through */
+ case 1:
+ ib_copy_path_rec_to_user(&resp->ib_route[0],
+ &route->path_rec[0]);
+ break;
+ default:
+ break;
+ }
+}
+
+static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
+ struct rdma_route *route)
+{
+ struct rdma_dev_addr *dev_addr;
+ struct net_device *dev;
+ u16 vid = 0;
+
+ resp->num_paths = route->num_paths;
+ switch (route->num_paths) {
+ case 0:
+ dev_addr = &route->addr.dev_addr;
+ dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (dev) {
+ vid = rdma_vlan_dev_vlan_id(dev);
+ dev_put(dev);
+ }
+
+ iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid,
+ dev_addr->dst_dev_addr, vid);
+ iboe_addr_get_sgid(dev_addr,
+ (union ib_gid *) &resp->ib_route[0].sgid);
+ resp->ib_route[0].pkey = cpu_to_be16(0xffff);
+ break;
+ case 2:
+ ib_copy_path_rec_to_user(&resp->ib_route[1],
+ &route->path_rec[1]);
+ /* fall through */
+ case 1:
+ ib_copy_path_rec_to_user(&resp->ib_route[0],
+ &route->path_rec[0]);
+ break;
+ default:
+ break;
+ }
+}
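/*
 * Editorial note on the two copy helpers above: num_paths == 0
 * synthesizes a single route entry from the resolved device address,
 * 1 copies the primary path record, and 2 first copies the alternate
 * path into ib_route[1] and then falls through to copy the primary.
 */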
+
+static ssize_t ucma_query_route(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_query_route cmd;
+ struct rdma_ucm_query_route_resp resp;
+ struct ucma_context *ctx;
+ struct sockaddr *addr;
+ int ret = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ memset(&resp, 0, sizeof resp);
+ addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
+ memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
+ sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6));
+ addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
+ memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
+ sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6));
+ if (!ctx->cm_id->device)
+ goto out;
+
+ resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
+ resp.port_num = ctx->cm_id->port_num;
+ if (rdma_node_get_transport(ctx->cm_id->device->node_type) == RDMA_TRANSPORT_IB) {
+ switch (rdma_port_get_link_layer(ctx->cm_id->device, ctx->cm_id->port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ ucma_copy_ib_route(&resp, &ctx->cm_id->route);
+ break;
+ case IB_LINK_LAYER_ETHERNET:
+ ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
+ break;
+ default:
+ break;
+ }
+ }
+
+out:
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ ret = -EFAULT;
+
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static void ucma_copy_conn_param(struct rdma_conn_param *dst,
+ struct rdma_ucm_conn_param *src)
+{
+ dst->private_data = src->private_data;
+ dst->private_data_len = src->private_data_len;
+ dst->responder_resources = src->responder_resources;
+ dst->initiator_depth = src->initiator_depth;
+ dst->flow_control = src->flow_control;
+ dst->retry_count = src->retry_count;
+ dst->rnr_retry_count = src->rnr_retry_count;
+ dst->srq = src->srq;
+ dst->qp_num = src->qp_num;
+}
+
+static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_connect cmd;
+ struct rdma_conn_param conn_param;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ if (!cmd.conn_param.valid)
+ return -EINVAL;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ucma_copy_conn_param(&conn_param, &cmd.conn_param);
+ ret = rdma_connect(ctx->cm_id, &conn_param);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_listen cmd;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ctx->backlog = cmd.backlog > 0 && cmd.backlog < UCMA_MAX_BACKLOG ?
+ cmd.backlog : UCMA_MAX_BACKLOG;
+ ret = rdma_listen(ctx->cm_id, ctx->backlog);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_accept cmd;
+ struct rdma_conn_param conn_param;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ if (cmd.conn_param.valid) {
+ ctx->uid = cmd.uid;
+ ucma_copy_conn_param(&conn_param, &cmd.conn_param);
+ ret = rdma_accept(ctx->cm_id, &conn_param);
+ } else
+ ret = rdma_accept(ctx->cm_id, NULL);
+
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_reject cmd;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_disconnect cmd;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ret = rdma_disconnect(ctx->cm_id);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_init_qp_attr(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_init_qp_attr cmd;
+ struct ib_uverbs_qp_attr resp;
+ struct ucma_context *ctx;
+ struct ib_qp_attr qp_attr;
+ int ret;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ resp.qp_attr_mask = 0;
+ memset(&qp_attr, 0, sizeof qp_attr);
+ qp_attr.qp_state = cmd.qp_state;
+ ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
+ if (ret)
+ goto out;
+
+ ib_copy_qp_attr_to_user(&resp, &qp_attr);
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ ret = -EFAULT;
+
+out:
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static int ucma_set_option_id(struct ucma_context *ctx, int optname,
+ void *optval, size_t optlen)
+{
+ int ret = 0;
+
+ switch (optname) {
+ case RDMA_OPTION_ID_TOS:
+ if (optlen != sizeof(u8)) {
+ ret = -EINVAL;
+ break;
+ }
+ rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+
+ return ret;
+}
+
+static int ucma_set_ib_path(struct ucma_context *ctx,
+ struct ib_path_rec_data *path_data, size_t optlen)
+{
+ struct ib_sa_path_rec sa_path;
+ struct rdma_cm_event event;
+ int ret;
+
+ if (optlen % sizeof(*path_data))
+ return -EINVAL;
+
+ for (; optlen; optlen -= sizeof(*path_data), path_data++) {
+ if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
+ IB_PATH_BIDIRECTIONAL))
+ break;
+ }
+
+ if (!optlen)
+ return -EINVAL;
+
+ ib_sa_unpack_path(path_data->path_rec, &sa_path);
+ ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+ if (ret)
+ return ret;
+
+ memset(&event, 0, sizeof event);
+ event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ return ucma_event_handler(ctx->cm_id, &event);
+}
+
+static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
+ void *optval, size_t optlen)
+{
+ int ret;
+
+ switch (optname) {
+ case RDMA_OPTION_IB_PATH:
+ ret = ucma_set_ib_path(ctx, optval, optlen);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+
+ return ret;
+}
+
+static int ucma_set_option_level(struct ucma_context *ctx, int level,
+ int optname, void *optval, size_t optlen)
+{
+ int ret;
+
+ switch (level) {
+ case RDMA_OPTION_ID:
+ ret = ucma_set_option_id(ctx, optname, optval, optlen);
+ break;
+ case RDMA_OPTION_IB:
+ ret = ucma_set_option_ib(ctx, optname, optval, optlen);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+
+ return ret;
+}
+
+static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_set_option cmd;
+ struct ucma_context *ctx;
+ void *optval;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ optval = kmalloc(cmd.optlen, GFP_KERNEL);
+ if (!optval) {
+ ret = -ENOMEM;
+ goto out1;
+ }
+
+ if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval,
+ cmd.optlen)) {
+ ret = -EFAULT;
+ goto out2;
+ }
+
+ ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
+ cmd.optlen);
+out2:
+ kfree(optval);
+out1:
+ ucma_put_ctx(ctx);
+ return ret;
+}
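/* Hedged sketch, not part of the diff: setting the one option this
 * handler accepts (RDMA_OPTION_ID / RDMA_OPTION_ID_TOS, exactly one
 * byte) from userspace through the write()-based command ABI (see
 * ucma_write() below). Structure layouts are assumed from the
 * rdma_ucm ABI header; <string.h> and <unistd.h> are also assumed. */
static int ucm_set_tos(int fd, __u32 id, __u8 tos)
{
        struct {
                struct rdma_ucm_cmd_hdr hdr;
                struct rdma_ucm_set_option cmd;
        } msg;

        memset(&msg, 0, sizeof(msg));
        msg.hdr.cmd = RDMA_USER_CM_CMD_SET_OPTION;
        msg.hdr.in = sizeof(msg.cmd);
        msg.cmd.id = id;                        /* id from CREATE_ID */
        msg.cmd.level = RDMA_OPTION_ID;
        msg.cmd.optname = RDMA_OPTION_ID_TOS;
        msg.cmd.optval = (unsigned long) &tos;  /* copied synchronously */
        msg.cmd.optlen = sizeof(tos);           /* handler insists on 1 byte */

        return write(fd, &msg, sizeof(msg)) == sizeof(msg) ? 0 : -1;
}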
+
+static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_notify cmd;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_join_multicast(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_join_mcast cmd;
+ struct rdma_ucm_create_id_resp resp;
+ struct ucma_context *ctx;
+ struct ucma_multicast *mc;
+ int ret;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ mutex_lock(&file->mut);
+ mc = ucma_alloc_multicast(ctx);
+ if (!mc) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ mc->uid = cmd.uid;
+ memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr);
+ ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);
+ if (ret)
+ goto err2;
+
+ resp.id = mc->id;
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp))) {
+ ret = -EFAULT;
+ goto err3;
+ }
+
+ mutex_unlock(&file->mut);
+ ucma_put_ctx(ctx);
+ return 0;
+
+err3:
+ rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
+ ucma_cleanup_mc_events(mc);
+err2:
+ mutex_lock(&mut);
+ idr_remove(&multicast_idr, mc->id);
+ mutex_unlock(&mut);
+ list_del(&mc->list);
+ kfree(mc);
+err1:
+ mutex_unlock(&file->mut);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_leave_multicast(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_destroy_id cmd;
+ struct rdma_ucm_destroy_id_resp resp;
+ struct ucma_multicast *mc;
+ int ret = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ mutex_lock(&mut);
+ mc = idr_find(&multicast_idr, cmd.id);
+ if (!mc)
+ mc = ERR_PTR(-ENOENT);
+ else if (mc->ctx->file != file)
+ mc = ERR_PTR(-EINVAL);
+ else {
+ idr_remove(&multicast_idr, mc->id);
+ atomic_inc(&mc->ctx->ref);
+ }
+ mutex_unlock(&mut);
+
+ if (IS_ERR(mc)) {
+ ret = PTR_ERR(mc);
+ goto out;
+ }
+
+ rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
+ mutex_lock(&mc->ctx->file->mut);
+ ucma_cleanup_mc_events(mc);
+ list_del(&mc->list);
+ mutex_unlock(&mc->ctx->file->mut);
+
+ ucma_put_ctx(mc->ctx);
+ resp.events_reported = mc->events_reported;
+ kfree(mc);
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ ret = -EFAULT;
+out:
+ return ret;
+}
+
+static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
+{
+ /* Acquire mutexes based on pointer comparison to prevent deadlock. */
+ if (file1 < file2) {
+ mutex_lock(&file1->mut);
+ mutex_lock(&file2->mut);
+ } else {
+ mutex_lock(&file2->mut);
+ mutex_lock(&file1->mut);
+ }
+}
+
+static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
+{
+ if (file1 < file2) {
+ mutex_unlock(&file2->mut);
+ mutex_unlock(&file1->mut);
+ } else {
+ mutex_unlock(&file1->mut);
+ mutex_unlock(&file2->mut);
+ }
+}
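/* Hedged sketch, not from the driver: the address-ordering rule used
 * above generalizes to any pair of same-class locks. As long as every
 * path acquires the two locks in ascending address order, an ABBA
 * deadlock (thread 1 holds A and wants B while thread 2 holds B and
 * wants A) cannot occur. Assumes a != b and the kernel's swap() macro. */
static void lock_pair_ordered(struct mutex *a, struct mutex *b)
{
        if (a > b)
                swap(a, b);     /* normalize to ascending address order */
        mutex_lock(a);
        mutex_lock(b);
}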
+
+static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
+{
+ struct ucma_event *uevent, *tmp;
+
+ list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
+ if (uevent->ctx == ctx)
+ list_move_tail(&uevent->list, &file->event_list);
+}
+
+static ssize_t ucma_migrate_id(struct ucma_file *new_file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_migrate_id cmd;
+ struct rdma_ucm_migrate_resp resp;
+ struct ucma_context *ctx;
+ struct file *filp;
+ struct ucma_file *cur_file;
+ int ret = 0;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ /* Get current fd to protect against it being closed */
+ filp = fget(cmd.fd);
+ if (!filp)
+ return -ENOENT;
+
+ /* Validate current fd and prevent destruction of id. */
+ ctx = ucma_get_ctx(filp->private_data, cmd.id);
+ if (IS_ERR(ctx)) {
+ ret = PTR_ERR(ctx);
+ goto file_put;
+ }
+
+ cur_file = ctx->file;
+ if (cur_file == new_file) {
+ resp.events_reported = ctx->events_reported;
+ goto response;
+ }
+
+ /*
+ * Migrate events between fds, maintaining order, and preventing new
+ * events from being added before existing events.
+ */
+ ucma_lock_files(cur_file, new_file);
+ mutex_lock(&mut);
+
+ list_move_tail(&ctx->list, &new_file->ctx_list);
+ ucma_move_events(ctx, new_file);
+ ctx->file = new_file;
+ resp.events_reported = ctx->events_reported;
+
+ mutex_unlock(&mut);
+ ucma_unlock_files(cur_file, new_file);
+
+response:
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ ret = -EFAULT;
+
+ ucma_put_ctx(ctx);
+file_put:
+ fput(filp);
+ return ret;
+}
+
+static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len) = {
+ [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id,
+ [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id,
+ [RDMA_USER_CM_CMD_BIND_ADDR] = ucma_bind_addr,
+ [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr,
+ [RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route,
+ [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route,
+ [RDMA_USER_CM_CMD_CONNECT] = ucma_connect,
+ [RDMA_USER_CM_CMD_LISTEN] = ucma_listen,
+ [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept,
+ [RDMA_USER_CM_CMD_REJECT] = ucma_reject,
+ [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect,
+ [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
+ [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event,
+ [RDMA_USER_CM_CMD_GET_OPTION] = NULL,
+ [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option,
+ [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
+ [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
+ [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
+ [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id
+};
+
+static ssize_t ucma_write(struct file *filp, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct ucma_file *file = filp->private_data;
+ struct rdma_ucm_cmd_hdr hdr;
+ ssize_t ret;
+
+ if (len < sizeof(hdr))
+ return -EINVAL;
+
+ if (copy_from_user(&hdr, buf, sizeof(hdr)))
+ return -EFAULT;
+
+ if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
+ return -EINVAL;
+
+ if (hdr.in + sizeof(hdr) > len)
+ return -EINVAL;
+
+ if (!ucma_cmd_table[hdr.cmd])
+ return -ENOSYS;
+
+ ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
+ if (!ret)
+ ret = len;
+
+ return ret;
+}
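/* Hedged sketch, not part of the diff: the command framing this
 * dispatcher expects from userspace is a rdma_ucm_cmd_hdr followed by
 * the command body, with any response written back through a user
 * pointer carried inside the command; on success write() returns the
 * full length. Shown here for CREATE_ID against /dev/rdma_cm, with
 * structure layouts assumed from the rdma_ucm ABI header. */
#include <string.h>
#include <unistd.h>
#include <rdma/rdma_user_cm.h>

static int ucm_create_id(int fd, __u64 uid, __u16 ps, __u32 *id)
{
        struct {
                struct rdma_ucm_cmd_hdr hdr;
                struct rdma_ucm_create_id cmd;
        } msg;
        struct rdma_ucm_create_id_resp resp;

        memset(&msg, 0, sizeof(msg));
        msg.hdr.cmd = RDMA_USER_CM_CMD_CREATE_ID;
        msg.hdr.in = sizeof(msg.cmd);
        msg.hdr.out = sizeof(resp);
        msg.cmd.uid = uid;
        msg.cmd.ps = ps;
        msg.cmd.response = (unsigned long) &resp;

        if (write(fd, &msg, sizeof(msg)) != (ssize_t) sizeof(msg))
                return -1;
        *id = resp.id;
        return 0;
}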
+
+static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait)
+{
+ struct ucma_file *file = filp->private_data;
+ unsigned int mask = 0;
+
+ poll_wait(filp, &file->poll_wait, wait);
+
+ if (!list_empty(&file->event_list))
+ mask = POLLIN | POLLRDNORM;
+
+ return mask;
+}
+
+/*
+ * ucma_open() does not need the BKL:
+ *
+ * - no global state is referred to;
+ * - there is no ioctl method to race against;
+ * - no further module initialization is required for open to work
+ * after the device is registered.
+ */
+static int ucma_open(struct inode *inode, struct file *filp)
+{
+ struct ucma_file *file;
+
+ file = kmalloc(sizeof *file, GFP_KERNEL);
+ if (!file)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&file->event_list);
+ INIT_LIST_HEAD(&file->ctx_list);
+ init_waitqueue_head(&file->poll_wait);
+ mutex_init(&file->mut);
+
+ filp->private_data = file;
+ file->filp = filp;
+ return 0;
+}
+
+static int ucma_close(struct inode *inode, struct file *filp)
+{
+ struct ucma_file *file = filp->private_data;
+ struct ucma_context *ctx, *tmp;
+
+ mutex_lock(&file->mut);
+ list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
+ mutex_unlock(&file->mut);
+
+ mutex_lock(&mut);
+ idr_remove(&ctx_idr, ctx->id);
+ mutex_unlock(&mut);
+
+ ucma_free_ctx(ctx);
+ mutex_lock(&file->mut);
+ }
+ mutex_unlock(&file->mut);
+ kfree(file);
+ return 0;
+}
+
+static long
+ucma_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+
+ switch (cmd) {
+ case FIONBIO:
+ case FIOASYNC:
+ return (0);
+ default:
+ return (-ENOTTY);
+ }
+}
+
+static const struct file_operations ucma_fops = {
+ .owner = THIS_MODULE,
+ .open = ucma_open,
+ .release = ucma_close,
+ .write = ucma_write,
+ .unlocked_ioctl = ucma_ioctl,
+ .poll = ucma_poll,
+};
+
+static struct miscdevice ucma_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "rdma_cm",
+ .fops = &ucma_fops,
+};
+
+static ssize_t show_abi_version(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
+}
+static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static int __init ucma_init(void)
+{
+ int ret;
+
+ ret = misc_register(&ucma_misc);
+ if (ret)
+ return ret;
+
+ ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
+ if (ret) {
+ printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n");
+ goto err;
+ }
+ return 0;
+err:
+ misc_deregister(&ucma_misc);
+ return ret;
+}
+
+static void __exit ucma_cleanup(void)
+{
+ device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
+ misc_deregister(&ucma_misc);
+ idr_destroy(&ctx_idr);
+}
+
+module_init(ucma_init);
+module_exit(ucma_cleanup);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/ucma.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/ud_header.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/ud_header.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/ud_header.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/if_ether.h>
+
+#include <rdma/ib_pack.h>
+
+#define STRUCT_FIELD(header, field) \
+ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
+ .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \
+ .field_name = #header ":" #field
+
+static const struct ib_field lrh_table[] = {
+ { STRUCT_FIELD(lrh, virtual_lane),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 4 },
+ { STRUCT_FIELD(lrh, link_version),
+ .offset_words = 0,
+ .offset_bits = 4,
+ .size_bits = 4 },
+ { STRUCT_FIELD(lrh, service_level),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 4 },
+ { RESERVED,
+ .offset_words = 0,
+ .offset_bits = 12,
+ .size_bits = 2 },
+ { STRUCT_FIELD(lrh, link_next_header),
+ .offset_words = 0,
+ .offset_bits = 14,
+ .size_bits = 2 },
+ { STRUCT_FIELD(lrh, destination_lid),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { RESERVED,
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 5 },
+ { STRUCT_FIELD(lrh, packet_length),
+ .offset_words = 1,
+ .offset_bits = 5,
+ .size_bits = 11 },
+ { STRUCT_FIELD(lrh, source_lid),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 16 }
+};
+
+static const struct ib_field eth_table[] = {
+ { STRUCT_FIELD(eth, dmac_h),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { STRUCT_FIELD(eth, dmac_l),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(eth, smac_h),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(eth, smac_l),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { STRUCT_FIELD(eth, type),
+ .offset_words = 3,
+ .offset_bits = 0,
+ .size_bits = 16 }
+};
+
+static const struct ib_field vlan_table[] = {
+ { STRUCT_FIELD(vlan, tag),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(vlan, type),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 }
+};
+
+static const struct ib_field grh_table[] = {
+ { STRUCT_FIELD(grh, ip_version),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 4 },
+ { STRUCT_FIELD(grh, traffic_class),
+ .offset_words = 0,
+ .offset_bits = 4,
+ .size_bits = 8 },
+ { STRUCT_FIELD(grh, flow_label),
+ .offset_words = 0,
+ .offset_bits = 12,
+ .size_bits = 20 },
+ { STRUCT_FIELD(grh, payload_length),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(grh, next_header),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 8 },
+ { STRUCT_FIELD(grh, hop_limit),
+ .offset_words = 1,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { STRUCT_FIELD(grh, source_gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { STRUCT_FIELD(grh, destination_gid),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 128 }
+};
+
+static const struct ib_field bth_table[] = {
+ { STRUCT_FIELD(bth, opcode),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { STRUCT_FIELD(bth, solicited_event),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 1 },
+ { STRUCT_FIELD(bth, mig_req),
+ .offset_words = 0,
+ .offset_bits = 9,
+ .size_bits = 1 },
+ { STRUCT_FIELD(bth, pad_count),
+ .offset_words = 0,
+ .offset_bits = 10,
+ .size_bits = 2 },
+ { STRUCT_FIELD(bth, transport_header_version),
+ .offset_words = 0,
+ .offset_bits = 12,
+ .size_bits = 4 },
+ { STRUCT_FIELD(bth, pkey),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { RESERVED,
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { STRUCT_FIELD(bth, destination_qpn),
+ .offset_words = 1,
+ .offset_bits = 8,
+ .size_bits = 24 },
+ { STRUCT_FIELD(bth, ack_req),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 2,
+ .offset_bits = 1,
+ .size_bits = 7 },
+ { STRUCT_FIELD(bth, psn),
+ .offset_words = 2,
+ .offset_bits = 8,
+ .size_bits = 24 }
+};
+
+static const struct ib_field deth_table[] = {
+ { STRUCT_FIELD(deth, qkey),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { RESERVED,
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { STRUCT_FIELD(deth, source_qpn),
+ .offset_words = 1,
+ .offset_bits = 8,
+ .size_bits = 24 }
+};
+
+/**
+ * ib_ud_header_init - Initialize UD header structure
+ * @payload_bytes: length of packet payload
+ * @lrh_present: specify if LRH is present
+ * @eth_present: specify if Ethernet header is present
+ * @vlan_present: specify if packet is VLAN tagged
+ * @grh_present: specify if GRH is present (if non-zero, GRH will be included)
+ * @immediate_present: specify if immediate data is present
+ * @header: structure to initialize
+ */
+void ib_ud_header_init(int payload_bytes,
+ int lrh_present,
+ int eth_present,
+ int vlan_present,
+ int grh_present,
+ int immediate_present,
+ struct ib_ud_header *header)
+{
+ u16 packet_length = 0;
+
+ memset(header, 0, sizeof *header);
+
+ if (lrh_present) {
+ header->lrh.link_version = 0;
+ header->lrh.link_next_header =
+ grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL;
+ packet_length = IB_LRH_BYTES;
+ }
+
+ if (eth_present) {
+ if (vlan_present) {
+ header->eth.type = cpu_to_be16(ETH_P_8021Q);
+ packet_length += IB_VLAN_BYTES;
+ }
+ packet_length += IB_ETH_BYTES;
+ }
+
+ packet_length += IB_BTH_BYTES + IB_DETH_BYTES + payload_bytes +
+ 4 + /* ICRC */
+ 3; /* round up */
+ packet_length /= 4;
+ if (grh_present) {
+ packet_length += IB_GRH_BYTES / 4;
+ header->grh.ip_version = 6;
+ header->grh.payload_length =
+ cpu_to_be16((IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4 + /* ICRC */
+ 3) & ~3); /* round up */
+ header->grh.next_header = 0x1b;
+ }
+
+ if (lrh_present)
+ header->lrh.packet_length = cpu_to_be16(packet_length);
+
+ if (immediate_present)
+ header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ else
+ header->bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ header->bth.pad_count = (4 - payload_bytes) & 3;
+ header->bth.transport_header_version = 0;
+
+ header->lrh_present = lrh_present;
+ header->eth_present = eth_present;
+ header->vlan_present = vlan_present;
+ header->grh_present = grh_present;
+ header->immediate_present = immediate_present;
+}
+EXPORT_SYMBOL(ib_ud_header_init);
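/* Hedged usage sketch: a 64-byte UD payload carried with LRH and GRH,
 * no Ethernet/VLAN headers and no immediate data. With the ib_pack.h
 * constants (IB_LRH_BYTES = 8, IB_GRH_BYTES = 40, IB_BTH_BYTES = 12,
 * IB_DETH_BYTES = 8), lrh.packet_length works out to
 * (8 + 12 + 8 + 64 + 4) / 4 + 40 / 4 = 34 four-byte words (the extra 4
 * is the ICRC), and grh.payload_length to (12 + 8 + 64 + 4 + 3) & ~3 =
 * 88 bytes. */
struct ib_ud_header hdr;

ib_ud_header_init(64 /* payload_bytes */, 1 /* lrh */, 0 /* eth */,
                  0 /* vlan */, 1 /* grh */, 0 /* immediate */, &hdr);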
+
+/**
+ * ib_lrh_header_pack - Pack LRH header struct into wire format
+ * @lrh: unpacked LRH header struct
+ * @buf: buffer to pack into
+ *
+ * ib_lrh_header_pack() packs the LRH header structure @lrh into
+ * wire format in the buffer @buf.
+ */
+int ib_lrh_header_pack(struct ib_unpacked_lrh *lrh, void *buf)
+{
+ ib_pack(lrh_table, ARRAY_SIZE(lrh_table), lrh, buf);
+ return 0;
+}
+EXPORT_SYMBOL(ib_lrh_header_pack);
+
+/**
+ * ib_lrh_header_unpack - Unpack LRH structure from wire format
+ * @lrh: unpacked LRH header struct
+ * @buf: buffer to unpack from
+ *
+ * ib_lrh_header_unpack() unpacks the LRH header structure from
+ * wire format (in buf) into @lrh.
+ */
+int ib_lrh_header_unpack(void *buf, struct ib_unpacked_lrh *lrh)
+{
+ ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), buf, lrh);
+ return 0;
+}
+EXPORT_SYMBOL(ib_lrh_header_unpack);
+
+/**
+ * ib_ud_header_pack - Pack UD header struct into wire format
+ * @header: UD header struct
+ * @buf: buffer to pack into
+ *
+ * ib_ud_header_pack() packs the UD header structure @header into wire
+ * format in the buffer @buf.
+ */
+int ib_ud_header_pack(struct ib_ud_header *header,
+ void *buf)
+{
+ int len = 0;
+
+ if (header->lrh_present) {
+ ib_pack(lrh_table, ARRAY_SIZE(lrh_table),
+ &header->lrh, buf + len);
+ len += IB_LRH_BYTES;
+ }
+ if (header->eth_present) {
+ ib_pack(eth_table, ARRAY_SIZE(eth_table),
+ &header->eth, buf + len);
+ len += IB_ETH_BYTES;
+ }
+
+ if (header->vlan_present) {
+ ib_pack(vlan_table, ARRAY_SIZE(vlan_table),
+ &header->vlan, buf + len);
+ len += IB_VLAN_BYTES;
+ }
+
+ if (header->grh_present) {
+ ib_pack(grh_table, ARRAY_SIZE(grh_table),
+ &header->grh, buf + len);
+ len += IB_GRH_BYTES;
+ }
+
+ ib_pack(bth_table, ARRAY_SIZE(bth_table),
+ &header->bth, buf + len);
+ len += IB_BTH_BYTES;
+
+ ib_pack(deth_table, ARRAY_SIZE(deth_table),
+ &header->deth, buf + len);
+ len += IB_DETH_BYTES;
+
+ if (header->immediate_present) {
+ memcpy(buf + len, &header->immediate_data, sizeof header->immediate_data);
+ len += sizeof header->immediate_data;
+ }
+
+ return len;
+}
+EXPORT_SYMBOL(ib_ud_header_pack);
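/* Hedged sketch continuing the init example above: fill in the
 * per-send addressing fields, then pack everything in front of the
 * payload. The LIDs and QPNs are hypothetical; 128 bytes comfortably
 * covers LRH + GRH + BTH + DETH (68 bytes). */
u8 buf[128];
u16 dlid = 0x1234, slid = 0x0001;       /* hypothetical LIDs */
u32 qpn = 0x123456, my_qpn = 0x000042;  /* hypothetical QPNs */
int hdr_len;

hdr.lrh.destination_lid = cpu_to_be16(dlid);
hdr.lrh.source_lid = cpu_to_be16(slid);
hdr.bth.destination_qpn = cpu_to_be32(qpn);
hdr.deth.source_qpn = cpu_to_be32(my_qpn);
hdr_len = ib_ud_header_pack(&hdr, buf); /* returns header bytes written */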
+
+/**
+ * ib_ud_header_unpack - Unpack UD header struct from wire format
+ * @header: UD header struct
+ * @buf: buffer to unpack from
+ *
+ * ib_ud_header_unpack() unpacks the UD header structure @header from
+ * wire format in the buffer @buf.
+ */
+int ib_ud_header_unpack(void *buf,
+ struct ib_ud_header *header)
+{
+ ib_unpack(lrh_table, ARRAY_SIZE(lrh_table),
+ buf, &header->lrh);
+ buf += IB_LRH_BYTES;
+
+ if (header->lrh.link_version != 0) {
+ printk(KERN_WARNING "Invalid LRH.link_version %d\n",
+ header->lrh.link_version);
+ return -EINVAL;
+ }
+
+ switch (header->lrh.link_next_header) {
+ case IB_LNH_IBA_LOCAL:
+ header->grh_present = 0;
+ break;
+
+ case IB_LNH_IBA_GLOBAL:
+ header->grh_present = 1;
+ ib_unpack(grh_table, ARRAY_SIZE(grh_table),
+ buf, &header->grh);
+ buf += IB_GRH_BYTES;
+
+ if (header->grh.ip_version != 6) {
+ printk(KERN_WARNING "Invalid GRH.ip_version %d\n",
+ header->grh.ip_version);
+ return -EINVAL;
+ }
+ if (header->grh.next_header != 0x1b) {
+ printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n",
+ header->grh.next_header);
+ return -EINVAL;
+ }
+ break;
+
+ default:
+ printk(KERN_WARNING "Invalid LRH.link_next_header %d\n",
+ header->lrh.link_next_header);
+ return -EINVAL;
+ }
+
+ ib_unpack(bth_table, ARRAY_SIZE(bth_table),
+ buf, &header->bth);
+ buf += IB_BTH_BYTES;
+
+ switch (header->bth.opcode) {
+ case IB_OPCODE_UD_SEND_ONLY:
+ header->immediate_present = 0;
+ break;
+ case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE:
+ header->immediate_present = 1;
+ break;
+ default:
+ printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n",
+ header->bth.opcode);
+ return -EINVAL;
+ }
+
+ if (header->bth.transport_header_version != 0) {
+ printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n",
+ header->bth.transport_header_version);
+ return -EINVAL;
+ }
+
+ ib_unpack(deth_table, ARRAY_SIZE(deth_table),
+ buf, &header->deth);
+ buf += IB_DETH_BYTES;
+
+ if (header->immediate_present)
+ memcpy(&header->immediate_data, buf, sizeof header->immediate_data);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_ud_header_unpack);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/ud_header.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/umem.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/umem.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/umem.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,575 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/sched.h>
+#ifdef __linux__
+#include <linux/hugetlb.h>
+#endif
+#include <linux/dma-attrs.h>
+
+#include <sys/priv.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+
+#include <vm/vm.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_pageout.h>
+
+#include "uverbs.h"
+
+static int allow_weak_ordering;
+module_param(allow_weak_ordering, bool, 0444);
+MODULE_PARM_DESC(allow_weak_ordering, "Allow weak ordering for data registered memory");
+
+#define IB_UMEM_MAX_PAGE_CHUNK \
+ ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
+ ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
+ (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
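/* Editorial note: the expression above computes how many scatterlist
 * entries fit in a single page after the ib_umem_chunk header; the
 * denominator is sizeof(struct scatterlist) obtained via pointer
 * arithmetic on page_list[]. Purely illustrative numbers: with a 4 KB
 * page, a 64-byte chunk header and 40-byte scatterlist entries this
 * would permit (4096 - 64) / 40 = 100 pages per chunk. */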
+
+#ifdef __ia64__
+extern int dma_map_sg_hp_wa;
+
+static int dma_map_sg_ia64(struct ib_device *ibdev,
+ struct scatterlist *sg,
+ int nents,
+ enum dma_data_direction dir)
+{
+ int i, rc, j, lents = 0;
+ struct device *dev;
+
+ if (!dma_map_sg_hp_wa)
+ return ib_dma_map_sg(ibdev, sg, nents, dir);
+
+ dev = ibdev->dma_device;
+ for (i = 0; i < nents; ++i) {
+ rc = dma_map_sg(dev, sg + i, 1, dir);
+ if (rc <= 0) {
+ for (j = 0; j < i; ++j)
+ dma_unmap_sg(dev, sg + j, 1, dir);
+
+ return 0;
+ }
+ lents += rc;
+ }
+
+ return lents;
+}
+
+static void dma_unmap_sg_ia64(struct ib_device *ibdev,
+ struct scatterlist *sg,
+ int nents,
+ enum dma_data_direction dir)
+{
+ int i;
+ struct device *dev;
+
+ if (!dma_map_sg_hp_wa)
+ return ib_dma_unmap_sg(ibdev, sg, nents, dir);
+
+ dev = ibdev->dma_device;
+ for (i = 0; i < nents; ++i)
+ dma_unmap_sg(dev, sg + i, 1, dir);
+}
+
+#define ib_dma_map_sg(dev, sg, nents, dir) dma_map_sg_ia64(dev, sg, nents, dir)
+#define ib_dma_unmap_sg(dev, sg, nents, dir) dma_unmap_sg_ia64(dev, sg, nents, dir)
+
+#endif
+
+static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
+{
+#ifdef __linux__
+ struct ib_umem_chunk *chunk, *tmp;
+ int i;
+
+ list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
+ ib_dma_unmap_sg_attrs(dev, chunk->page_list,
+ chunk->nents, DMA_BIDIRECTIONAL, &chunk->attrs);
+ for (i = 0; i < chunk->nents; ++i) {
+ struct page *page = sg_page(&chunk->page_list[i]);
+ if (umem->writable && dirty)
+ set_page_dirty_lock(page);
+ put_page(page);
+ }
+ kfree(chunk);
+ }
+#else
+ struct ib_umem_chunk *chunk, *tmp;
+ vm_object_t object;
+ int i;
+
+ object = NULL;
+ list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
+ ib_dma_unmap_sg_attrs(dev, chunk->page_list,
+ chunk->nents, DMA_BIDIRECTIONAL, &chunk->attrs);
+ for (i = 0; i < chunk->nents; ++i) {
+ struct page *page = sg_page(&chunk->page_list[i]);
+ if (umem->writable && dirty) {
+ if (object && object != page->object)
+ VM_OBJECT_UNLOCK(object);
+ if (object != page->object) {
+ object = page->object;
+ VM_OBJECT_LOCK(object);
+ }
+ vm_page_dirty(page);
+ }
+ }
+ kfree(chunk);
+ }
+ if (object)
+ VM_OBJECT_UNLOCK(object);
+
+#endif
+}
+
+/**
+ * ib_umem_get - Pin and DMA map userspace memory.
+ * @context: userspace context to pin memory for
+ * @addr: userspace virtual address to start at
+ * @size: length of region to pin
+ * @access: IB_ACCESS_xxx flags for memory being pinned
+ * @dmasync: flush in-flight DMA when the memory region is written
+ */
+struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+ size_t size, int access, int dmasync)
+{
+#ifdef __linux__
+ struct ib_umem *umem;
+ struct page **page_list;
+ struct vm_area_struct **vma_list;
+ struct ib_umem_chunk *chunk;
+ unsigned long locked;
+ unsigned long lock_limit;
+ unsigned long cur_base;
+ unsigned long npages;
+ int ret;
+ int off;
+ int i;
+ DEFINE_DMA_ATTRS(attrs);
+
+ if (dmasync)
+ dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
+ else if (allow_weak_ordering)
+ dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);
+
+ if (!can_do_mlock())
+ return ERR_PTR(-EPERM);
+
+ umem = kmalloc(sizeof *umem, GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+
+ umem->context = context;
+ umem->length = size;
+ umem->offset = addr & ~PAGE_MASK;
+ umem->page_size = PAGE_SIZE;
+ /*
+ * We ask for writable memory if any access flags other than
+ * "remote read" are set. "Local write" and "remote write"
+ * obviously require write access. "Remote atomic" can do
+ * things like fetch and add, which will modify memory, and
+ * "MW bind" can change permissions by binding a window.
+ */
+ umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
+
+ /* We assume the memory is from hugetlb until proved otherwise */
+ umem->hugetlb = 1;
+
+ INIT_LIST_HEAD(&umem->chunk_list);
+
+ page_list = (struct page **) __get_free_page(GFP_KERNEL);
+ if (!page_list) {
+ kfree(umem);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /*
+ * if we can't alloc the vma_list, it's not so bad;
+ * just assume the memory is not hugetlb memory
+ */
+ vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
+ if (!vma_list)
+ umem->hugetlb = 0;
+
+ npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
+
+ down_write(&current->mm->mmap_sem);
+
+ locked = npages + current->mm->locked_vm;
+ lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+
+ if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ cur_base = addr & PAGE_MASK;
+
+ ret = 0;
+
+ while (npages) {
+ ret = get_user_pages(current, current->mm, cur_base,
+ min_t(unsigned long, npages,
+ PAGE_SIZE / sizeof (struct page *)),
+ 1, !umem->writable, page_list, vma_list);
+
+ if (ret < 0)
+ goto out;
+
+ cur_base += ret * PAGE_SIZE;
+ npages -= ret;
+
+ off = 0;
+
+ while (ret) {
+ chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
+ min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
+ GFP_KERNEL);
+ if (!chunk) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ chunk->attrs = attrs;
+ chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
+ sg_init_table(chunk->page_list, chunk->nents);
+ for (i = 0; i < chunk->nents; ++i) {
+ if (vma_list &&
+ !is_vm_hugetlb_page(vma_list[i + off]))
+ umem->hugetlb = 0;
+ sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0);
+ }
+
+ chunk->nmap = ib_dma_map_sg_attrs(context->device,
+ &chunk->page_list[0],
+ chunk->nents,
+ DMA_BIDIRECTIONAL,
+ &attrs);
+ if (chunk->nmap <= 0) {
+ for (i = 0; i < chunk->nents; ++i)
+ put_page(sg_page(&chunk->page_list[i]));
+ kfree(chunk);
+
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret -= chunk->nents;
+ off += chunk->nents;
+ list_add_tail(&chunk->list, &umem->chunk_list);
+ }
+
+ ret = 0;
+ }
+
+out:
+ if (ret < 0) {
+ __ib_umem_release(context->device, umem, 0);
+ kfree(umem);
+ } else
+ current->mm->locked_vm = locked;
+
+ up_write(&current->mm->mmap_sem);
+ if (vma_list)
+ free_page((unsigned long) vma_list);
+ free_page((unsigned long) page_list);
+
+ return ret < 0 ? ERR_PTR(ret) : umem;
+#else
+ struct ib_umem *umem;
+ struct ib_umem_chunk *chunk;
+ struct proc *proc;
+ pmap_t pmap;
+ vm_offset_t end, last, start;
+ vm_size_t npages;
+ int error;
+ int ents;
+ int ret;
+ int i;
+ DEFINE_DMA_ATTRS(attrs);
+
+ error = priv_check(curthread, PRIV_VM_MLOCK);
+ if (error)
+ return ERR_PTR(-error);
+
+ last = addr + size;
+ start = addr & PAGE_MASK; /* Use the linux PAGE_MASK definition. */
+ end = roundup2(last, PAGE_SIZE); /* Use PAGE_MASK safe operation. */
+ if (last < addr || end < addr)
+ return ERR_PTR(-EINVAL);
+ npages = atop(end - start);
+ if (npages > vm_page_max_wired)
+ return ERR_PTR(-ENOMEM);
+ umem = kzalloc(sizeof *umem, GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+ proc = curthread->td_proc;
+ PROC_LOCK(proc);
+ if (ptoa(npages +
+ pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))) >
+ lim_cur(proc, RLIMIT_MEMLOCK)) {
+ PROC_UNLOCK(proc);
+ kfree(umem);
+ return ERR_PTR(-ENOMEM);
+ }
+ PROC_UNLOCK(proc);
+ if (npages + cnt.v_wire_count > vm_page_max_wired) {
+ kfree(umem);
+ return ERR_PTR(-EAGAIN);
+ }
+ error = vm_map_wire(&proc->p_vmspace->vm_map, start, end,
+ VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES |
+ (umem->writable ? VM_MAP_WIRE_WRITE : 0));
+ if (error != KERN_SUCCESS) {
+ kfree(umem);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ umem->context = context;
+ umem->length = size;
+ umem->offset = addr & ~PAGE_MASK;
+ umem->page_size = PAGE_SIZE;
+ umem->start = addr;
+ /*
+ * We ask for writable memory if any access flags other than
+ * "remote read" are set. "Local write" and "remote write"
+ * obviously require write access. "Remote atomic" can do
+ * things like fetch and add, which will modify memory, and
+ * "MW bind" can change permissions by binding a window.
+ */
+ umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
+ umem->hugetlb = 0;
+ INIT_LIST_HEAD(&umem->chunk_list);
+
+ pmap = vm_map_pmap(&proc->p_vmspace->vm_map);
+ ret = 0;
+ while (npages) {
+ ents = min_t(int, npages, IB_UMEM_MAX_PAGE_CHUNK);
+ chunk = kmalloc(sizeof(*chunk) +
+ (sizeof(struct scatterlist) * ents),
+ GFP_KERNEL);
+ if (!chunk) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ chunk->attrs = attrs;
+ chunk->nents = ents;
+ sg_init_table(&chunk->page_list[0], ents);
+ for (i = 0; i < chunk->nents; ++i) {
+ vm_paddr_t pa;
+
+ pa = pmap_extract(pmap, start);
+ if (pa == 0) {
+ ret = -ENOMEM;
+ kfree(chunk);
+ goto out;
+ }
+ sg_set_page(&chunk->page_list[i], PHYS_TO_VM_PAGE(pa),
+ PAGE_SIZE, 0);
+ npages--;
+ start += PAGE_SIZE;
+ }
+
+ chunk->nmap = ib_dma_map_sg_attrs(context->device,
+ &chunk->page_list[0],
+ chunk->nents,
+ DMA_BIDIRECTIONAL,
+ &attrs);
+ if (chunk->nmap != chunk->nents) {
+ kfree(chunk);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ list_add_tail(&chunk->list, &umem->chunk_list);
+ }
+
+out:
+ if (ret < 0) {
+ __ib_umem_release(context->device, umem, 0);
+ kfree(umem);
+ }
+
+ return ret < 0 ? ERR_PTR(ret) : umem;
+#endif
+}
+EXPORT_SYMBOL(ib_umem_get);
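/* Hedged sketch of a typical consumer, loosely modeled on the HCA
 * drivers' memory-registration paths; context, start and length are
 * assumed inputs and error handling is abbreviated. */
struct ib_umem *umem;
int npages;

umem = ib_umem_get(context, start, length,
                   IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ, 0);
if (IS_ERR(umem))
        return PTR_ERR(umem);

npages = ib_umem_page_count(umem);      /* DMA pages to hand to the HCA */
/* ... walk umem->chunk_list and program the HCA's translation table ... */

ib_umem_release(umem);                  /* unpin on MR teardown */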
+
+#ifdef __linux__
+static void ib_umem_account(struct work_struct *work)
+{
+ struct ib_umem *umem = container_of(work, struct ib_umem, work);
+
+ down_write(&umem->mm->mmap_sem);
+ umem->mm->locked_vm -= umem->diff;
+ up_write(&umem->mm->mmap_sem);
+ mmput(umem->mm);
+ kfree(umem);
+}
+#endif
+
+/**
+ * ib_umem_release - release memory pinned with ib_umem_get
+ * @umem: umem struct to release
+ */
+void ib_umem_release(struct ib_umem *umem)
+{
+#ifdef __linux__
+ struct ib_ucontext *context = umem->context;
+ struct mm_struct *mm;
+ unsigned long diff;
+
+ __ib_umem_release(umem->context->device, umem, 1);
+
+ mm = get_task_mm(current);
+ if (!mm) {
+ kfree(umem);
+ return;
+ }
+
+ diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+
+ /*
+ * We may be called with the mm's mmap_sem already held. This
+ * can happen when a userspace munmap() is the call that drops
+ * the last reference to our file and calls our release
+ * method. If there are memory regions to destroy, we'll end
+ * up here and not be able to take the mmap_sem. In that case
+ * we defer the vm_locked accounting to the system workqueue.
+ */
+ if (context->closing) {
+ if (!down_write_trylock(&mm->mmap_sem)) {
+ INIT_WORK(&umem->work, ib_umem_account);
+ umem->mm = mm;
+ umem->diff = diff;
+
+ schedule_work(&umem->work);
+ return;
+ }
+ } else
+ down_write(&mm->mmap_sem);
+
+ current->mm->locked_vm -= diff;
+ up_write(&mm->mmap_sem);
+ mmput(mm);
+#else
+ vm_offset_t addr, end, last, start;
+ vm_size_t size;
+ int error;
+
+ __ib_umem_release(umem->context->device, umem, 1);
+ if (umem->context->closing) {
+ kfree(umem);
+ return;
+ }
+ error = priv_check(curthread, PRIV_VM_MUNLOCK);
+ if (error)
+ return;
+ addr = umem->start;
+ size = umem->length;
+ last = addr + size;
+ start = addr & PAGE_MASK; /* Use the linux PAGE_MASK definition. */
+ end = roundup2(last, PAGE_SIZE); /* Use PAGE_MASK safe operation. */
+ vm_map_unwire(&curthread->td_proc->p_vmspace->vm_map, start, end,
+ VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+
+#endif
+ kfree(umem);
+}
+EXPORT_SYMBOL(ib_umem_release);
+
+int ib_umem_page_count(struct ib_umem *umem)
+{
+ struct ib_umem_chunk *chunk;
+ int shift;
+ int i;
+ int n;
+
+ shift = ilog2(umem->page_size);
+
+ n = 0;
+ list_for_each_entry(chunk, &umem->chunk_list, list)
+ for (i = 0; i < chunk->nmap; ++i)
+ n += sg_dma_len(&chunk->page_list[i]) >> shift;
+
+ return n;
+}
+EXPORT_SYMBOL(ib_umem_page_count);
+
+/**********************************************/
+/*
+ * Stub functions for contiguous pages -
+ * We currently do not support this feature
+ */
+/**********************************************/
+
+/**
+ * ib_cmem_release_contiguous_pages - release memory allocated by
+ * ib_cmem_alloc_contiguous_pages.
+ * @cmem: cmem struct to release
+ */
+void ib_cmem_release_contiguous_pages(struct ib_cmem *cmem)
+{
+}
+EXPORT_SYMBOL(ib_cmem_release_contiguous_pages);
+
+/**
+ * ib_cmem_alloc_contiguous_pages - allocate contiguous pages
+ * @context: userspace context to allocate memory for
+ * @total_size: total required size for that allocation
+ * @page_size_order: order of one contiguous page
+ */
+struct ib_cmem *ib_cmem_alloc_contiguous_pages(struct ib_ucontext *context,
+ unsigned long total_size,
+ unsigned long page_size_order)
+{
+ return NULL;
+}
+EXPORT_SYMBOL(ib_cmem_alloc_contiguous_pages);
+
+/**
+ * ib_cmem_map_contiguous_pages_to_vma - map contiguous pages into VMA
+ * @ib_cmem: cmem structure returned by ib_cmem_alloc_contiguous_pages
+ * @vma: VMA to inject pages into
+ */
+int ib_cmem_map_contiguous_pages_to_vma(struct ib_cmem *ib_cmem,
+ struct vm_area_struct *vma)
+{
+ return 0;
+}
+EXPORT_SYMBOL(ib_cmem_map_contiguous_pages_to_vma);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/umem.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/user_mad.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/user_mad.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/user_mad.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,1224 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2008 Cisco. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/dma-mapping.h>
+#include <linux/poll.h>
+#include <linux/mutex.h>
+#include <linux/kref.h>
+#include <linux/compat.h>
+#include <linux/semaphore.h>
+
+#include <asm/uaccess.h>
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_user_mad.h>
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
+MODULE_LICENSE("Dual BSD/GPL");
+
+enum {
+ IB_UMAD_MAX_PORTS = 64,
+ IB_UMAD_MAX_AGENTS = 32,
+
+ IB_UMAD_MAJOR = 231,
+ IB_UMAD_MINOR_BASE = 0
+};
+
+/*
+ * Our lifetime rules for these structs are the following: each time a
+ * device special file is opened, we look up the corresponding struct
+ * ib_umad_port by minor in the umad_port[] table while holding the
+ * port_lock. If this lookup succeeds, we take a reference on the
+ * ib_umad_port's struct ib_umad_device while still holding the
+ * port_lock; if the lookup fails, we fail the open(). We drop these
+ * references in the corresponding close().
+ *
+ * In addition to references coming from open character devices, there
+ * is one more reference to each ib_umad_device representing the
+ * module's reference taken when allocating the ib_umad_device in
+ * ib_umad_add_one().
+ *
+ * When destroying an ib_umad_device, we clear all of its
+ * ib_umad_ports from umad_port[] while holding port_lock before
+ * dropping the module's reference to the ib_umad_device. This is
+ * always safe because any open() calls will either succeed and obtain
+ * a reference before we clear the umad_port[] entries, or fail after
+ * we clear the umad_port[] entries.
+ */
+
+struct ib_umad_port {
+ struct cdev *cdev;
+ struct device *dev;
+
+ struct cdev *sm_cdev;
+ struct device *sm_dev;
+ struct semaphore sm_sem;
+
+ struct mutex file_mutex;
+ struct list_head file_list;
+
+ struct ib_device *ib_dev;
+ struct ib_umad_device *umad_dev;
+ int dev_num;
+ u8 port_num;
+};
+
+struct ib_umad_device {
+ int start_port, end_port;
+ struct kref ref;
+ struct ib_umad_port port[0];
+};
+
+struct ib_umad_file {
+ struct mutex mutex;
+ struct ib_umad_port *port;
+ struct file *filp;
+ struct list_head recv_list;
+ struct list_head send_list;
+ struct list_head port_list;
+ spinlock_t send_lock;
+ wait_queue_head_t recv_wait;
+ struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
+ int agents_dead;
+ u8 use_pkey_index;
+ u8 already_used;
+};
+
+struct ib_umad_packet {
+ struct ib_mad_send_buf *msg;
+ struct ib_mad_recv_wc *recv_wc;
+ struct list_head list;
+ int length;
+ struct ib_user_mad mad;
+};
+
+static struct class *umad_class;
+
+static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
+
+static DEFINE_SPINLOCK(port_lock);
+static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
+static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
+
+static void ib_umad_add_one(struct ib_device *device);
+static void ib_umad_remove_one(struct ib_device *device);
+
+static void ib_umad_release_dev(struct kref *ref)
+{
+ struct ib_umad_device *dev =
+ container_of(ref, struct ib_umad_device, ref);
+
+ kfree(dev);
+}
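/* Editorial sketch of the lifetime protocol described in the comment
 * earlier in this file: open() takes a reference while holding
 * port_lock, close() and the device-removal path drop theirs, and the
 * final kref_put() frees the ib_umad_device through the release
 * function above. */
kref_get(&port->umad_dev->ref);                         /* in open()  */
kref_put(&port->umad_dev->ref, ib_umad_release_dev);    /* in close() */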
+
+static int hdr_size(struct ib_umad_file *file)
+{
+ return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
+ sizeof (struct ib_user_mad_hdr_old);
+}
+
+/* caller must hold file->mutex */
+static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
+{
+ return file->agents_dead ? NULL : file->agent[id];
+}
+
+static int queue_packet(struct ib_umad_file *file,
+ struct ib_mad_agent *agent,
+ struct ib_umad_packet *packet)
+{
+ int ret = 1;
+
+ mutex_lock(&file->mutex);
+
+ for (packet->mad.hdr.id = 0;
+ packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
+ packet->mad.hdr.id++)
+ if (agent == __get_agent(file, packet->mad.hdr.id)) {
+ list_add_tail(&packet->list, &file->recv_list);
+ selwakeup(&file->filp->f_selinfo);
+ wake_up_interruptible(&file->recv_wait);
+ ret = 0;
+ break;
+ }
+
+ mutex_unlock(&file->mutex);
+
+ return ret;
+}
+
+static void dequeue_send(struct ib_umad_file *file,
+ struct ib_umad_packet *packet)
+{
+ spin_lock_irq(&file->send_lock);
+ list_del(&packet->list);
+ spin_unlock_irq(&file->send_lock);
+}
+
+static void send_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *send_wc)
+{
+ struct ib_umad_file *file = agent->context;
+ struct ib_umad_packet *packet = send_wc->send_buf->context[0];
+
+ dequeue_send(file, packet);
+ ib_destroy_ah(packet->msg->ah);
+ ib_free_send_mad(packet->msg);
+
+ if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
+ packet->length = IB_MGMT_MAD_HDR;
+ packet->mad.hdr.status = ETIMEDOUT;
+ if (!queue_packet(file, agent, packet))
+ return;
+ }
+ kfree(packet);
+}
+
+static void recv_handler(struct ib_mad_agent *agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_umad_file *file = agent->context;
+ struct ib_umad_packet *packet;
+
+ if (mad_recv_wc->wc->status != IB_WC_SUCCESS)
+ goto err1;
+
+ packet = kzalloc(sizeof *packet, GFP_KERNEL);
+ if (!packet)
+ goto err1;
+
+ packet->length = mad_recv_wc->mad_len;
+ packet->recv_wc = mad_recv_wc;
+
+ packet->mad.hdr.status = 0;
+ packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len;
+ packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
+ packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
+ packet->mad.hdr.sl = mad_recv_wc->wc->sl;
+ packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
+ packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
+ packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
+ if (packet->mad.hdr.grh_present) {
+ struct ib_ah_attr ah_attr;
+
+ ib_init_ah_from_wc(agent->device, agent->port_num,
+ mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
+ &ah_attr);
+
+ packet->mad.hdr.gid_index = ah_attr.grh.sgid_index;
+ packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit;
+ packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class;
+ memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16);
+ packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label);
+ }
+
+ if (queue_packet(file, agent, packet))
+ goto err2;
+ return;
+
+err2:
+ kfree(packet);
+err1:
+ ib_free_recv_mad(mad_recv_wc);
+}
+
+static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf,
+ struct ib_umad_packet *packet, size_t count)
+{
+ struct ib_mad_recv_buf *recv_buf;
+ int left, seg_payload, offset, max_seg_payload;
+
+ /* We need enough room to copy the first (or only) MAD segment. */
+ recv_buf = &packet->recv_wc->recv_buf;
+ if ((packet->length <= sizeof (*recv_buf->mad) &&
+ count < hdr_size(file) + packet->length) ||
+ (packet->length > sizeof (*recv_buf->mad) &&
+ count < hdr_size(file) + sizeof (*recv_buf->mad)))
+ return -EINVAL;
+
+ if (copy_to_user(buf, &packet->mad, hdr_size(file)))
+ return -EFAULT;
+
+ buf += hdr_size(file);
+ seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad));
+ if (copy_to_user(buf, recv_buf->mad, seg_payload))
+ return -EFAULT;
+
+ if (seg_payload < packet->length) {
+ /*
+ * Multipacket RMPP MAD message. Copy remainder of message.
+ * Note that last segment may have a shorter payload.
+ */
+ if (count < hdr_size(file) + packet->length) {
+ /*
+ * The buffer is too small to hold the whole message; the header
+ * and first RMPP segment (which carries the total RMPP message
+ * length) have already been copied out, so fail with -ENOSPC and
+ * let the caller retry with a larger buffer.
+ */
+ return -ENOSPC;
+ }
+ offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class);
+ max_seg_payload = sizeof (struct ib_mad) - offset;
+
+ for (left = packet->length - seg_payload, buf += seg_payload;
+ left; left -= seg_payload, buf += seg_payload) {
+ recv_buf = container_of(recv_buf->list.next,
+ struct ib_mad_recv_buf, list);
+ seg_payload = min(left, max_seg_payload);
+ if (copy_to_user(buf, ((void *) recv_buf->mad) + offset,
+ seg_payload))
+ return -EFAULT;
+ }
+ }
+ return hdr_size(file) + packet->length;
+}
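On the userspace side, this -ENOSPC path means a short read() still yields the header plus the first RMPP segment and leaves the packet queued, so the caller can retry with more room. A hedged sketch of a consumer (it simply doubles the buffer; a real one would parse the total length out of the RMPP header in the returned first segment):

    #include <errno.h>
    #include <stdlib.h>
    #include <unistd.h>

    /* Read one MAD from an open umad fd, growing the buffer on -ENOSPC. */
    static ssize_t read_whole_mad(int fd, char **buf, size_t *len)
    {
            ssize_t n;

            while ((n = read(fd, *buf, *len)) < 0 && errno == ENOSPC) {
                    char *tmp = realloc(*buf, *len * 2);

                    if (!tmp)
                            return -1;
                    *buf = tmp;
                    *len *= 2;      /* packet was requeued; try again */
            }
            return n;
    }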
+
+static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf,
+ struct ib_umad_packet *packet, size_t count)
+{
+ ssize_t size = hdr_size(file) + packet->length;
+
+ if (count < size)
+ return -EINVAL;
+
+ if (copy_to_user(buf, &packet->mad, hdr_size(file)))
+ return -EFAULT;
+
+ buf += hdr_size(file);
+
+ if (copy_to_user(buf, packet->mad.data, packet->length))
+ return -EFAULT;
+
+ return size;
+}
+
+static ssize_t ib_umad_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_umad_file *file = filp->private_data;
+ struct ib_umad_packet *packet;
+ ssize_t ret;
+
+ if (count < hdr_size(file))
+ return -EINVAL;
+
+ mutex_lock(&file->mutex);
+
+ while (list_empty(&file->recv_list)) {
+ mutex_unlock(&file->mutex);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(file->recv_wait,
+ !list_empty(&file->recv_list)))
+ return -ERESTARTSYS;
+
+ mutex_lock(&file->mutex);
+ }
+
+ packet = list_entry(file->recv_list.next, struct ib_umad_packet, list);
+ list_del(&packet->list);
+
+ mutex_unlock(&file->mutex);
+
+ if (packet->recv_wc)
+ ret = copy_recv_mad(file, buf, packet, count);
+ else
+ ret = copy_send_mad(file, buf, packet, count);
+
+ if (ret < 0) {
+ /* Requeue packet */
+ mutex_lock(&file->mutex);
+ list_add(&packet->list, &file->recv_list);
+ mutex_unlock(&file->mutex);
+ } else {
+ if (packet->recv_wc)
+ ib_free_recv_mad(packet->recv_wc);
+ kfree(packet);
+ }
+ return ret;
+}
+
+static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf)
+{
+ int left, seg;
+
+ /* Copy class specific header */
+ if ((msg->hdr_len > IB_MGMT_RMPP_HDR) &&
+ copy_from_user(msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR,
+ msg->hdr_len - IB_MGMT_RMPP_HDR))
+ return -EFAULT;
+
+ /* All headers are in place. Copy data segments. */
+ for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0;
+ seg++, left -= msg->seg_size, buf += msg->seg_size) {
+ if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf,
+ min(left, msg->seg_size)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static int same_destination(struct ib_user_mad_hdr *hdr1,
+ struct ib_user_mad_hdr *hdr2)
+{
+ if (!hdr1->grh_present && !hdr2->grh_present)
+ return (hdr1->lid == hdr2->lid);
+
+ if (hdr1->grh_present && hdr2->grh_present)
+ return !memcmp(hdr1->gid, hdr2->gid, 16);
+
+ return 0;
+}
+
+static int is_duplicate(struct ib_umad_file *file,
+ struct ib_umad_packet *packet)
+{
+ struct ib_umad_packet *sent_packet;
+ struct ib_mad_hdr *sent_hdr, *hdr;
+
+ hdr = (struct ib_mad_hdr *) packet->mad.data;
+ list_for_each_entry(sent_packet, &file->send_list, list) {
+ sent_hdr = (struct ib_mad_hdr *) sent_packet->mad.data;
+
+ if ((hdr->tid != sent_hdr->tid) ||
+ (hdr->mgmt_class != sent_hdr->mgmt_class))
+ continue;
+
+ /*
+ * No need to be overly clever here. If two new operations have
+ * the same TID, reject the second as a duplicate. This is more
+ * restrictive than required by the spec.
+ */
+ if (!ib_response_mad((struct ib_mad *) hdr)) {
+ if (!ib_response_mad((struct ib_mad *) sent_hdr))
+ return 1;
+ continue;
+ } else if (!ib_response_mad((struct ib_mad *) sent_hdr))
+ continue;
+
+ if (same_destination(&packet->mad.hdr, &sent_packet->mad.hdr))
+ return 1;
+ }
+
+ return 0;
+}
+
+static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_umad_file *file = filp->private_data;
+ struct ib_umad_packet *packet;
+ struct ib_mad_agent *agent;
+ struct ib_ah_attr ah_attr;
+ struct ib_ah *ah;
+ struct ib_rmpp_mad *rmpp_mad;
+ __be64 *tid;
+ int ret, data_len, hdr_len, copy_offset, rmpp_active;
+
+ if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
+ return -EINVAL;
+
+ packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
+ if (!packet)
+ return -ENOMEM;
+
+ if (copy_from_user(&packet->mad, buf, hdr_size(file))) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { /* hdr.id is unsigned */
+ ret = -EINVAL;
+ goto err;
+ }
+
+ buf += hdr_size(file);
+
+ if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ mutex_lock(&file->mutex);
+
+ agent = __get_agent(file, packet->mad.hdr.id);
+ if (!agent) {
+ ret = -EINVAL;
+ goto err_up;
+ }
+
+ memset(&ah_attr, 0, sizeof ah_attr);
+ ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid);
+ ah_attr.sl = packet->mad.hdr.sl;
+ ah_attr.src_path_bits = packet->mad.hdr.path_bits;
+ ah_attr.port_num = file->port->port_num;
+ if (packet->mad.hdr.grh_present) {
+ ah_attr.ah_flags = IB_AH_GRH;
+ memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
+ ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
+ ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
+ ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
+ ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
+ }
+
+ ah = ib_create_ah(agent->qp->pd, &ah_attr);
+ if (IS_ERR(ah)) {
+ ret = PTR_ERR(ah);
+ goto err_up;
+ }
+
+ rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
+ hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
+ if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) {
+ copy_offset = IB_MGMT_MAD_HDR;
+ rmpp_active = 0;
+ } else {
+ copy_offset = IB_MGMT_RMPP_HDR;
+ rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+ IB_MGMT_RMPP_FLAG_ACTIVE;
+ }
+
+ data_len = count - hdr_size(file) - hdr_len;
+ packet->msg = ib_create_send_mad(agent,
+ be32_to_cpu(packet->mad.hdr.qpn),
+ packet->mad.hdr.pkey_index, rmpp_active,
+ hdr_len, data_len, GFP_KERNEL);
+ if (IS_ERR(packet->msg)) {
+ ret = PTR_ERR(packet->msg);
+ goto err_ah;
+ }
+
+ packet->msg->ah = ah;
+ packet->msg->timeout_ms = packet->mad.hdr.timeout_ms;
+ packet->msg->retries = packet->mad.hdr.retries;
+ packet->msg->context[0] = packet;
+
+ /* Copy MAD header. Any RMPP header is already in place. */
+ memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
+
+ if (!rmpp_active) {
+ if (copy_from_user(packet->msg->mad + copy_offset,
+ buf + copy_offset,
+ hdr_len + data_len - copy_offset)) {
+ ret = -EFAULT;
+ goto err_msg;
+ }
+ } else {
+ ret = copy_rmpp_mad(packet->msg, buf);
+ if (ret)
+ goto err_msg;
+ }
+
+ /*
+ * Set the high-order part of the transaction ID to make MADs from
+ * different agents unique, and allow routing responses back to the
+ * original requestor.
+ */
+ if (!ib_response_mad(packet->msg->mad)) {
+ tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
+ *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
+ (be64_to_cpup(tid) & 0xffffffff));
+ rmpp_mad->mad_hdr.tid = *tid;
+ }
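The rewritten TID is simply the agent's kernel-assigned hi_tid over the caller's low 32 bits. A standalone illustration of the layout:

    #include <stdint.h>

    /* Illustrative only: mirrors the request-TID rewrite above. */
    static uint64_t umad_tid(uint32_t hi_tid, uint64_t user_tid)
    {
            return ((uint64_t) hi_tid << 32) | (user_tid & 0xffffffff);
    }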
+
+ spin_lock_irq(&file->send_lock);
+ ret = is_duplicate(file, packet);
+ if (!ret)
+ list_add_tail(&packet->list, &file->send_list);
+ spin_unlock_irq(&file->send_lock);
+ if (ret) {
+ ret = -EINVAL;
+ goto err_msg;
+ }
+
+ ret = ib_post_send_mad(packet->msg, NULL);
+ if (ret)
+ goto err_send;
+
+ mutex_unlock(&file->mutex);
+ return count;
+
+err_send:
+ dequeue_send(file, packet);
+err_msg:
+ ib_free_send_mad(packet->msg);
+err_ah:
+ ib_destroy_ah(ah);
+err_up:
+ mutex_unlock(&file->mutex);
+err:
+ kfree(packet);
+ return ret;
+}
+
+static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wait)
+{
+ struct ib_umad_file *file = filp->private_data;
+
+ /* we will always be able to post a MAD send */
+ unsigned int mask = POLLOUT | POLLWRNORM;
+
+ poll_wait(filp, &file->recv_wait, wait);
+
+ if (!list_empty(&file->recv_list))
+ mask |= POLLIN | POLLRDNORM;
+
+ return mask;
+}
+
+static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
+ int compat_method_mask)
+{
+ struct ib_user_mad_reg_req ureq;
+ struct ib_mad_reg_req req;
+ struct ib_mad_agent *agent = NULL;
+ int agent_id;
+ int ret;
+
+ mutex_lock(&file->port->file_mutex);
+ mutex_lock(&file->mutex);
+
+ if (!file->port->ib_dev) {
+ ret = -EPIPE;
+ goto out;
+ }
+
+ if (copy_from_user(&ureq, arg, sizeof ureq)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (ureq.qpn != 0 && ureq.qpn != 1) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
+ if (!__get_agent(file, agent_id))
+ goto found;
+
+ ret = -ENOMEM;
+ goto out;
+
+found:
+ if (ureq.mgmt_class) {
+ req.mgmt_class = ureq.mgmt_class;
+ req.mgmt_class_version = ureq.mgmt_class_version;
+ memcpy(req.oui, ureq.oui, sizeof req.oui);
+
+ if (compat_method_mask) {
+ u32 *umm = (u32 *) ureq.method_mask;
+ int i;
+
+ for (i = 0; i < BITS_TO_LONGS(IB_MGMT_MAX_METHODS); ++i)
+ req.method_mask[i] =
+ umm[i * 2] | ((u64) umm[i * 2 + 1] << 32);
+ } else
+ memcpy(req.method_mask, ureq.method_mask,
+ sizeof req.method_mask);
+ }
+
+ agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
+ ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
+ ureq.mgmt_class ? &req : NULL,
+ ureq.rmpp_version,
+ send_handler, recv_handler, file);
+ if (IS_ERR(agent)) {
+ ret = PTR_ERR(agent);
+ agent = NULL;
+ goto out;
+ }
+
+ if (put_user(agent_id,
+ (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (!file->already_used) {
+ file->already_used = 1;
+ if (!file->use_pkey_index) {
+ printk(KERN_WARNING "user_mad: process %s did not enable "
+ "P_Key index support.\n", curproc->p_comm);
+ printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt "
+ "has info on the new ABI.\n");
+ }
+ }
+
+ file->agent[agent_id] = agent;
+ ret = 0;
+
+out:
+ mutex_unlock(&file->mutex);
+
+ if (ret && agent)
+ ib_unregister_mad_agent(agent);
+
+ mutex_unlock(&file->port->file_mutex);
+
+ return ret;
+}
+
+static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
+{
+ struct ib_mad_agent *agent = NULL;
+ u32 id;
+ int ret = 0;
+
+ if (get_user(id, arg))
+ return -EFAULT;
+
+ mutex_lock(&file->port->file_mutex);
+ mutex_lock(&file->mutex);
+
+ if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { /* id is unsigned */
+ ret = -EINVAL;
+ goto out;
+ }
+
+ agent = file->agent[id];
+ file->agent[id] = NULL;
+
+out:
+ mutex_unlock(&file->mutex);
+
+ if (agent)
+ ib_unregister_mad_agent(agent);
+
+ mutex_unlock(&file->port->file_mutex);
+
+ return ret;
+}
+
+static long ib_umad_enable_pkey(struct ib_umad_file *file)
+{
+ int ret = 0;
+
+ mutex_lock(&file->mutex);
+ if (file->already_used)
+ ret = -EINVAL;
+ else
+ file->use_pkey_index = 1;
+ mutex_unlock(&file->mutex);
+
+ return ret;
+}
+
+static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ switch (cmd) {
+ case IB_USER_MAD_REGISTER_AGENT:
+ return ib_umad_reg_agent(filp->private_data, (void __user *) arg, 0);
+ case IB_USER_MAD_UNREGISTER_AGENT:
+ return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg);
+ case IB_USER_MAD_ENABLE_PKEY:
+ return ib_umad_enable_pkey(filp->private_data);
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
+#ifdef CONFIG_COMPAT
+static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ switch (cmd) {
+ case IB_USER_MAD_REGISTER_AGENT:
+ return ib_umad_reg_agent(filp->private_data, compat_ptr(arg), 1);
+ case IB_USER_MAD_UNREGISTER_AGENT:
+ return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg));
+ case IB_USER_MAD_ENABLE_PKEY:
+ return ib_umad_enable_pkey(filp->private_data);
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+#endif
+
+/*
+ * ib_umad_open() does not need the BKL:
+ *
+ * - umad_port[] accesses are protected by port_lock, the
+ * ib_umad_port structures are properly reference counted, and
+ * everything else is purely local to the file being created, so
+ * races against other open calls are not a problem;
+ * - the ioctl method does not affect any global state outside of the
+ * file structure being operated on;
+ * - the port is added to umad_port[] as the last part of module
+ * initialization, so the open method will either fail immediately
+ * with -ENXIO, or all required initialization will be done.
+ */
+static int ib_umad_open(struct inode *inode, struct file *filp)
+{
+ struct ib_umad_port *port;
+ struct ib_umad_file *file;
+ int ret = 0;
+
+ spin_lock(&port_lock);
+ port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE];
+ if (port)
+ kref_get(&port->umad_dev->ref);
+ spin_unlock(&port_lock);
+
+ if (!port)
+ return -ENXIO;
+
+ mutex_lock(&port->file_mutex);
+
+ if (!port->ib_dev) {
+ ret = -ENXIO;
+ goto out;
+ }
+
+ file = kzalloc(sizeof *file, GFP_KERNEL);
+ if (!file) {
+ kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ mutex_init(&file->mutex);
+ spin_lock_init(&file->send_lock);
+ INIT_LIST_HEAD(&file->recv_list);
+ INIT_LIST_HEAD(&file->send_list);
+ init_waitqueue_head(&file->recv_wait);
+
+ file->port = port;
+ file->filp = filp;
+ filp->private_data = file;
+
+ list_add_tail(&file->port_list, &port->file_list);
+
+out:
+ mutex_unlock(&port->file_mutex);
+ return ret;
+}
+
+static int ib_umad_close(struct inode *inode, struct file *filp)
+{
+ struct ib_umad_file *file = filp->private_data;
+ struct ib_umad_device *dev = file->port->umad_dev;
+ struct ib_umad_packet *packet, *tmp;
+ int already_dead;
+ int i;
+
+ mutex_lock(&file->port->file_mutex);
+ mutex_lock(&file->mutex);
+
+ already_dead = file->agents_dead;
+ file->agents_dead = 1;
+
+ list_for_each_entry_safe(packet, tmp, &file->recv_list, list) {
+ if (packet->recv_wc)
+ ib_free_recv_mad(packet->recv_wc);
+ kfree(packet);
+ }
+
+ list_del(&file->port_list);
+
+ mutex_unlock(&file->mutex);
+
+ if (!already_dead)
+ for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
+ if (file->agent[i])
+ ib_unregister_mad_agent(file->agent[i]);
+
+ mutex_unlock(&file->port->file_mutex);
+
+ kfree(file);
+ kref_put(&dev->ref, ib_umad_release_dev);
+
+ return 0;
+}
+
+static const struct file_operations umad_fops = {
+ .owner = THIS_MODULE,
+ .read = ib_umad_read,
+ .write = ib_umad_write,
+ .poll = ib_umad_poll,
+ .unlocked_ioctl = ib_umad_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ib_umad_compat_ioctl,
+#endif
+ .open = ib_umad_open,
+ .release = ib_umad_close
+};
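For reference, the userspace sequence these file operations expect is open, optionally IB_USER_MAD_ENABLE_PKEY, then IB_USER_MAD_REGISTER_AGENT before any read or write. A hedged sketch (header path and struct layout are assumptions based on the usual <rdma/ib_user_mad.h> ABI that this file consumes):

    #include <fcntl.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <rdma/ib_user_mad.h>   /* assumed ABI header location */

    /* Open /dev/umadN and register one GSI agent; returns the fd or -1. */
    static int umad_open_agent(const char *path, unsigned char mgmt_class)
    {
            struct ib_user_mad_reg_req req;
            int fd = open(path, O_RDWR);

            if (fd < 0)
                    return -1;
            /* Must precede the first register call; see ib_umad_enable_pkey(). */
            if (ioctl(fd, IB_USER_MAD_ENABLE_PKEY) < 0)
                    goto fail;
            memset(&req, 0, sizeof req);
            req.qpn = 1;                    /* GSI; 0 selects the SMI */
            req.mgmt_class = mgmt_class;
            req.mgmt_class_version = 1;
            if (ioctl(fd, IB_USER_MAD_REGISTER_AGENT, &req) < 0)
                    goto fail;
            /* req.id now holds the slot to put in ib_user_mad.hdr.id. */
            return fd;
    fail:
            close(fd);
            return -1;
    }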
+
+static int ib_umad_sm_open(struct inode *inode, struct file *filp)
+{
+ struct ib_umad_port *port;
+ struct ib_port_modify props = {
+ .set_port_cap_mask = IB_PORT_SM
+ };
+ int ret;
+
+ spin_lock(&port_lock);
+ port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS];
+ if (port)
+ kref_get(&port->umad_dev->ref);
+ spin_unlock(&port_lock);
+
+ if (!port)
+ return -ENXIO;
+
+ if (filp->f_flags & O_NONBLOCK) {
+ if (down_trylock(&port->sm_sem)) {
+ ret = -EAGAIN;
+ goto fail;
+ }
+ } else {
+ if (down_interruptible(&port->sm_sem)) {
+ ret = -ERESTARTSYS;
+ goto fail;
+ }
+ }
+
+ ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+ if (ret) {
+ up(&port->sm_sem);
+ goto fail;
+ }
+
+ filp->private_data = port;
+
+ return 0;
+
+fail:
+ kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+ return ret;
+}
+
+static int ib_umad_sm_close(struct inode *inode, struct file *filp)
+{
+ struct ib_umad_port *port = filp->private_data;
+ struct ib_port_modify props = {
+ .clr_port_cap_mask = IB_PORT_SM
+ };
+ int ret = 0;
+
+ mutex_lock(&port->file_mutex);
+ if (port->ib_dev)
+ ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+ mutex_unlock(&port->file_mutex);
+
+ up(&port->sm_sem);
+
+ kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+
+ return ret;
+}
+
+static const struct file_operations umad_sm_fops = {
+ .owner = THIS_MODULE,
+ .open = ib_umad_sm_open,
+ .release = ib_umad_sm_close
+};
+
+static struct ib_client umad_client = {
+ .name = "umad",
+ .add = ib_umad_add_one,
+ .remove = ib_umad_remove_one
+};
+
+static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct ib_umad_port *port = dev_get_drvdata(dev);
+
+ if (!port)
+ return -ENODEV;
+
+ return sprintf(buf, "%s\n", port->ib_dev->name);
+}
+static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+
+static ssize_t show_port(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct ib_umad_port *port = dev_get_drvdata(dev);
+
+ if (!port)
+ return -ENODEV;
+
+ return sprintf(buf, "%d\n", port->port_num);
+}
+static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
+
+static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
+}
+static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static int ib_umad_init_port(struct ib_device *device, int port_num,
+ struct ib_umad_port *port)
+{
+ spin_lock(&port_lock);
+ port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
+ if (port->dev_num >= IB_UMAD_MAX_PORTS) {
+ spin_unlock(&port_lock);
+ return -1;
+ }
+ set_bit(port->dev_num, dev_map);
+ spin_unlock(&port_lock);
+
+ port->ib_dev = device;
+ port->port_num = port_num;
+ init_MUTEX(&port->sm_sem);
+ mutex_init(&port->file_mutex);
+ INIT_LIST_HEAD(&port->file_list);
+
+ port->cdev = cdev_alloc();
+ if (!port->cdev)
+ return -1;
+ port->cdev->owner = THIS_MODULE;
+ port->cdev->ops = &umad_fops;
+ kobject_set_name(&port->cdev->kobj, "umad%d", port->dev_num);
+ if (cdev_add(port->cdev, base_dev + port->dev_num, 1))
+ goto err_cdev;
+
+ port->dev = device_create(umad_class, device->dma_device,
+ port->cdev->dev, port,
+ "umad%d", port->dev_num);
+ if (IS_ERR(port->dev))
+ goto err_cdev;
+
+ if (device_create_file(port->dev, &dev_attr_ibdev))
+ goto err_dev;
+ if (device_create_file(port->dev, &dev_attr_port))
+ goto err_dev;
+
+ port->sm_cdev = cdev_alloc();
+ if (!port->sm_cdev)
+ goto err_dev;
+ port->sm_cdev->owner = THIS_MODULE;
+ port->sm_cdev->ops = &umad_sm_fops;
+ kobject_set_name(&port->sm_cdev->kobj, "issm%d", port->dev_num);
+ if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
+ goto err_sm_cdev;
+
+ port->sm_dev = device_create(umad_class, device->dma_device,
+ port->sm_cdev->dev, port,
+ "issm%d", port->dev_num);
+ if (IS_ERR(port->sm_dev))
+ goto err_sm_cdev;
+
+ if (device_create_file(port->sm_dev, &dev_attr_ibdev))
+ goto err_sm_dev;
+ if (device_create_file(port->sm_dev, &dev_attr_port))
+ goto err_sm_dev;
+
+ spin_lock(&port_lock);
+ umad_port[port->dev_num] = port;
+ spin_unlock(&port_lock);
+
+ return 0;
+
+err_sm_dev:
+ device_destroy(umad_class, port->sm_cdev->dev);
+
+err_sm_cdev:
+ cdev_del(port->sm_cdev);
+
+err_dev:
+ device_destroy(umad_class, port->cdev->dev);
+
+err_cdev:
+ cdev_del(port->cdev);
+ clear_bit(port->dev_num, dev_map);
+
+ return -1;
+}
+
+static void ib_umad_kill_port(struct ib_umad_port *port)
+{
+ struct ib_umad_file *file;
+ int already_dead;
+ int id;
+
+ dev_set_drvdata(port->dev, NULL);
+ dev_set_drvdata(port->sm_dev, NULL);
+
+ device_destroy(umad_class, port->cdev->dev);
+ device_destroy(umad_class, port->sm_cdev->dev);
+
+ cdev_del(port->cdev);
+ cdev_del(port->sm_cdev);
+
+ spin_lock(&port_lock);
+ umad_port[port->dev_num] = NULL;
+ spin_unlock(&port_lock);
+
+ mutex_lock(&port->file_mutex);
+
+ port->ib_dev = NULL;
+
+ list_for_each_entry(file, &port->file_list, port_list) {
+ mutex_lock(&file->mutex);
+ already_dead = file->agents_dead;
+ file->agents_dead = 1;
+ mutex_unlock(&file->mutex);
+
+ for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id)
+ if (file->agent[id])
+ ib_unregister_mad_agent(file->agent[id]);
+ }
+
+ mutex_unlock(&port->file_mutex);
+
+ clear_bit(port->dev_num, dev_map);
+}
+
+static void ib_umad_add_one(struct ib_device *device)
+{
+ struct ib_umad_device *umad_dev;
+ int s, e, i;
+
+ if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
+ if (device->node_type == RDMA_NODE_IB_SWITCH)
+ s = e = 0;
+ else {
+ s = 1;
+ e = device->phys_port_cnt;
+ }
+
+ umad_dev = kzalloc(sizeof *umad_dev +
+ (e - s + 1) * sizeof (struct ib_umad_port),
+ GFP_KERNEL);
+ if (!umad_dev)
+ return;
+
+ kref_init(&umad_dev->ref);
+
+ umad_dev->start_port = s;
+ umad_dev->end_port = e;
+
+ for (i = s; i <= e; ++i) {
+ umad_dev->port[i - s].umad_dev = umad_dev;
+
+ if (rdma_port_get_link_layer(device, i) == IB_LINK_LAYER_INFINIBAND)
+ if (ib_umad_init_port(device, i, &umad_dev->port[i - s]))
+ goto err;
+ }
+
+ ib_set_client_data(device, &umad_client, umad_dev);
+
+ return;
+
+err:
+ while (--i >= s)
+ if (rdma_port_get_link_layer(device, i) == IB_LINK_LAYER_INFINIBAND)
+ ib_umad_kill_port(&umad_dev->port[i - s]);
+
+ kref_put(&umad_dev->ref, ib_umad_release_dev);
+}
+
+static void ib_umad_remove_one(struct ib_device *device)
+{
+ struct ib_umad_device *umad_dev = ib_get_client_data(device, &umad_client);
+ int i;
+
+ if (!umad_dev)
+ return;
+
+ for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
+ if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND)
+ ib_umad_kill_port(&umad_dev->port[i]);
+
+ kref_put(&umad_dev->ref, ib_umad_release_dev);
+}
+
+static int __init ib_umad_init(void)
+{
+ int ret;
+
+ ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
+ "infiniband_mad");
+ if (ret) {
+ printk(KERN_ERR "user_mad: couldn't register device number\n");
+ goto out;
+ }
+
+ umad_class = class_create(THIS_MODULE, "infiniband_mad");
+ if (IS_ERR(umad_class)) {
+ ret = PTR_ERR(umad_class);
+ printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
+ goto out_chrdev;
+ }
+
+ ret = class_create_file(umad_class, &class_attr_abi_version);
+ if (ret) {
+ printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
+ goto out_class;
+ }
+
+ ret = ib_register_client(&umad_client);
+ if (ret) {
+ printk(KERN_ERR "user_mad: couldn't register ib_umad client\n");
+ goto out_class;
+ }
+
+ return 0;
+
+out_class:
+ class_destroy(umad_class);
+
+out_chrdev:
+ unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
+
+out:
+ return ret;
+}
+
+static void __exit ib_umad_cleanup(void)
+{
+ ib_unregister_client(&umad_client);
+ class_destroy(umad_class);
+ unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
+}
+
+module_init(ib_umad_init);
+module_exit(ib_umad_cleanup);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/user_mad.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/uverbs.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/uverbs.h (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/uverbs.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef UVERBS_H
+#define UVERBS_H
+
+#include <linux/kref.h>
+#include <linux/idr.h>
+#include <linux/mutex.h>
+#include <linux/completion.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+
+/*
+ * Our lifetime rules for these structs are the following:
+ *
+ * struct ib_uverbs_device: One reference is held by the module and
+ * released in ib_uverbs_remove_one(). Another reference is taken by
+ * ib_uverbs_open() each time the character special file is opened,
+ * and released in ib_uverbs_release_file() when the file is released.
+ *
+ * struct ib_uverbs_file: One reference is held by the VFS and
+ * released when the file is closed. Another reference is taken when
+ * an asynchronous event queue file is created and released when the
+ * event file is closed.
+ *
+ * struct ib_uverbs_event_file: One reference is held by the VFS and
+ * released when the file is closed. For asynchronous event files,
+ * another reference is held by the corresponding main context file
+ * and released when that file is closed. For completion event files,
+ * a reference is taken when a CQ is created that uses the file, and
+ * released when the CQ is destroyed.
+ */
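Traced against the helpers declared below, the completion-event pairing works out as follows (a sketch of the reference flow, not additional API):

    /*
     * create_comp_channel  -> event file created, one ref held by the VFS
     * create_cq(channel)   -> ib_uverbs_lookup_comp_file() takes a second ref
     * destroy_cq           -> ib_uverbs_release_ucq() drops the CQ's ref
     * close(channel fd)    -> VFS ref dropped; file freed on the last put
     */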
+
+struct ib_uverbs_device {
+ struct kref ref;
+ struct completion comp;
+ int devnum;
+ struct cdev *cdev;
+ struct device *dev;
+ struct ib_device *ib_dev;
+ int num_comp_vectors;
+};
+
+struct ib_uverbs_event_file {
+ struct kref ref;
+ struct file *filp;
+ struct ib_uverbs_file *uverbs_file;
+ spinlock_t lock;
+ wait_queue_head_t poll_wait;
+ struct fasync_struct *async_queue;
+ struct list_head event_list;
+ int is_async;
+ int is_closed;
+};
+
+struct ib_uverbs_file {
+ struct kref ref;
+ struct mutex mutex;
+ struct ib_uverbs_device *device;
+ struct ib_ucontext *ucontext;
+ struct ib_event_handler event_handler;
+ struct ib_uverbs_event_file *async_file;
+};
+
+struct ib_uverbs_event {
+ union {
+ struct ib_uverbs_async_event_desc async;
+ struct ib_uverbs_comp_event_desc comp;
+ } desc;
+ struct list_head list;
+ struct list_head obj_list;
+ u32 *counter;
+};
+
+struct ib_uverbs_mcast_entry {
+ struct list_head list;
+ union ib_gid gid;
+ u16 lid;
+};
+
+struct ib_uevent_object {
+ struct ib_uobject uobject;
+ struct list_head event_list;
+ u32 events_reported;
+};
+
+struct ib_uqp_object {
+ struct ib_uevent_object uevent;
+ struct list_head mcast_list;
+};
+
+struct ib_ucq_object {
+ struct ib_uobject uobject;
+ struct ib_uverbs_file *uverbs_file;
+ struct list_head comp_list;
+ struct list_head async_list;
+ u32 comp_events_reported;
+ u32 async_events_reported;
+};
+
+struct ib_uxrcd_object {
+ struct ib_uobject uobject;
+ struct list_head xrc_reg_qp_list;
+};
+
+extern spinlock_t ib_uverbs_idr_lock;
+extern struct idr ib_uverbs_pd_idr;
+extern struct idr ib_uverbs_mr_idr;
+extern struct idr ib_uverbs_mw_idr;
+extern struct idr ib_uverbs_ah_idr;
+extern struct idr ib_uverbs_cq_idr;
+extern struct idr ib_uverbs_qp_idr;
+extern struct idr ib_uverbs_srq_idr;
+extern struct idr ib_uverbs_xrc_domain_idr;
+
+void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
+
+struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
+ int is_async, int *fd);
+struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
+
+void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
+ struct ib_uverbs_event_file *ev_file,
+ struct ib_ucq_object *uobj);
+void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
+ struct ib_uevent_object *uobj);
+
+void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
+void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
+void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
+void ib_uverbs_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event);
+void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event,
+ void *context_ptr);
+void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
+ struct ib_xrcd *xrcd);
+int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file,
+ struct ib_xrcd *xrcd, u32 qp_num);
+
+#define IB_UVERBS_DECLARE_CMD(name) \
+ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
+ const char __user *buf, int in_len, \
+ int out_len)
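Each use stamps out an ordinary prototype; for example, IB_UVERBS_DECLARE_CMD(create_cq) below expands to:

    ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
                                const char __user *buf, int in_len,
                                int out_len);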
+
+IB_UVERBS_DECLARE_CMD(get_context);
+IB_UVERBS_DECLARE_CMD(query_device);
+IB_UVERBS_DECLARE_CMD(query_port);
+IB_UVERBS_DECLARE_CMD(alloc_pd);
+IB_UVERBS_DECLARE_CMD(dealloc_pd);
+IB_UVERBS_DECLARE_CMD(reg_mr);
+IB_UVERBS_DECLARE_CMD(dereg_mr);
+IB_UVERBS_DECLARE_CMD(create_comp_channel);
+IB_UVERBS_DECLARE_CMD(create_cq);
+IB_UVERBS_DECLARE_CMD(resize_cq);
+IB_UVERBS_DECLARE_CMD(poll_cq);
+IB_UVERBS_DECLARE_CMD(req_notify_cq);
+IB_UVERBS_DECLARE_CMD(destroy_cq);
+IB_UVERBS_DECLARE_CMD(create_qp);
+IB_UVERBS_DECLARE_CMD(query_qp);
+IB_UVERBS_DECLARE_CMD(modify_qp);
+IB_UVERBS_DECLARE_CMD(destroy_qp);
+IB_UVERBS_DECLARE_CMD(post_send);
+IB_UVERBS_DECLARE_CMD(post_recv);
+IB_UVERBS_DECLARE_CMD(post_srq_recv);
+IB_UVERBS_DECLARE_CMD(create_ah);
+IB_UVERBS_DECLARE_CMD(destroy_ah);
+IB_UVERBS_DECLARE_CMD(attach_mcast);
+IB_UVERBS_DECLARE_CMD(detach_mcast);
+IB_UVERBS_DECLARE_CMD(create_srq);
+IB_UVERBS_DECLARE_CMD(modify_srq);
+IB_UVERBS_DECLARE_CMD(query_srq);
+IB_UVERBS_DECLARE_CMD(destroy_srq);
+IB_UVERBS_DECLARE_CMD(create_xrc_srq);
+IB_UVERBS_DECLARE_CMD(open_xrc_domain);
+IB_UVERBS_DECLARE_CMD(close_xrc_domain);
+IB_UVERBS_DECLARE_CMD(create_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(modify_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(query_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(reg_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(unreg_xrc_rcv_qp);
+
+
+#endif /* UVERBS_H */
Property changes on: trunk/sys/ofed/drivers/infiniband/core/uverbs.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/uverbs_cmd.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/uverbs_cmd.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/uverbs_cmd.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,3023 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ * Copyright (c) 2006 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/lockdep.h>
+
+#include <asm/uaccess.h>
+#include <asm/fcntl.h>
+
+#include "uverbs.h"
+
+static struct lock_class_key pd_lock_key;
+static struct lock_class_key mr_lock_key;
+static struct lock_class_key cq_lock_key;
+static struct lock_class_key qp_lock_key;
+static struct lock_class_key ah_lock_key;
+static struct lock_class_key srq_lock_key;
+
+#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
+ do { \
+ (udata)->inbuf = (void __user *) (ibuf); \
+ (udata)->outbuf = (void __user *) (obuf); \
+ (udata)->inlen = (ilen); \
+ (udata)->outlen = (olen); \
+ } while (0)
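Every command arrives as a fixed command struct optionally followed by driver-private input, and the response goes out through a user pointer carried in the command, again optionally followed by driver-private output. The macro just carves out those four regions, as the call sites below do (schematic, with cmd/resp standing for the per-command structs):

    INIT_UDATA(&udata, buf + sizeof cmd,                    /* driver in-data  */
               (unsigned long) cmd.response + sizeof resp,  /* driver out-data */
               in_len - sizeof cmd, out_len - sizeof resp);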
+
+/*
+ * The ib_uobject locking scheme is as follows:
+ *
+ * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
+ * needs to be held during all idr operations. When an object is
+ * looked up, a reference must be taken on the object's kref before
+ * dropping this lock.
+ *
+ * - Each object also has an rwsem. This rwsem must be held for
+ * reading while an operation that uses the object is performed.
+ * For example, while registering an MR, the associated PD's
+ * uobject.mutex must be held for reading. The rwsem must be held
+ * for writing while initializing or destroying an object.
+ *
+ * - In addition, each object has a "live" flag. If this flag is not
+ * set, then lookups of the object will fail even if it is found in
+ * the idr. This handles a reader that blocks and does not acquire
+ * the rwsem until after the object is destroyed. The destroy
+ * operation will set the live flag to 0 and then drop the rwsem;
+ * this will allow the reader to acquire the rwsem, see that the
+ * live flag is 0, and then drop the rwsem and its reference to the
+ * object. The underlying storage will not be freed until the last
+ * reference to the object is dropped.
+ */
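A condensed reader-side sketch of that discipline, using the helpers defined just below (handle, context, and use_pd() are hypothetical stand-ins):

    struct ib_pd *pd = idr_read_pd(handle, context); /* kref + down_read + live check */

    if (!pd)
            return -EINVAL;
    use_pd(pd);              /* object cannot be destroyed while read-held */
    put_pd_read(pd);         /* up_read + kref_put */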
+
+static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
+ struct ib_ucontext *context, struct lock_class_key *key)
+{
+ uobj->user_handle = user_handle;
+ uobj->context = context;
+ kref_init(&uobj->ref);
+ init_rwsem(&uobj->mutex);
+ lockdep_set_class(&uobj->mutex, key);
+ uobj->live = 0;
+}
+
+static void release_uobj(struct kref *kref)
+{
+ kfree(container_of(kref, struct ib_uobject, ref));
+}
+
+static void put_uobj(struct ib_uobject *uobj)
+{
+ kref_put(&uobj->ref, release_uobj);
+}
+
+static void put_uobj_read(struct ib_uobject *uobj)
+{
+ up_read(&uobj->mutex);
+ put_uobj(uobj);
+}
+
+static void put_uobj_write(struct ib_uobject *uobj)
+{
+ up_write(&uobj->mutex);
+ put_uobj(uobj);
+}
+
+static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
+{
+ int ret;
+
+retry:
+ if (!idr_pre_get(idr, GFP_KERNEL))
+ return -ENOMEM;
+
+ spin_lock(&ib_uverbs_idr_lock);
+ ret = idr_get_new(idr, uobj, &uobj->id);
+ spin_unlock(&ib_uverbs_idr_lock);
+
+ if (ret == -EAGAIN)
+ goto retry;
+
+ return ret;
+}
+
+void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
+{
+ spin_lock(&ib_uverbs_idr_lock);
+ idr_remove(idr, uobj->id);
+ spin_unlock(&ib_uverbs_idr_lock);
+}
+
+static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
+ struct ib_ucontext *context)
+{
+ struct ib_uobject *uobj;
+
+ spin_lock(&ib_uverbs_idr_lock);
+ uobj = idr_find(idr, id);
+ if (uobj) {
+ if (uobj->context == context)
+ kref_get(&uobj->ref);
+ else
+ uobj = NULL;
+ }
+ spin_unlock(&ib_uverbs_idr_lock);
+
+ return uobj;
+}
+
+static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
+ struct ib_ucontext *context, int nested)
+{
+ struct ib_uobject *uobj;
+
+ uobj = __idr_get_uobj(idr, id, context);
+ if (!uobj)
+ return NULL;
+
+ if (nested)
+ down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
+ else
+ down_read(&uobj->mutex);
+ if (!uobj->live) {
+ put_uobj_read(uobj);
+ return NULL;
+ }
+
+ return uobj;
+}
+
+static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
+ struct ib_ucontext *context)
+{
+ struct ib_uobject *uobj;
+
+ uobj = __idr_get_uobj(idr, id, context);
+ if (!uobj)
+ return NULL;
+
+ down_write(&uobj->mutex);
+ if (!uobj->live) {
+ put_uobj_write(uobj);
+ return NULL;
+ }
+
+ return uobj;
+}
+
+static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
+ int nested)
+{
+ struct ib_uobject *uobj;
+
+ uobj = idr_read_uobj(idr, id, context, nested);
+ return uobj ? uobj->object : NULL;
+}
+
+static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
+{
+ return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
+}
+
+static void put_pd_read(struct ib_pd *pd)
+{
+ put_uobj_read(pd->uobject);
+}
+
+static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
+{
+ return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
+}
+
+static void put_cq_read(struct ib_cq *cq)
+{
+ put_uobj_read(cq->uobject);
+}
+
+static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
+{
+ return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
+}
+
+static void put_ah_read(struct ib_ah *ah)
+{
+ put_uobj_read(ah->uobject);
+}
+
+static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
+{
+ return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
+}
+
+static void put_qp_read(struct ib_qp *qp)
+{
+ put_uobj_read(qp->uobject);
+}
+
+static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
+{
+ return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
+}
+
+static void put_srq_read(struct ib_srq *srq)
+{
+ put_uobj_read(srq->uobject);
+}
+
+static struct ib_xrcd *idr_read_xrcd(int xrcd_handle,
+ struct ib_ucontext *context,
+ struct ib_uobject **uobj)
+{
+ *uobj = idr_read_uobj(&ib_uverbs_xrc_domain_idr, xrcd_handle,
+ context, 0);
+ return *uobj ? (*uobj)->object : NULL;
+}
+
+static void put_xrcd_read(struct ib_uobject *uobj)
+{
+ put_uobj_read(uobj);
+}
+
+ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_get_context cmd;
+ struct ib_uverbs_get_context_resp resp;
+ struct ib_udata udata;
+ struct ib_device *ibdev = file->device->ib_dev;
+ struct ib_ucontext *ucontext;
+ struct file *filp;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ mutex_lock(&file->mutex);
+
+ if (file->ucontext) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ ucontext = ibdev->alloc_ucontext(ibdev, &udata);
+ if (IS_ERR(ucontext)) {
+ ret = PTR_ERR(ucontext); /* not file->ucontext, which is NULL here */
+ goto err;
+ }
+
+ ucontext->device = ibdev;
+ INIT_LIST_HEAD(&ucontext->pd_list);
+ INIT_LIST_HEAD(&ucontext->mr_list);
+ INIT_LIST_HEAD(&ucontext->mw_list);
+ INIT_LIST_HEAD(&ucontext->cq_list);
+ INIT_LIST_HEAD(&ucontext->qp_list);
+ INIT_LIST_HEAD(&ucontext->srq_list);
+ INIT_LIST_HEAD(&ucontext->ah_list);
+ INIT_LIST_HEAD(&ucontext->xrcd_list);
+ ucontext->closing = 0;
+
+ resp.num_comp_vectors = file->device->num_comp_vectors;
+
+ filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd);
+ if (IS_ERR(filp)) {
+ ret = PTR_ERR(filp);
+ goto err_free;
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_file;
+ }
+
+ file->async_file = filp->private_data;
+
+ INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev,
+ ib_uverbs_event_handler);
+ ret = ib_register_event_handler(&file->event_handler);
+ if (ret)
+ goto err_file;
+
+ kref_get(&file->async_file->ref);
+ kref_get(&file->ref);
+ file->ucontext = ucontext;
+
+ fd_install(resp.async_fd, filp);
+
+ mutex_unlock(&file->mutex);
+
+ return in_len;
+
+err_file:
+ put_unused_fd(resp.async_fd);
+ fput(filp);
+
+err_free:
+ ibdev->dealloc_ucontext(ucontext);
+
+err:
+ mutex_unlock(&file->mutex);
+ return ret;
+}
+
+ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_device cmd;
+ struct ib_uverbs_query_device_resp resp;
+ struct ib_device_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ ret = ib_query_device(file->device->ib_dev, &attr);
+ if (ret)
+ return ret;
+
+ memset(&resp, 0, sizeof resp);
+
+ resp.fw_ver = attr.fw_ver;
+ resp.node_guid = file->device->ib_dev->node_guid;
+ resp.sys_image_guid = attr.sys_image_guid;
+ resp.max_mr_size = attr.max_mr_size;
+ resp.page_size_cap = attr.page_size_cap;
+ resp.vendor_id = attr.vendor_id;
+ resp.vendor_part_id = attr.vendor_part_id;
+ resp.hw_ver = attr.hw_ver;
+ resp.max_qp = attr.max_qp;
+ resp.max_qp_wr = attr.max_qp_wr;
+ resp.device_cap_flags = attr.device_cap_flags;
+ resp.max_sge = attr.max_sge;
+ resp.max_sge_rd = attr.max_sge_rd;
+ resp.max_cq = attr.max_cq;
+ resp.max_cqe = attr.max_cqe;
+ resp.max_mr = attr.max_mr;
+ resp.max_pd = attr.max_pd;
+ resp.max_qp_rd_atom = attr.max_qp_rd_atom;
+ resp.max_ee_rd_atom = attr.max_ee_rd_atom;
+ resp.max_res_rd_atom = attr.max_res_rd_atom;
+ resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom;
+ resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom;
+ resp.atomic_cap = attr.atomic_cap;
+ resp.max_ee = attr.max_ee;
+ resp.max_rdd = attr.max_rdd;
+ resp.max_mw = attr.max_mw;
+ resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp;
+ resp.max_raw_ethy_qp = attr.max_raw_ethy_qp;
+ resp.max_mcast_grp = attr.max_mcast_grp;
+ resp.max_mcast_qp_attach = attr.max_mcast_qp_attach;
+ resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach;
+ resp.max_ah = attr.max_ah;
+ resp.max_fmr = attr.max_fmr;
+ resp.max_map_per_fmr = attr.max_map_per_fmr;
+ resp.max_srq = attr.max_srq;
+ resp.max_srq_wr = attr.max_srq_wr;
+ resp.max_srq_sge = attr.max_srq_sge;
+ resp.max_pkeys = attr.max_pkeys;
+ resp.local_ca_ack_delay = attr.local_ca_ack_delay;
+ resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_port cmd;
+ struct ib_uverbs_query_port_resp resp;
+ struct ib_port_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr);
+ if (ret)
+ return ret;
+
+ memset(&resp, 0, sizeof resp);
+
+ resp.state = attr.state;
+ resp.max_mtu = attr.max_mtu;
+ resp.active_mtu = attr.active_mtu;
+ resp.gid_tbl_len = attr.gid_tbl_len;
+ resp.port_cap_flags = attr.port_cap_flags;
+ resp.max_msg_sz = attr.max_msg_sz;
+ resp.bad_pkey_cntr = attr.bad_pkey_cntr;
+ resp.qkey_viol_cntr = attr.qkey_viol_cntr;
+ resp.pkey_tbl_len = attr.pkey_tbl_len;
+ resp.lid = attr.lid;
+ resp.sm_lid = attr.sm_lid;
+ resp.lmc = attr.lmc;
+ resp.max_vl_num = attr.max_vl_num;
+ resp.sm_sl = attr.sm_sl;
+ resp.subnet_timeout = attr.subnet_timeout;
+ resp.init_type_reply = attr.init_type_reply;
+ resp.active_width = attr.active_width;
+ resp.active_speed = attr.active_speed;
+ resp.phys_state = attr.phys_state;
+ resp.link_layer = attr.link_layer;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_alloc_pd cmd;
+ struct ib_uverbs_alloc_pd_resp resp;
+ struct ib_udata udata;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ init_uobj(uobj, 0, file->ucontext, &pd_lock_key);
+ down_write(&uobj->mutex);
+
+ pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
+ file->ucontext, &udata);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
+ goto err;
+ }
+
+ pd->device = file->device->ib_dev;
+ pd->uobject = uobj;
+ atomic_set(&pd->usecnt, 0);
+
+ uobj->object = pd;
+ ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
+ if (ret)
+ goto err_idr;
+
+ memset(&resp, 0, sizeof resp);
+ resp.pd_handle = uobj->id;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->pd_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+
+ return in_len;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
+
+err_idr:
+ ib_dealloc_pd(pd);
+
+err:
+ put_uobj_write(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_dealloc_pd cmd;
+ struct ib_uobject *uobj;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+
+ ret = ib_dealloc_pd(uobj->object);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ return ret;
+
+ idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_reg_mr cmd;
+ struct ib_uverbs_reg_mr_resp resp;
+ struct ib_udata udata;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
+ return -EINVAL;
+
+ /*
+ * Local write permission is required if remote write or
+ * remote atomic permission is also requested.
+ */
+ if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
+ !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
+ return -EINVAL;
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
+ down_write(&uobj->mutex);
+
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
+ cmd.access_flags, &udata, 0);
+ if (IS_ERR(mr)) {
+ ret = PTR_ERR(mr);
+ goto err_put;
+ }
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
+
+ uobj->object = mr;
+ ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
+ if (ret)
+ goto err_unreg;
+
+ memset(&resp, 0, sizeof resp);
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+ resp.mr_handle = uobj->id;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ put_pd_read(pd);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->mr_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+
+ return in_len;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
+
+err_unreg:
+ ib_dereg_mr(mr);
+
+err_put:
+ put_pd_read(pd);
+
+err_free:
+ put_uobj_write(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_dereg_mr cmd;
+ struct ib_mr *mr;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+
+ mr = uobj->object;
+
+ ret = ib_dereg_mr(mr);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ return ret;
+
+ idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_comp_channel cmd;
+ struct ib_uverbs_create_comp_channel_resp resp;
+ struct file *filp;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd);
+ if (IS_ERR(filp))
+ return PTR_ERR(filp);
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ put_unused_fd(resp.fd);
+ fput(filp);
+ return -EFAULT;
+ }
+
+ fd_install(resp.fd, filp);
+ return in_len;
+}
+
+ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_cq cmd;
+ struct ib_uverbs_create_cq_resp resp;
+ struct ib_udata udata;
+ struct ib_ucq_object *obj;
+ struct ib_uverbs_event_file *ev_file = NULL;
+ struct ib_cq *cq;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ if (cmd.comp_vector >= file->device->num_comp_vectors)
+ return -EINVAL;
+
+ obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key);
+ down_write(&obj->uobject.mutex);
+
+ if (cmd.comp_channel >= 0) {
+ ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
+ if (!ev_file) {
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+
+ obj->uverbs_file = file;
+ obj->comp_events_reported = 0;
+ obj->async_events_reported = 0;
+ INIT_LIST_HEAD(&obj->comp_list);
+ INIT_LIST_HEAD(&obj->async_list);
+
+ cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe,
+ cmd.comp_vector,
+ file->ucontext, &udata);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto err_file;
+ }
+
+ cq->device = file->device->ib_dev;
+ cq->uobject = &obj->uobject;
+ cq->comp_handler = ib_uverbs_comp_handler;
+ cq->event_handler = ib_uverbs_cq_event_handler;
+ cq->cq_context = ev_file;
+ atomic_set(&cq->usecnt, 0);
+
+ obj->uobject.object = cq;
+ ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
+ if (ret)
+ goto err_free;
+
+ memset(&resp, 0, sizeof resp);
+ resp.cq_handle = obj->uobject.id;
+ resp.cqe = cq->cqe;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
+ mutex_unlock(&file->mutex);
+
+ obj->uobject.live = 1;
+
+ up_write(&obj->uobject.mutex);
+
+ return in_len;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
+
+err_free:
+ ib_destroy_cq(cq);
+
+err_file:
+ if (ev_file)
+ ib_uverbs_release_ucq(file, ev_file, obj);
+
+err:
+ put_uobj_write(&obj->uobject);
+ return ret;
+}
+
+ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_resize_cq cmd;
+ struct ib_uverbs_resize_cq_resp resp;
+ struct ib_udata udata;
+ struct ib_cq *cq;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ if (!cq)
+ return -EINVAL;
+
+ ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
+ if (ret)
+ goto out;
+
+ resp.cqe = cq->cqe;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp.cqe))
+ ret = -EFAULT;
+
+out:
+ put_cq_read(cq);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_poll_cq cmd;
+ struct ib_uverbs_poll_cq_resp *resp;
+ struct ib_cq *cq;
+ struct ib_wc *wc;
+ int ret = 0;
+ int i;
+ int rsize;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL);
+ if (!wc)
+ return -ENOMEM;
+
+ rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc);
+ resp = kmalloc(rsize, GFP_KERNEL);
+ if (!resp) {
+ ret = -ENOMEM;
+ goto out_wc;
+ }
+
+ cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ if (!cq) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ resp->count = ib_poll_cq(cq, cmd.ne, wc);
+
+ put_cq_read(cq);
+
+ for (i = 0; i < resp->count; i++) {
+ resp->wc[i].wr_id = wc[i].wr_id;
+ resp->wc[i].status = wc[i].status;
+ resp->wc[i].opcode = wc[i].opcode;
+ resp->wc[i].vendor_err = wc[i].vendor_err;
+ resp->wc[i].byte_len = wc[i].byte_len;
+ resp->wc[i].ex.imm_data = (__u32 __force) wc[i].ex.imm_data;
+ resp->wc[i].qp_num = wc[i].qp->qp_num;
+ resp->wc[i].src_qp = wc[i].src_qp;
+ resp->wc[i].wc_flags = wc[i].wc_flags;
+ resp->wc[i].pkey_index = wc[i].pkey_index;
+ resp->wc[i].slid = wc[i].slid;
+ resp->wc[i].sl = wc[i].sl;
+ resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits;
+ resp->wc[i].port_num = wc[i].port_num;
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize))
+ ret = -EFAULT;
+
+out:
+ kfree(resp);
+
+out_wc:
+ kfree(wc);
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_req_notify_cq cmd;
+ struct ib_cq *cq;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ if (!cq)
+ return -EINVAL;
+
+ ib_req_notify_cq(cq, cmd.solicited_only ?
+ IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
+
+ put_cq_read(cq);
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_destroy_cq cmd;
+ struct ib_uverbs_destroy_cq_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_cq *cq;
+ struct ib_ucq_object *obj;
+ struct ib_uverbs_event_file *ev_file;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+ cq = uobj->object;
+ ev_file = cq->cq_context;
+ obj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+
+ ret = ib_destroy_cq(cq);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ return ret;
+
+ idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ ib_uverbs_release_ucq(file, ev_file, obj);
+
+ memset(&resp, 0, sizeof resp);
+ resp.comp_events_reported = obj->comp_events_reported;
+ resp.async_events_reported = obj->async_events_reported;
+
+ put_uobj(uobj);
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_qp cmd;
+ struct ib_uverbs_create_qp_resp resp;
+ struct ib_udata udata;
+ struct ib_uqp_object *obj;
+ struct ib_pd *pd;
+ struct ib_cq *scq, *rcq;
+ struct ib_srq *srq;
+ struct ib_qp *qp;
+ struct ib_qp_init_attr attr;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *xrcd_uobj;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key);
+ down_write(&obj->uevent.uobject.mutex);
+
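+	/* XRC ABI quirk: for IB_QPT_XRC the srq_handle field carries the
+	 * XRC domain handle, hence the idr_read_xrcd() on it below */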
+ srq = (cmd.is_srq && cmd.qp_type != IB_QPT_XRC) ?
+ idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
+ xrcd = cmd.qp_type == IB_QPT_XRC ?
+ idr_read_xrcd(cmd.srq_handle, file->ucontext, &xrcd_uobj) : NULL;
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
+ rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
+ scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
+
+ if (!pd || !scq || !rcq || (cmd.is_srq && !srq) ||
+ (cmd.qp_type == IB_QPT_XRC && !xrcd)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ attr.event_handler = ib_uverbs_qp_event_handler;
+ attr.qp_context = file;
+ attr.send_cq = scq;
+ attr.recv_cq = rcq;
+ attr.srq = srq;
+ attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+ attr.qp_type = cmd.qp_type;
+ attr.xrcd = xrcd;
+ attr.create_flags = 0;
+
+ attr.cap.max_send_wr = cmd.max_send_wr;
+ attr.cap.max_recv_wr = cmd.max_recv_wr;
+ attr.cap.max_send_sge = cmd.max_send_sge;
+ attr.cap.max_recv_sge = cmd.max_recv_sge;
+ attr.cap.max_inline_data = cmd.max_inline_data;
+
+ obj->uevent.events_reported = 0;
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ INIT_LIST_HEAD(&obj->mcast_list);
+
+ qp = pd->device->create_qp(pd, &attr, &udata);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto err_put;
+ }
+
+ qp->device = pd->device;
+ qp->pd = pd;
+ qp->send_cq = attr.send_cq;
+ qp->recv_cq = attr.recv_cq;
+ qp->srq = attr.srq;
+ qp->uobject = &obj->uevent.uobject;
+ qp->event_handler = attr.event_handler;
+ qp->qp_context = attr.qp_context;
+ qp->qp_type = attr.qp_type;
+ qp->xrcd = attr.xrcd;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&attr.send_cq->usecnt);
+ atomic_inc(&attr.recv_cq->usecnt);
+ if (attr.srq)
+ atomic_inc(&attr.srq->usecnt);
+ else if (attr.xrcd)
+ atomic_inc(&attr.xrcd->usecnt);
+
+ obj->uevent.uobject.object = qp;
+ ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+ if (ret)
+ goto err_destroy;
+
+ memset(&resp, 0, sizeof resp);
+ resp.qpn = qp->qp_num;
+ resp.qp_handle = obj->uevent.uobject.id;
+ resp.max_recv_sge = attr.cap.max_recv_sge;
+ resp.max_send_sge = attr.cap.max_send_sge;
+ resp.max_recv_wr = attr.cap.max_recv_wr;
+ resp.max_send_wr = attr.cap.max_send_wr;
+ resp.max_inline_data = attr.cap.max_inline_data;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ put_pd_read(pd);
+ put_cq_read(scq);
+ if (rcq != scq)
+ put_cq_read(rcq);
+ if (srq)
+ put_srq_read(srq);
+ if (xrcd)
+ put_xrcd_read(xrcd_uobj);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
+ mutex_unlock(&file->mutex);
+
+ obj->uevent.uobject.live = 1;
+
+ up_write(&obj->uevent.uobject.mutex);
+
+ return in_len;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+
+err_destroy:
+ ib_destroy_qp(qp);
+
+err_put:
+ if (pd)
+ put_pd_read(pd);
+ if (scq)
+ put_cq_read(scq);
+ if (rcq && rcq != scq)
+ put_cq_read(rcq);
+ if (srq)
+ put_srq_read(srq);
+ if (xrcd)
+ put_xrcd_read(xrcd_uobj);
+
+ put_uobj_write(&obj->uevent.uobject);
+ return ret;
+}
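Every verb in this file follows the reference discipline visible above: take a read lock on each uobject it touches via the idr_read_*() wrappers, keep those references across the driver call, and release them on both the success and error paths. A hedged sketch of the generic lookup these wrappers (defined earlier in uverbs_cmd.c) build on; treat the body as illustrative rather than the exact implementation:

	/* Look up a live uobject by handle, taking its rwsem for read, and
	 * return the wrapped object; the matching put_*_read() helpers drop
	 * the same read lock. */
	static void *idr_read_obj(struct idr *idr, int id,
				  struct ib_ucontext *context, int nested)
	{
		struct ib_uobject *uobj;

		uobj = idr_read_uobj(idr, id, context, nested);
		return uobj ? uobj->object : NULL;
	}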
+
+ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_query_qp cmd;
+ struct ib_uverbs_query_qp_resp resp;
+ struct ib_qp *qp;
+ struct ib_qp_attr *attr;
+ struct ib_qp_init_attr *init_attr;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
+ if (!attr || !init_attr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ if (!qp) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
+
+ put_qp_read(qp);
+
+ if (ret)
+ goto out;
+
+ memset(&resp, 0, sizeof resp);
+
+ resp.qp_state = attr->qp_state;
+ resp.cur_qp_state = attr->cur_qp_state;
+ resp.path_mtu = attr->path_mtu;
+ resp.path_mig_state = attr->path_mig_state;
+ resp.qkey = attr->qkey;
+ resp.rq_psn = attr->rq_psn;
+ resp.sq_psn = attr->sq_psn;
+ resp.dest_qp_num = attr->dest_qp_num;
+ resp.qp_access_flags = attr->qp_access_flags;
+ resp.pkey_index = attr->pkey_index;
+ resp.alt_pkey_index = attr->alt_pkey_index;
+ resp.sq_draining = attr->sq_draining;
+ resp.max_rd_atomic = attr->max_rd_atomic;
+ resp.max_dest_rd_atomic = attr->max_dest_rd_atomic;
+ resp.min_rnr_timer = attr->min_rnr_timer;
+ resp.port_num = attr->port_num;
+ resp.timeout = attr->timeout;
+ resp.retry_cnt = attr->retry_cnt;
+ resp.rnr_retry = attr->rnr_retry;
+ resp.alt_port_num = attr->alt_port_num;
+ resp.alt_timeout = attr->alt_timeout;
+
+ memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
+ resp.dest.flow_label = attr->ah_attr.grh.flow_label;
+ resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
+ resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
+ resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
+ resp.dest.dlid = attr->ah_attr.dlid;
+ resp.dest.sl = attr->ah_attr.sl;
+ resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
+ resp.dest.static_rate = attr->ah_attr.static_rate;
+ resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
+ resp.dest.port_num = attr->ah_attr.port_num;
+
+ memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
+ resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
+ resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
+ resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
+ resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
+ resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
+ resp.alt_dest.sl = attr->alt_ah_attr.sl;
+ resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
+ resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
+ resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
+ resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
+
+ resp.max_send_wr = init_attr->cap.max_send_wr;
+ resp.max_recv_wr = init_attr->cap.max_recv_wr;
+ resp.max_send_sge = init_attr->cap.max_send_sge;
+ resp.max_recv_sge = init_attr->cap.max_recv_sge;
+ resp.max_inline_data = init_attr->cap.max_inline_data;
+ resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ kfree(attr);
+ kfree(init_attr);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_modify_qp cmd;
+ struct ib_udata udata;
+ struct ib_qp *qp;
+ struct ib_qp_attr *attr;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+ out_len);
+
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ if (!attr)
+ return -ENOMEM;
+
+ qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ if (!qp) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ attr->qp_state = cmd.qp_state;
+ attr->cur_qp_state = cmd.cur_qp_state;
+ attr->path_mtu = cmd.path_mtu;
+ attr->path_mig_state = cmd.path_mig_state;
+ attr->qkey = cmd.qkey;
+ attr->rq_psn = cmd.rq_psn;
+ attr->sq_psn = cmd.sq_psn;
+ attr->dest_qp_num = cmd.dest_qp_num;
+ attr->qp_access_flags = cmd.qp_access_flags;
+ attr->pkey_index = cmd.pkey_index;
+ attr->alt_pkey_index = cmd.alt_pkey_index;
+ attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
+ attr->max_rd_atomic = cmd.max_rd_atomic;
+ attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
+ attr->min_rnr_timer = cmd.min_rnr_timer;
+ attr->port_num = cmd.port_num;
+ attr->timeout = cmd.timeout;
+ attr->retry_cnt = cmd.retry_cnt;
+ attr->rnr_retry = cmd.rnr_retry;
+ attr->alt_port_num = cmd.alt_port_num;
+ attr->alt_timeout = cmd.alt_timeout;
+
+ memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
+ attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
+ attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
+ attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
+ attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
+ attr->ah_attr.dlid = cmd.dest.dlid;
+ attr->ah_attr.sl = cmd.dest.sl;
+ attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
+ attr->ah_attr.static_rate = cmd.dest.static_rate;
+ attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
+ attr->ah_attr.port_num = cmd.dest.port_num;
+
+ memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
+ attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
+ attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
+ attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
+ attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
+ attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
+ attr->alt_ah_attr.sl = cmd.alt_dest.sl;
+ attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
+ attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
+ attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
+ attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
+
+ ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
+
+ put_qp_read(qp);
+
+ if (ret)
+ goto out;
+
+ ret = in_len;
+
+out:
+ kfree(attr);
+
+ return ret;
+}
+
+ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_destroy_qp cmd;
+ struct ib_uverbs_destroy_qp_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_qp *qp;
+ struct ib_uqp_object *obj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ memset(&resp, 0, sizeof resp);
+
+ uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+ qp = uobj->object;
+ obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
+
+ if (!list_empty(&obj->mcast_list)) {
+ put_uobj_write(uobj);
+ return -EBUSY;
+ }
+
+ ret = ib_destroy_qp(qp);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ return ret;
+
+ idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ ib_uverbs_release_uevent(file, &obj->uevent);
+
+ resp.events_reported = obj->uevent.events_reported;
+
+ put_uobj(uobj);
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_post_send cmd;
+ struct ib_uverbs_post_send_resp resp;
+ struct ib_uverbs_send_wr *user_wr;
+ struct ib_send_wr *wr = NULL, *last, *next, *bad_wr;
+ struct ib_qp *qp;
+ int i, sg_ind;
+ int is_ud;
+ ssize_t ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
+ cmd.sge_count * sizeof (struct ib_uverbs_sge))
+ return -EINVAL;
+
+ if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
+ return -EINVAL;
+
+ user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
+ if (!user_wr)
+ return -ENOMEM;
+
+ qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ if (!qp)
+ goto out;
+
+ is_ud = qp->qp_type == IB_QPT_UD;
+ sg_ind = 0;
+ last = NULL;
+ for (i = 0; i < cmd.wr_count; ++i) {
+ if (copy_from_user(user_wr,
+ buf + sizeof cmd + i * cmd.wqe_size,
+ cmd.wqe_size)) {
+ ret = -EFAULT;
+ goto out_put;
+ }
+
+ if (user_wr->num_sge + sg_ind > cmd.sge_count) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+ user_wr->num_sge * sizeof (struct ib_sge),
+ GFP_KERNEL);
+ if (!next) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+
+ if (!last)
+ wr = next;
+ else
+ last->next = next;
+ last = next;
+
+ next->next = NULL;
+ next->wr_id = user_wr->wr_id;
+ next->num_sge = user_wr->num_sge;
+ next->opcode = user_wr->opcode;
+ next->send_flags = user_wr->send_flags;
+
+ if (is_ud) {
+ next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
+ file->ucontext);
+ if (!next->wr.ud.ah) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+ next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
+ next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+ } else {
+ switch (next->opcode) {
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ next->ex.imm_data =
+ (__be32 __force) user_wr->ex.imm_data;
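+				/* fall through: WRITE_WITH_IMM also needs
+				 * the rdma.remote_addr/rkey below */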
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_READ:
+ next->wr.rdma.remote_addr =
+ user_wr->wr.rdma.remote_addr;
+ next->wr.rdma.rkey =
+ user_wr->wr.rdma.rkey;
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ next->ex.imm_data =
+ (__be32 __force) user_wr->ex.imm_data;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ next->ex.invalidate_rkey =
+ user_wr->ex.invalidate_rkey;
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ next->wr.atomic.remote_addr =
+ user_wr->wr.atomic.remote_addr;
+ next->wr.atomic.compare_add =
+ user_wr->wr.atomic.compare_add;
+ next->wr.atomic.swap = user_wr->wr.atomic.swap;
+ next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
+ break;
+ default:
+ break;
+ }
+ }
+
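+		/* the SGE array was co-allocated right behind the WR in the
+		 * kmalloc above, at an offset aligned to sizeof(struct ib_sge) */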
+ if (next->num_sge) {
+ next->sg_list = (void *) next +
+ ALIGN(sizeof *next, sizeof (struct ib_sge));
+ if (copy_from_user(next->sg_list,
+ buf + sizeof cmd +
+ cmd.wr_count * cmd.wqe_size +
+ sg_ind * sizeof (struct ib_sge),
+ next->num_sge * sizeof (struct ib_sge))) {
+ ret = -EFAULT;
+ goto out_put;
+ }
+ sg_ind += next->num_sge;
+ } else
+ next->sg_list = NULL;
+ }
+
+ resp.bad_wr = 0;
+ ret = qp->device->post_send(qp, wr, &bad_wr);
+ if (ret)
+ for (next = wr; next; next = next->next) {
+ ++resp.bad_wr;
+ if (next == bad_wr)
+ break;
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out_put:
+ put_qp_read(qp);
+
+ while (wr) {
+ if (is_ud && wr->wr.ud.ah)
+ put_ah_read(wr->wr.ud.ah);
+ next = wr->next;
+ kfree(wr);
+ wr = next;
+ }
+
+out:
+ kfree(user_wr);
+
+ return ret ? ret : in_len;
+}
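The copy_from_user() arithmetic above fixes the wire layout: the command struct, then wr_count work requests of wqe_size bytes each, then every scatter/gather entry in one trailing array. A sketch of how a userspace library might pack that buffer; build_wr(), build_sge() and the counts are hypothetical:

	/* Buffer layout expected by ib_uverbs_post_send(); the offsets
	 * mirror the kernel-side copy_from_user() calls above. */
	size_t len  = sizeof(struct ib_uverbs_post_send)
		    + wr_count * wqe_size
		    + sge_count * sizeof(struct ib_uverbs_sge);
	char  *buf  = malloc(len);
	char  *wrs  = buf + sizeof(struct ib_uverbs_post_send);
	char  *sges = wrs + wr_count * wqe_size;
	unsigned i;

	for (i = 0; i < wr_count; ++i)
		build_wr((struct ib_uverbs_send_wr *)(wrs + i * wqe_size));
	for (i = 0; i < sge_count; ++i)
		build_sge((struct ib_uverbs_sge *)sges + i);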
+
+static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
+ int in_len,
+ u32 wr_count,
+ u32 sge_count,
+ u32 wqe_size)
+{
+ struct ib_uverbs_recv_wr *user_wr;
+ struct ib_recv_wr *wr = NULL, *last, *next;
+ int sg_ind;
+ int i;
+ int ret;
+
+ if (in_len < wqe_size * wr_count +
+ sge_count * sizeof (struct ib_uverbs_sge))
+ return ERR_PTR(-EINVAL);
+
+ if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
+ return ERR_PTR(-EINVAL);
+
+ user_wr = kmalloc(wqe_size, GFP_KERNEL);
+ if (!user_wr)
+ return ERR_PTR(-ENOMEM);
+
+ sg_ind = 0;
+ last = NULL;
+ for (i = 0; i < wr_count; ++i) {
+ if (copy_from_user(user_wr, buf + i * wqe_size,
+ wqe_size)) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ if (user_wr->num_sge + sg_ind > sge_count) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+ user_wr->num_sge * sizeof (struct ib_sge),
+ GFP_KERNEL);
+ if (!next) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (!last)
+ wr = next;
+ else
+ last->next = next;
+ last = next;
+
+ next->next = NULL;
+ next->wr_id = user_wr->wr_id;
+ next->num_sge = user_wr->num_sge;
+
+ if (next->num_sge) {
+ next->sg_list = (void *) next +
+ ALIGN(sizeof *next, sizeof (struct ib_sge));
+ if (copy_from_user(next->sg_list,
+ buf + wr_count * wqe_size +
+ sg_ind * sizeof (struct ib_sge),
+ next->num_sge * sizeof (struct ib_sge))) {
+ ret = -EFAULT;
+ goto err;
+ }
+ sg_ind += next->num_sge;
+ } else
+ next->sg_list = NULL;
+ }
+
+ kfree(user_wr);
+ return wr;
+
+err:
+ kfree(user_wr);
+
+ while (wr) {
+ next = wr->next;
+ kfree(wr);
+ wr = next;
+ }
+
+ return ERR_PTR(ret);
+}
+
+ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_post_recv cmd;
+ struct ib_uverbs_post_recv_resp resp;
+ struct ib_recv_wr *wr, *next, *bad_wr;
+ struct ib_qp *qp;
+ ssize_t ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+ in_len - sizeof cmd, cmd.wr_count,
+ cmd.sge_count, cmd.wqe_size);
+ if (IS_ERR(wr))
+ return PTR_ERR(wr);
+
+ qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ if (!qp)
+ goto out;
+
+ resp.bad_wr = 0;
+ ret = qp->device->post_recv(qp, wr, &bad_wr);
+
+ put_qp_read(qp);
+
+ if (ret)
+ for (next = wr; next; next = next->next) {
+ ++resp.bad_wr;
+ if (next == bad_wr)
+ break;
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ while (wr) {
+ next = wr->next;
+ kfree(wr);
+ wr = next;
+ }
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_post_srq_recv cmd;
+ struct ib_uverbs_post_srq_recv_resp resp;
+ struct ib_recv_wr *wr, *next, *bad_wr;
+ struct ib_srq *srq;
+ ssize_t ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+ in_len - sizeof cmd, cmd.wr_count,
+ cmd.sge_count, cmd.wqe_size);
+ if (IS_ERR(wr))
+ return PTR_ERR(wr);
+
+ srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ if (!srq)
+ goto out;
+
+ resp.bad_wr = 0;
+ ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
+
+ put_srq_read(srq);
+
+ if (ret)
+ for (next = wr; next; next = next->next) {
+ ++resp.bad_wr;
+ if (next == bad_wr)
+ break;
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ while (wr) {
+ next = wr->next;
+ kfree(wr);
+ wr = next;
+ }
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_ah cmd;
+ struct ib_uverbs_create_ah_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ struct ib_ah *ah;
+ struct ib_ah_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key);
+ down_write(&uobj->mutex);
+
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ attr.dlid = cmd.attr.dlid;
+ attr.sl = cmd.attr.sl;
+ attr.src_path_bits = cmd.attr.src_path_bits;
+ attr.static_rate = cmd.attr.static_rate;
+ attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0;
+ attr.port_num = cmd.attr.port_num;
+ attr.grh.flow_label = cmd.attr.grh.flow_label;
+ attr.grh.sgid_index = cmd.attr.grh.sgid_index;
+ attr.grh.hop_limit = cmd.attr.grh.hop_limit;
+ attr.grh.traffic_class = cmd.attr.grh.traffic_class;
+ memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
+
+ ah = ib_create_ah(pd, &attr);
+ if (IS_ERR(ah)) {
+ ret = PTR_ERR(ah);
+ goto err_put;
+ }
+
+ ah->uobject = uobj;
+ uobj->object = ah;
+
+ ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
+ if (ret)
+ goto err_destroy;
+
+ resp.ah_handle = uobj->id;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ put_pd_read(pd);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->ah_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+
+ return in_len;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
+
+err_destroy:
+ ib_destroy_ah(ah);
+
+err_put:
+ put_pd_read(pd);
+
+err:
+ put_uobj_write(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len, int out_len)
+{
+ struct ib_uverbs_destroy_ah cmd;
+ struct ib_ah *ah;
+ struct ib_uobject *uobj;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+ ah = uobj->object;
+
+ ret = ib_destroy_ah(ah);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ return ret;
+
+ idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_attach_mcast cmd;
+ struct ib_qp *qp;
+ struct ib_uqp_object *obj;
+ struct ib_uverbs_mcast_entry *mcast;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ if (!qp)
+ return -EINVAL;
+
+ obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+
+ list_for_each_entry(mcast, &obj->mcast_list, list)
+ if (cmd.mlid == mcast->lid &&
+ !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
+ ret = 0;
+ goto out_put;
+ }
+
+ mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
+ if (!mcast) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+
+ mcast->lid = cmd.mlid;
+ memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw);
+
+ ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid);
+ if (!ret)
+ list_add_tail(&mcast->list, &obj->mcast_list);
+ else
+ kfree(mcast);
+
+out_put:
+ put_qp_read(qp);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_detach_mcast cmd;
+ struct ib_uqp_object *obj;
+ struct ib_qp *qp;
+ struct ib_uverbs_mcast_entry *mcast;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ if (!qp)
+ return -EINVAL;
+
+ ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
+ if (ret)
+ goto out_put;
+
+ obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+
+ list_for_each_entry(mcast, &obj->mcast_list, list)
+ if (cmd.mlid == mcast->lid &&
+ !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
+ list_del(&mcast->list);
+ kfree(mcast);
+ break;
+ }
+
+out_put:
+ put_qp_read(qp);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_srq cmd;
+ struct ib_uverbs_create_srq_resp resp;
+ struct ib_udata udata;
+ struct ib_uevent_object *obj;
+ struct ib_pd *pd;
+ struct ib_srq *srq;
+ struct ib_srq_init_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key);
+ down_write(&obj->uobject.mutex);
+
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ attr.event_handler = ib_uverbs_srq_event_handler;
+ attr.srq_context = file;
+ attr.attr.max_wr = cmd.max_wr;
+ attr.attr.max_sge = cmd.max_sge;
+ attr.attr.srq_limit = cmd.srq_limit;
+
+ obj->events_reported = 0;
+ INIT_LIST_HEAD(&obj->event_list);
+
+ srq = pd->device->create_srq(pd, &attr, &udata);
+ if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
+ goto err_put;
+ }
+
+ srq->device = pd->device;
+ srq->pd = pd;
+ srq->uobject = &obj->uobject;
+ srq->event_handler = attr.event_handler;
+ srq->srq_context = attr.srq_context;
+ srq->ext.xrc.cq = NULL;
+ srq->ext.xrc.xrcd = NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&srq->usecnt, 0);
+
+ obj->uobject.object = srq;
+ ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+ if (ret)
+ goto err_destroy;
+
+ memset(&resp, 0, sizeof resp);
+ resp.srq_handle = obj->uobject.id;
+ resp.max_wr = attr.attr.max_wr;
+ resp.max_sge = attr.attr.max_sge;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ put_pd_read(pd);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
+ mutex_unlock(&file->mutex);
+
+ obj->uobject.live = 1;
+
+ up_write(&obj->uobject.mutex);
+
+ return in_len;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+
+err_destroy:
+ ib_destroy_srq(srq);
+
+err_put:
+ put_pd_read(pd);
+
+err:
+ put_uobj_write(&obj->uobject);
+ return ret;
+}
+
+ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_xsrq cmd;
+ struct ib_uverbs_create_srq_resp resp;
+ struct ib_udata udata;
+ struct ib_uevent_object *obj;
+ struct ib_pd *pd;
+ struct ib_srq *srq;
+ struct ib_cq *xrc_cq;
+ struct ib_xrcd *xrcd;
+ struct ib_srq_init_attr attr;
+ struct ib_uobject *xrcd_uobj;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ init_uobj(&obj->uobject, cmd.user_handle, file->ucontext,
+ &srq_lock_key);
+ down_write(&obj->uobject.mutex);
+
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ xrc_cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ if (!xrc_cq) {
+ ret = -EINVAL;
+ goto err_put_pd;
+ }
+
+ xrcd = idr_read_xrcd(cmd.xrcd_handle, file->ucontext, &xrcd_uobj);
+ if (!xrcd) {
+ ret = -EINVAL;
+ goto err_put_cq;
+ }
+
+ attr.event_handler = ib_uverbs_srq_event_handler;
+ attr.srq_context = file;
+ attr.attr.max_wr = cmd.max_wr;
+ attr.attr.max_sge = cmd.max_sge;
+ attr.attr.srq_limit = cmd.srq_limit;
+
+ obj->events_reported = 0;
+ INIT_LIST_HEAD(&obj->event_list);
+
+ srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, &attr, &udata);
+ if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
+ goto err_put;
+ }
+
+ srq->device = pd->device;
+ srq->pd = pd;
+ srq->uobject = &obj->uobject;
+ srq->event_handler = attr.event_handler;
+ srq->srq_context = attr.srq_context;
+ srq->ext.xrc.cq = xrc_cq;
+ srq->ext.xrc.xrcd = xrcd;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&xrc_cq->usecnt);
+ atomic_inc(&xrcd->usecnt);
+
+ atomic_set(&srq->usecnt, 0);
+
+ obj->uobject.object = srq;
+ ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+ if (ret)
+ goto err_destroy;
+
+ memset(&resp, 0, sizeof resp);
+ resp.srq_handle = obj->uobject.id;
+ resp.max_wr = attr.attr.max_wr;
+ resp.max_sge = attr.attr.max_sge;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ put_xrcd_read(xrcd_uobj);
+ put_cq_read(xrc_cq);
+ put_pd_read(pd);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
+ mutex_unlock(&file->mutex);
+
+ obj->uobject.live = 1;
+
+ up_write(&obj->uobject.mutex);
+
+ return in_len;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+
+err_destroy:
+ ib_destroy_srq(srq);
+
+err_put:
+ put_xrcd_read(xrcd_uobj);
+
+err_put_cq:
+ put_cq_read(xrc_cq);
+
+err_put_pd:
+ put_pd_read(pd);
+
+err:
+ put_uobj_write(&obj->uobject);
+ return ret;
+}
+
+ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_modify_srq cmd;
+ struct ib_udata udata;
+ struct ib_srq *srq;
+ struct ib_srq_attr attr;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+ out_len);
+
+ srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ if (!srq)
+ return -EINVAL;
+
+ attr.max_wr = cmd.max_wr;
+ attr.srq_limit = cmd.srq_limit;
+
+ ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
+
+ put_srq_read(srq);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_srq cmd;
+ struct ib_uverbs_query_srq_resp resp;
+ struct ib_srq_attr attr;
+ struct ib_srq *srq;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ if (!srq)
+ return -EINVAL;
+
+ ret = ib_query_srq(srq, &attr);
+
+ put_srq_read(srq);
+
+ if (ret)
+ return ret;
+
+ memset(&resp, 0, sizeof resp);
+
+ resp.max_wr = attr.max_wr;
+ resp.max_sge = attr.max_sge;
+ resp.srq_limit = attr.srq_limit;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_destroy_srq cmd;
+ struct ib_uverbs_destroy_srq_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_srq *srq;
+ struct ib_uevent_object *obj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+ srq = uobj->object;
+ obj = container_of(uobj, struct ib_uevent_object, uobject);
+
+ ret = ib_destroy_srq(srq);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ return ret;
+
+ idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ ib_uverbs_release_uevent(file, obj);
+
+ memset(&resp, 0, sizeof resp);
+ resp.events_reported = obj->events_reported;
+
+ put_uobj(uobj);
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+ return ret ? ret : in_len;
+}
+
+static struct inode *xrc_file2inode(struct file *f)
+{
+ return f->f_dentry->d_inode;
+}
+
+struct xrcd_table_entry {
+ struct rb_node node;
+ struct inode *inode;
+ struct ib_xrcd *xrcd;
+};
+
+static int xrcd_table_insert(struct ib_device *dev,
+ struct inode *i_n,
+ struct ib_xrcd *xrcd)
+{
+ struct xrcd_table_entry *entry, *scan;
+ struct rb_node **p = &dev->ib_uverbs_xrcd_table.rb_node;
+ struct rb_node *parent = NULL;
+
+ entry = kmalloc(sizeof(struct xrcd_table_entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->inode = i_n;
+ entry->xrcd = xrcd;
+
+ while (*p) {
+ parent = *p;
+ scan = rb_entry(parent, struct xrcd_table_entry, node);
+
+ if (i_n < scan->inode)
+ p = &(*p)->rb_left;
+ else if (i_n > scan->inode)
+ p = &(*p)->rb_right;
+ else {
+ kfree(entry);
+ return -EEXIST;
+ }
+ }
+
+ rb_link_node(&entry->node, parent, p);
+ rb_insert_color(&entry->node, &dev->ib_uverbs_xrcd_table);
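+	/* pin the inode: it is this entry's rbtree key and is released by
+	 * xrcd_table_delete() via iput() */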
+ igrab(i_n);
+ return 0;
+}
+
+static struct xrcd_table_entry *xrcd_table_search(struct ib_device *dev,
+ struct inode *i_n)
+{
+ struct xrcd_table_entry *scan;
+ struct rb_node **p = &dev->ib_uverbs_xrcd_table.rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*p) {
+ parent = *p;
+ scan = rb_entry(parent, struct xrcd_table_entry, node);
+
+ if (i_n < scan->inode)
+ p = &(*p)->rb_left;
+ else if (i_n > scan->inode)
+ p = &(*p)->rb_right;
+ else
+ return scan;
+ }
+ return NULL;
+}
+
+static int find_xrcd(struct ib_device *dev, struct inode *i_n,
+ struct ib_xrcd **xrcd)
+{
+ struct xrcd_table_entry *entry;
+
+ entry = xrcd_table_search(dev, i_n);
+ if (!entry)
+ return -EINVAL;
+
+ *xrcd = entry->xrcd;
+ return 0;
+}
+
+static void xrcd_table_delete(struct ib_device *dev,
+ struct inode *i_n)
+{
+ struct xrcd_table_entry *entry = xrcd_table_search(dev, i_n);
+
+ if (entry) {
+ iput(i_n);
+ rb_erase(&entry->node, &dev->ib_uverbs_xrcd_table);
+ kfree(entry);
+ }
+}
+
+ssize_t ib_uverbs_open_xrc_domain(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_open_xrc_domain cmd;
+ struct ib_uverbs_open_xrc_domain_resp resp;
+ struct ib_udata udata;
+ struct ib_uobject *uobj;
+ struct ib_uxrcd_object *xrcd_uobj;
+ struct ib_xrcd *xrcd = NULL;
+ struct file *f = NULL;
+ struct inode *inode = NULL;
+ int ret = 0;
+ int new_xrcd = 0;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
+ if (cmd.fd != (u32) (-1)) {
+		/* resolve the user-supplied file descriptor */
+ f = fget(cmd.fd);
+ if (!f) {
+ ret = -EBADF;
+ goto err_table_mutex_unlock;
+ }
+
+ inode = xrc_file2inode(f);
+ if (!inode) {
+ ret = -EBADF;
+ goto err_table_mutex_unlock;
+ }
+
+ ret = find_xrcd(file->device->ib_dev, inode, &xrcd);
+ if (ret && !(cmd.oflags & O_CREAT)) {
+			/* no XRC domain for this inode; O_CREAT is required to create one */
+ ret = -EAGAIN;
+ goto err_table_mutex_unlock;
+ }
+
+ if (xrcd && cmd.oflags & O_EXCL) {
+ ret = -EINVAL;
+ goto err_table_mutex_unlock;
+ }
+ }
+
+ xrcd_uobj = kmalloc(sizeof *xrcd_uobj, GFP_KERNEL);
+ if (!xrcd_uobj) {
+ ret = -ENOMEM;
+ goto err_table_mutex_unlock;
+ }
+
+ uobj = &xrcd_uobj->uobject;
+ init_uobj(uobj, 0, file->ucontext, &pd_lock_key);
+ down_write(&uobj->mutex);
+
+ if (!xrcd) {
+ xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
+ file->ucontext, &udata);
+ if (IS_ERR(xrcd)) {
+ ret = PTR_ERR(xrcd);
+ goto err;
+ }
+ xrcd->uobject = (cmd.fd == -1) ? uobj : NULL;
+ xrcd->inode = inode;
+ xrcd->device = file->device->ib_dev;
+ atomic_set(&xrcd->usecnt, 0);
+ new_xrcd = 1;
+ }
+
+ uobj->object = xrcd;
+ ret = idr_add_uobj(&ib_uverbs_xrc_domain_idr, uobj);
+ if (ret)
+ goto err_idr;
+
+ memset(&resp, 0, sizeof resp);
+ resp.xrcd_handle = uobj->id;
+
+ if (inode) {
+ if (new_xrcd) {
+ /* create new inode/xrcd table entry */
+ ret = xrcd_table_insert(file->device->ib_dev, inode, xrcd);
+ if (ret)
+ goto err_insert_xrcd;
+ }
+ atomic_inc(&xrcd->usecnt);
+ }
+ if (f)
+ fput(f);
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ INIT_LIST_HEAD(&xrcd_uobj->xrc_reg_qp_list);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->xrcd_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+ return in_len;
+
+err_copy:
+ if (inode) {
+ if (new_xrcd)
+ xrcd_table_delete(file->device->ib_dev, inode);
+ atomic_dec(&xrcd->usecnt);
+ }
+
+err_insert_xrcd:
+ idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
+
+err_idr:
+ ib_dealloc_xrcd(xrcd);
+
+err:
+ put_uobj_write(uobj);
+
+err_table_mutex_unlock:
+ if (f)
+ fput(f);
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+ return ret;
+}
+
+ssize_t ib_uverbs_close_xrc_domain(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_close_xrc_domain cmd;
+ struct ib_uobject *uobj, *t_uobj;
+ struct ib_uxrcd_object *xrcd_uobj;
+ struct ib_xrcd *xrcd = NULL;
+ struct inode *inode = NULL;
+ int ret = 0;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
+ uobj = idr_write_uobj(&ib_uverbs_xrc_domain_idr, cmd.xrcd_handle,
+ file->ucontext);
+ if (!uobj) {
+ ret = -EINVAL;
+ goto err_unlock_mutex;
+ }
+
+ mutex_lock(&file->mutex);
+ if (!ret) {
+ list_for_each_entry(t_uobj, &file->ucontext->qp_list, list) {
+ struct ib_qp *qp = t_uobj->object;
+ if (qp->xrcd && qp->xrcd == uobj->object) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+ }
+ if (!ret) {
+ list_for_each_entry(t_uobj, &file->ucontext->srq_list, list) {
+ struct ib_srq *srq = t_uobj->object;
+ if (srq->ext.xrc.xrcd && srq->ext.xrc.xrcd == uobj->object) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&file->mutex);
+ if (ret) {
+ put_uobj_write(uobj);
+ goto err_unlock_mutex;
+ }
+
+ xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject);
+ if (!list_empty(&xrcd_uobj->xrc_reg_qp_list)) {
+ ret = -EBUSY;
+ put_uobj_write(uobj);
+ goto err_unlock_mutex;
+ }
+
+ xrcd = (struct ib_xrcd *) (uobj->object);
+ inode = xrcd->inode;
+
+ if (inode)
+ atomic_dec(&xrcd->usecnt);
+
+ ret = ib_dealloc_xrcd(uobj->object);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ if (ret && !inode)
+ goto err_unlock_mutex;
+
+ if (!ret && inode)
+ xrcd_table_delete(file->device->ib_dev, inode);
+
+ idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+ return in_len;
+
+err_unlock_mutex:
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+ return ret;
+}
+
+void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
+ struct ib_xrcd *xrcd)
+{
+ struct inode *inode = NULL;
+ int ret = 0;
+
+ inode = xrcd->inode;
+ if (inode)
+ atomic_dec(&xrcd->usecnt);
+
+ ret = ib_dealloc_xrcd(xrcd);
+ if (!ret && inode)
+ xrcd_table_delete(ib_dev, inode);
+}
+
+ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_xrc_rcv_qp cmd;
+ struct ib_uverbs_create_xrc_rcv_qp_resp resp;
+ struct ib_uxrc_rcv_object *obj;
+ struct ib_qp_init_attr init_attr;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *uobj;
+ struct ib_uxrcd_object *xrcd_uobj;
+ u32 qp_num;
+ int err;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ obj = kzalloc(sizeof *obj, GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj);
+ if (!xrcd) {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ init_attr.event_handler = ib_uverbs_xrc_rcv_qp_event_handler;
+ init_attr.qp_context = file;
+ init_attr.srq = NULL;
+ init_attr.sq_sig_type =
+ cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+ init_attr.qp_type = IB_QPT_XRC;
+ init_attr.xrcd = xrcd;
+
+ init_attr.cap.max_send_wr = 1;
+ init_attr.cap.max_recv_wr = 0;
+ init_attr.cap.max_send_sge = 1;
+ init_attr.cap.max_recv_sge = 0;
+ init_attr.cap.max_inline_data = 0;
+
+ err = xrcd->device->create_xrc_rcv_qp(&init_attr, &qp_num);
+ if (err)
+ goto err_put;
+
+ memset(&resp, 0, sizeof resp);
+ resp.qpn = qp_num;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ err = -EFAULT;
+ goto err_destroy;
+ }
+
+ atomic_inc(&xrcd->usecnt);
+ put_xrcd_read(uobj);
+ obj->qp_num = qp_num;
+ obj->domain_handle = cmd.xrc_domain_handle;
+ xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject);
+ mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
+ list_add_tail(&obj->list, &xrcd_uobj->xrc_reg_qp_list);
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+
+ return in_len;
+
+err_destroy:
+ xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num);
+err_put:
+ put_xrcd_read(uobj);
+err_out:
+ kfree(obj);
+ return err;
+}
+
+ssize_t ib_uverbs_modify_xrc_rcv_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_modify_xrc_rcv_qp cmd;
+ struct ib_qp_attr *attr;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *uobj;
+ int err;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ attr = kzalloc(sizeof *attr, GFP_KERNEL);
+ if (!attr)
+ return -ENOMEM;
+
+ xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj);
+ if (!xrcd) {
+ kfree(attr);
+ return -EINVAL;
+ }
+
+ attr->qp_state = cmd.qp_state;
+ attr->cur_qp_state = cmd.cur_qp_state;
+ attr->qp_access_flags = cmd.qp_access_flags;
+ attr->pkey_index = cmd.pkey_index;
+ attr->port_num = cmd.port_num;
+ attr->path_mtu = cmd.path_mtu;
+ attr->path_mig_state = cmd.path_mig_state;
+ attr->qkey = cmd.qkey;
+ attr->rq_psn = cmd.rq_psn;
+ attr->sq_psn = cmd.sq_psn;
+ attr->dest_qp_num = cmd.dest_qp_num;
+ attr->alt_pkey_index = cmd.alt_pkey_index;
+ attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
+ attr->max_rd_atomic = cmd.max_rd_atomic;
+ attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
+ attr->min_rnr_timer = cmd.min_rnr_timer;
+ attr->timeout = cmd.timeout;
+ attr->retry_cnt = cmd.retry_cnt;
+ attr->rnr_retry = cmd.rnr_retry;
+ attr->alt_port_num = cmd.alt_port_num;
+ attr->alt_timeout = cmd.alt_timeout;
+
+ memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
+ attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
+ attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
+ attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
+ attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
+ attr->ah_attr.dlid = cmd.dest.dlid;
+ attr->ah_attr.sl = cmd.dest.sl;
+ attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
+ attr->ah_attr.static_rate = cmd.dest.static_rate;
+ attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
+ attr->ah_attr.port_num = cmd.dest.port_num;
+
+ memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
+ attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
+ attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
+ attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
+ attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
+ attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
+ attr->alt_ah_attr.sl = cmd.alt_dest.sl;
+ attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
+ attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
+ attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
+ attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
+
+ err = xrcd->device->modify_xrc_rcv_qp(xrcd, cmd.qp_num, attr, cmd.attr_mask);
+ put_xrcd_read(uobj);
+ kfree(attr);
+ return err ? err : in_len;
+}
+
+ssize_t ib_uverbs_query_xrc_rcv_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_query_xrc_rcv_qp cmd;
+ struct ib_uverbs_query_qp_resp resp;
+ struct ib_qp_attr *attr;
+ struct ib_qp_init_attr *init_attr;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *uobj;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
+ if (!attr || !init_attr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj);
+ if (!xrcd) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = xrcd->device->query_xrc_rcv_qp(xrcd, cmd.qp_num, attr,
+ cmd.attr_mask, init_attr);
+
+ put_xrcd_read(uobj);
+
+ if (ret)
+ goto out;
+
+ memset(&resp, 0, sizeof resp);
+ resp.qp_state = attr->qp_state;
+ resp.cur_qp_state = attr->cur_qp_state;
+ resp.path_mtu = attr->path_mtu;
+ resp.path_mig_state = attr->path_mig_state;
+ resp.qkey = attr->qkey;
+ resp.rq_psn = attr->rq_psn;
+ resp.sq_psn = attr->sq_psn;
+ resp.dest_qp_num = attr->dest_qp_num;
+ resp.qp_access_flags = attr->qp_access_flags;
+ resp.pkey_index = attr->pkey_index;
+ resp.alt_pkey_index = attr->alt_pkey_index;
+ resp.sq_draining = attr->sq_draining;
+ resp.max_rd_atomic = attr->max_rd_atomic;
+ resp.max_dest_rd_atomic = attr->max_dest_rd_atomic;
+ resp.min_rnr_timer = attr->min_rnr_timer;
+ resp.port_num = attr->port_num;
+ resp.timeout = attr->timeout;
+ resp.retry_cnt = attr->retry_cnt;
+ resp.rnr_retry = attr->rnr_retry;
+ resp.alt_port_num = attr->alt_port_num;
+ resp.alt_timeout = attr->alt_timeout;
+
+ memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
+ resp.dest.flow_label = attr->ah_attr.grh.flow_label;
+ resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
+ resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
+ resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
+ resp.dest.dlid = attr->ah_attr.dlid;
+ resp.dest.sl = attr->ah_attr.sl;
+ resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
+ resp.dest.static_rate = attr->ah_attr.static_rate;
+ resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
+ resp.dest.port_num = attr->ah_attr.port_num;
+
+ memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
+ resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
+ resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
+ resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
+ resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
+ resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
+ resp.alt_dest.sl = attr->alt_ah_attr.sl;
+ resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
+ resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
+ resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
+ resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
+
+ resp.max_send_wr = init_attr->cap.max_send_wr;
+ resp.max_recv_wr = init_attr->cap.max_recv_wr;
+ resp.max_send_sge = init_attr->cap.max_send_sge;
+ resp.max_recv_sge = init_attr->cap.max_recv_sge;
+ resp.max_inline_data = init_attr->cap.max_inline_data;
+ resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ kfree(attr);
+ kfree(init_attr);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_reg_xrc_rcv_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_reg_xrc_rcv_qp cmd;
+ struct ib_uxrc_rcv_object *qp_obj, *tmp;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *uobj;
+ struct ib_uxrcd_object *xrcd_uobj;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ qp_obj = kmalloc(sizeof *qp_obj, GFP_KERNEL);
+ if (!qp_obj)
+ return -ENOMEM;
+
+ xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj);
+ if (!xrcd) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ ret = xrcd->device->reg_xrc_rcv_qp(xrcd, file, cmd.qp_num);
+ if (ret)
+ goto err_put;
+
+ xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject);
+ mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
+ list_for_each_entry(tmp, &xrcd_uobj->xrc_reg_qp_list, list)
+ if (cmd.qp_num == tmp->qp_num) {
+ kfree(qp_obj);
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+ put_xrcd_read(uobj);
+ return in_len;
+ }
+ qp_obj->qp_num = cmd.qp_num;
+ qp_obj->domain_handle = cmd.xrc_domain_handle;
+ list_add_tail(&qp_obj->list, &xrcd_uobj->xrc_reg_qp_list);
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+ atomic_inc(&xrcd->usecnt);
+ put_xrcd_read(uobj);
+ return in_len;
+
+err_put:
+ put_xrcd_read(uobj);
+err_out:
+ kfree(qp_obj);
+ return ret;
+}
+
+int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file,
+ struct ib_xrcd *xrcd, u32 qp_num)
+{
+ int err;
+ err = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num);
+ if (!err)
+ atomic_dec(&xrcd->usecnt);
+ return err;
+}
+
+ssize_t ib_uverbs_unreg_xrc_rcv_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_unreg_xrc_rcv_qp cmd;
+ struct ib_uxrc_rcv_object *qp_obj, *tmp;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *uobj;
+ struct ib_uxrcd_object *xrcd_uobj;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj);
+ if (!xrcd)
+ return -EINVAL;
+
+ ret = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, cmd.qp_num);
+ if (ret) {
+ put_xrcd_read(uobj);
+ return -EINVAL;
+ }
+ atomic_dec(&xrcd->usecnt);
+
+ xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject);
+ mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
+ list_for_each_entry_safe(qp_obj, tmp, &xrcd_uobj->xrc_reg_qp_list, list)
+ if (cmd.qp_num == qp_obj->qp_num) {
+ list_del(&qp_obj->list);
+ kfree(qp_obj);
+ break;
+ }
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+ put_xrcd_read(uobj);
+ return in_len;
+}
Property changes on: trunk/sys/ofed/drivers/infiniband/core/uverbs_cmd.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/uverbs_main.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/uverbs_main.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/uverbs_main.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,1013 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/cdev.h>
+
+#include <asm/uaccess.h>
+
+#include "uverbs.h"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("InfiniBand userspace verbs access");
+MODULE_LICENSE("Dual BSD/GPL");
+
+#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */
+
+enum {
+ IB_UVERBS_MAJOR = 231,
+ IB_UVERBS_BASE_MINOR = 192,
+ IB_UVERBS_MAX_DEVICES = 32
+};
+
+#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
+
+static struct class *uverbs_class;
+
+DEFINE_SPINLOCK(ib_uverbs_idr_lock);
+DEFINE_IDR(ib_uverbs_pd_idr);
+DEFINE_IDR(ib_uverbs_mr_idr);
+DEFINE_IDR(ib_uverbs_mw_idr);
+DEFINE_IDR(ib_uverbs_ah_idr);
+DEFINE_IDR(ib_uverbs_cq_idr);
+DEFINE_IDR(ib_uverbs_qp_idr);
+DEFINE_IDR(ib_uverbs_srq_idr);
+DEFINE_IDR(ib_uverbs_xrc_domain_idr);
+
+static spinlock_t map_lock;
+static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
+static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
+
+static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len) = {
+ [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
+ [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
+ [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
+ [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
+ [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
+ [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
+ [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
+ [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
+ [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
+ [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
+ [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
+ [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
+ [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
+ [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
+ [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
+ [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
+ [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
+ [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
+ [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
+ [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
+ [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
+ [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
+ [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
+ [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
+ [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
+ [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
+ [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
+ [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
+ [IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = ib_uverbs_create_xrc_srq,
+ [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrc_domain,
+ [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrc_domain,
+ [IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = ib_uverbs_create_xrc_rcv_qp,
+ [IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = ib_uverbs_modify_xrc_rcv_qp,
+ [IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = ib_uverbs_query_xrc_rcv_qp,
+ [IB_USER_VERBS_CMD_REG_XRC_RCV_QP] = ib_uverbs_reg_xrc_rcv_qp,
+ [IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP] = ib_uverbs_unreg_xrc_rcv_qp,
+};
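This table is indexed by the command number userspace places in the fixed uverbs header; the character device's write() method, defined further down in this file, bounds-checks that number and dispatches through it. A simplified sketch of the dispatch path (see the real ib_uverbs_write() below for the full checks):

	struct ib_uverbs_cmd_hdr hdr;

	if (count < sizeof hdr)
		return -EINVAL;
	if (copy_from_user(&hdr, buf, sizeof hdr))
		return -EFAULT;
	if (hdr.in_words * 4 != count)
		return -EINVAL;
	if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
	    !uverbs_cmd_table[hdr.command])
		return -EINVAL;

	/* handlers get byte counts derived from the header's word counts */
	return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
					     hdr.in_words * 4,
					     hdr.out_words * 4);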
+
+#ifdef __linux__
+/* BSD Does not require a fake mountpoint for all files. */
+static struct vfsmount *uverbs_event_mnt;
+#endif
+
+static void ib_uverbs_add_one(struct ib_device *device);
+static void ib_uverbs_remove_one(struct ib_device *device);
+
+static void ib_uverbs_release_dev(struct kref *ref)
+{
+ struct ib_uverbs_device *dev =
+ container_of(ref, struct ib_uverbs_device, ref);
+
+ complete(&dev->comp);
+}
+
+static void ib_uverbs_release_event_file(struct kref *ref)
+{
+ struct ib_uverbs_event_file *file =
+ container_of(ref, struct ib_uverbs_event_file, ref);
+
+ kfree(file);
+}
+
+void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
+ struct ib_uverbs_event_file *ev_file,
+ struct ib_ucq_object *uobj)
+{
+ struct ib_uverbs_event *evt, *tmp;
+
+ if (ev_file) {
+ spin_lock_irq(&ev_file->lock);
+ list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
+ list_del(&evt->list);
+ kfree(evt);
+ }
+ spin_unlock_irq(&ev_file->lock);
+
+ kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ }
+
+ spin_lock_irq(&file->async_file->lock);
+ list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
+ list_del(&evt->list);
+ kfree(evt);
+ }
+ spin_unlock_irq(&file->async_file->lock);
+}
+
+void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
+ struct ib_uevent_object *uobj)
+{
+ struct ib_uverbs_event *evt, *tmp;
+
+ spin_lock_irq(&file->async_file->lock);
+ list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
+ list_del(&evt->list);
+ kfree(evt);
+ }
+ spin_unlock_irq(&file->async_file->lock);
+}
+
+static void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj)
+{
+ struct ib_uverbs_mcast_entry *mcast, *tmp;
+
+ list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
+ ib_detach_mcast(qp, &mcast->gid, mcast->lid);
+ list_del(&mcast->list);
+ kfree(mcast);
+ }
+}
+
+static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
+ struct ib_ucontext *context)
+{
+ struct ib_uobject *uobj, *tmp;
+
+ if (!context)
+ return 0;
+
+ context->closing = 1;
+
+ list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
+ struct ib_ah *ah = uobj->object;
+
+ idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
+ ib_destroy_ah(ah);
+ kfree(uobj);
+ }
+
+ list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
+ struct ib_qp *qp = uobj->object;
+ struct ib_uqp_object *uqp =
+ container_of(uobj, struct ib_uqp_object, uevent.uobject);
+
+ idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
+ ib_uverbs_detach_umcast(qp, uqp);
+ ib_destroy_qp(qp);
+ ib_uverbs_release_uevent(file, &uqp->uevent);
+ kfree(uqp);
+ }
+
+ list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
+ struct ib_srq *srq = uobj->object;
+ struct ib_uevent_object *uevent =
+ container_of(uobj, struct ib_uevent_object, uobject);
+
+ idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
+ ib_destroy_srq(srq);
+ ib_uverbs_release_uevent(file, uevent);
+ kfree(uevent);
+ }
+
+ list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
+ struct ib_cq *cq = uobj->object;
+ struct ib_uverbs_event_file *ev_file = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobj, struct ib_ucq_object, uobject);
+
+ idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
+ ib_destroy_cq(cq);
+ ib_uverbs_release_ucq(file, ev_file, ucq);
+ kfree(ucq);
+ }
+
+ /* XXX Free MWs */
+
+ list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
+ struct ib_mr *mr = uobj->object;
+
+ idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
+ ib_dereg_mr(mr);
+ kfree(uobj);
+ }
+
+ mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
+ list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
+ struct ib_xrcd *xrcd = uobj->object;
+ struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1;
+ struct ib_uxrcd_object *xrcd_uobj =
+ container_of(uobj, struct ib_uxrcd_object, uobject);
+
+ list_for_each_entry_safe(xrc_qp_obj, tmp1,
+ &xrcd_uobj->xrc_reg_qp_list, list) {
+ list_del(&xrc_qp_obj->list);
+ ib_uverbs_cleanup_xrc_rcv_qp(file, xrcd,
+ xrc_qp_obj->qp_num);
+ kfree(xrc_qp_obj);
+ }
+
+ idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
+ ib_uverbs_dealloc_xrcd(file->device->ib_dev, xrcd);
+ kfree(uobj);
+ }
+ mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+
+ list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
+ struct ib_pd *pd = uobj->object;
+
+ idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
+ ib_dealloc_pd(pd);
+ kfree(uobj);
+ }
+
+ return context->device->dealloc_ucontext(context);
+}
+
+static void ib_uverbs_release_file(struct kref *ref)
+{
+ struct ib_uverbs_file *file =
+ container_of(ref, struct ib_uverbs_file, ref);
+
+ module_put(file->device->ib_dev->owner);
+ kref_put(&file->device->ref, ib_uverbs_release_dev);
+
+ kfree(file);
+}
+
+static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_event *event;
+ int eventsz;
+ int ret = 0;
+
+ spin_lock_irq(&file->lock);
+
+ while (list_empty(&file->event_list)) {
+ spin_unlock_irq(&file->lock);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(file->poll_wait,
+ !list_empty(&file->event_list)))
+ return -ERESTARTSYS;
+
+ spin_lock_irq(&file->lock);
+ }
+
+ event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
+
+ if (file->is_async)
+ eventsz = sizeof (struct ib_uverbs_async_event_desc);
+ else
+ eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+
+ if (eventsz > count) {
+ ret = -EINVAL;
+ event = NULL;
+ } else {
+ list_del(file->event_list.next);
+ if (event->counter) {
+ ++(*event->counter);
+ list_del(&event->obj_list);
+ }
+ }
+
+ spin_unlock_irq(&file->lock);
+
+ if (event) {
+ if (copy_to_user(buf, event, eventsz))
+ ret = -EFAULT;
+ else
+ ret = eventsz;
+ }
+
+ kfree(event);
+
+ return ret;
+}
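+
+/*
+ * Matching userspace read, sketched for a completion channel
+ * (hypothetical caller; handle_cq is illustrative): each read()
+ * returns exactly one event descriptor, so a buffer holding a
+ * single descriptor is sufficient:
+ *
+ *    struct ib_uverbs_comp_event_desc desc;
+ *
+ *    if (read(channel_fd, &desc, sizeof desc) == sizeof desc)
+ *            handle_cq(desc.cq_handle);
+ */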
+
+static unsigned int ib_uverbs_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ unsigned int pollflags = 0;
+ struct ib_uverbs_event_file *file = filp->private_data;
+
+ file->filp = filp;
+ poll_wait(filp, &file->poll_wait, wait);
+
+ spin_lock_irq(&file->lock);
+ if (!list_empty(&file->event_list))
+ pollflags = POLLIN | POLLRDNORM;
+ spin_unlock_irq(&file->lock);
+
+ return pollflags;
+}
+
+static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
+{
+ struct ib_uverbs_event_file *file = filp->private_data;
+
+ return fasync_helper(fd, filp, on, &file->async_queue);
+}
+
+static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_event *entry, *tmp;
+
+ spin_lock_irq(&file->lock);
+ file->is_closed = 1;
+ list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
+ if (entry->counter)
+ list_del(&entry->obj_list);
+ kfree(entry);
+ }
+ spin_unlock_irq(&file->lock);
+
+ if (file->is_async) {
+ ib_unregister_event_handler(&file->uverbs_file->event_handler);
+ kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+ }
+ kref_put(&file->ref, ib_uverbs_release_event_file);
+
+ return 0;
+}
+
+static const struct file_operations uverbs_event_fops = {
+ .owner = THIS_MODULE,
+ .read = ib_uverbs_event_read,
+ .poll = ib_uverbs_event_poll,
+ .release = ib_uverbs_event_close,
+ .fasync = ib_uverbs_event_fasync
+};
+
+void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+ struct ib_uverbs_event_file *file = cq_context;
+ struct ib_ucq_object *uobj;
+ struct ib_uverbs_event *entry;
+ unsigned long flags;
+
+ if (!file)
+ return;
+
+ spin_lock_irqsave(&file->lock, flags);
+ if (file->is_closed) {
+ spin_unlock_irqrestore(&file->lock, flags);
+ return;
+ }
+
+ entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ if (!entry) {
+ spin_unlock_irqrestore(&file->lock, flags);
+ return;
+ }
+
+ uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+
+ entry->desc.comp.cq_handle = cq->uobject->user_handle;
+ entry->counter = &uobj->comp_events_reported;
+
+ list_add_tail(&entry->list, &file->event_list);
+ list_add_tail(&entry->obj_list, &uobj->comp_list);
+ spin_unlock_irqrestore(&file->lock, flags);
+
+ wake_up_interruptible(&file->poll_wait);
+ if (file->filp)
+ selwakeup(&file->filp->f_selinfo);
+ kill_fasync(&file->async_queue, SIGIO, POLL_IN);
+}
+
+static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
+ __u64 element, __u64 event,
+ struct list_head *obj_list,
+ u32 *counter)
+{
+ struct ib_uverbs_event *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&file->async_file->lock, flags);
+ if (file->async_file->is_closed) {
+ spin_unlock_irqrestore(&file->async_file->lock, flags);
+ return;
+ }
+
+ entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ if (!entry) {
+ spin_unlock_irqrestore(&file->async_file->lock, flags);
+ return;
+ }
+
+ entry->desc.async.element = element;
+ entry->desc.async.event_type = event;
+ entry->counter = counter;
+
+ list_add_tail(&entry->list, &file->async_file->event_list);
+ if (obj_list)
+ list_add_tail(&entry->obj_list, obj_list);
+ spin_unlock_irqrestore(&file->async_file->lock, flags);
+
+ wake_up_interruptible(&file->async_file->poll_wait);
+ if (file->async_file->filp)
+ selwakeup(&file->async_file->filp->f_selinfo);
+ kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
+}
+
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+{
+ struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
+ struct ib_ucq_object, uobject);
+
+ ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
+ event->event, &uobj->async_list,
+ &uobj->async_events_reported);
+}
+
+void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
+{
+ struct ib_uevent_object *uobj;
+
+ uobj = container_of(event->element.qp->uobject,
+ struct ib_uevent_object, uobject);
+
+ ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
+ event->event, &uobj->event_list,
+ &uobj->events_reported);
+}
+
+void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
+{
+ struct ib_uevent_object *uobj;
+
+ uobj = container_of(event->element.srq->uobject,
+ struct ib_uevent_object, uobject);
+
+ ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
+ event->event, &uobj->event_list,
+ &uobj->events_reported);
+}
+
+void ib_uverbs_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ struct ib_uverbs_file *file =
+ container_of(handler, struct ib_uverbs_file, event_handler);
+
+ ib_uverbs_async_handler(file, event->element.port_num, event->event,
+ NULL, NULL);
+}
+
+void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event,
+ void *context_ptr)
+{
+ ib_uverbs_async_handler(context_ptr, event->element.xrc_qp_num,
+ event->event, NULL, NULL);
+}
+
+struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
+ int is_async, int *fd)
+{
+ struct ib_uverbs_event_file *ev_file;
+ struct file *filp;
+ int ret;
+
+ ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
+ if (!ev_file)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&ev_file->ref);
+ spin_lock_init(&ev_file->lock);
+ INIT_LIST_HEAD(&ev_file->event_list);
+ init_waitqueue_head(&ev_file->poll_wait);
+ ev_file->uverbs_file = uverbs_file;
+ ev_file->async_queue = NULL;
+ ev_file->is_async = is_async;
+ ev_file->is_closed = 0;
+ ev_file->filp = NULL;
+
+ *fd = get_unused_fd();
+ if (*fd < 0) {
+ ret = *fd;
+ goto err;
+ }
+
+ /*
+ * fops_get() can't fail here, because we're coming from a
+ * system call on a uverbs file, which will already have a
+ * module reference.
+ */
+#ifdef __linux__
+ filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root),
+ FMODE_READ, fops_get(&uverbs_event_fops));
+#else
+ filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops));
+#endif
+ if (!filp) {
+ ret = -ENFILE;
+ goto err_fd;
+ }
+
+ filp->private_data = ev_file;
+
+ return filp;
+
+err_fd:
+ put_unused_fd(*fd);
+
+err:
+ kfree(ev_file);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Look up a completion event file by FD. If lookup is successful,
+ * takes a ref to the event file struct that it returns; if
+ * unsuccessful, returns NULL.
+ */
+struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
+{
+ struct ib_uverbs_event_file *ev_file = NULL;
+ struct file *filp;
+
+ filp = fget(fd);
+ if (!filp)
+ return NULL;
+
+ if (filp->f_op != &uverbs_event_fops)
+ goto out;
+
+ ev_file = filp->private_data;
+ if (ev_file->is_async) {
+ ev_file = NULL;
+ goto out;
+ }
+
+ kref_get(&ev_file->ref);
+
+out:
+ fput(filp);
+ return ev_file;
+}
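+
+/*
+ * Callers that receive a non-NULL file from the lookup above own a
+ * reference and must drop it themselves when done (a sketch):
+ *
+ *    ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
+ *    ...
+ *    kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ */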
+
+static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+ struct ib_uverbs_cmd_hdr hdr;
+
+ if (count < sizeof hdr)
+ return -EINVAL;
+
+ if (copy_from_user(&hdr, buf, sizeof hdr))
+ return -EFAULT;
+
+ if (hdr.in_words * 4 != count)
+ return -EINVAL;
+
+ if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
+ !uverbs_cmd_table[hdr.command] ||
+ !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
+ return -EINVAL;
+
+ if (!file->ucontext &&
+ hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
+ return -EINVAL;
+
+ return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
+ hdr.in_words * 4, hdr.out_words * 4);
+}
+
+static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+
+ if (!file->ucontext)
+ return -ENODEV;
+ else
+ return file->device->ib_dev->mmap(file->ucontext, vma);
+}
+
+/*
+ * ib_uverbs_open() does not need the BKL:
+ *
+ * - dev_table[] accesses are protected by map_lock, the
+ * ib_uverbs_device structures are properly reference counted, and
+ * everything else is purely local to the file being created, so
+ * races against other open calls are not a problem;
+ * - there is no ioctl method to race against;
+ * - the device is added to dev_table[] as the last part of module
+ * initialization, so the open method will either fail immediately
+ * with -ENXIO, or all required initialization will be done.
+ */
+static int ib_uverbs_open(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_device *dev;
+ struct ib_uverbs_file *file;
+ int ret;
+
+ spin_lock(&map_lock);
+ dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR];
+ if (dev)
+ kref_get(&dev->ref);
+ spin_unlock(&map_lock);
+
+ if (!dev)
+ return -ENXIO;
+
+ if (!try_module_get(dev->ib_dev->owner)) {
+ ret = -ENODEV;
+ goto err;
+ }
+
+ file = kmalloc(sizeof *file, GFP_KERNEL);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err_module;
+ }
+
+ file->device = dev;
+ file->ucontext = NULL;
+ file->async_file = NULL;
+ kref_init(&file->ref);
+ mutex_init(&file->mutex);
+
+ filp->private_data = file;
+
+ return 0;
+
+err_module:
+ module_put(dev->ib_dev->owner);
+
+err:
+ kref_put(&dev->ref, ib_uverbs_release_dev);
+ return ret;
+}
+
+static int ib_uverbs_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+
+ ib_uverbs_cleanup_ucontext(file, file->ucontext);
+
+ if (file->async_file)
+ kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+
+ kref_put(&file->ref, ib_uverbs_release_file);
+
+ return 0;
+}
+
+static const struct file_operations uverbs_fops = {
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
+ .open = ib_uverbs_open,
+ .release = ib_uverbs_close
+};
+
+static const struct file_operations uverbs_mmap_fops = {
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
+ .mmap = ib_uverbs_mmap,
+ .open = ib_uverbs_open,
+ .release = ib_uverbs_close
+};
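+
+/*
+ * Two fops tables exist so that .mmap is only exposed for devices
+ * whose driver implements it; ib_uverbs_add_one() below selects the
+ * table based on device->mmap.
+ */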
+
+static struct ib_client uverbs_client = {
+ .name = "uverbs",
+ .add = ib_uverbs_add_one,
+ .remove = ib_uverbs_remove_one
+};
+
+static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ib_uverbs_device *dev = dev_get_drvdata(device);
+
+ if (!dev)
+ return -ENODEV;
+
+ return sprintf(buf, "%s\n", dev->ib_dev->name);
+}
+static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+
+static ssize_t show_dev_abi_version(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ib_uverbs_device *dev = dev_get_drvdata(device);
+
+ if (!dev)
+ return -ENODEV;
+
+ return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
+}
+static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
+
+static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
+}
+static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+#include <linux/pci.h>
+
+static ssize_t
+show_dev_device(struct device *device, struct device_attribute *attr, char *buf)
+{
+ struct ib_uverbs_device *dev = dev_get_drvdata(device);
+
+ if (!dev)
+ return -ENODEV;
+
+ return sprintf(buf, "0x%04x\n",
+ ((struct pci_dev *)dev->ib_dev->dma_device)->device);
+}
+static DEVICE_ATTR(device, S_IRUGO, show_dev_device, NULL);
+
+static ssize_t
+show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf)
+{
+ struct ib_uverbs_device *dev = dev_get_drvdata(device);
+
+ if (!dev)
+ return -ENODEV;
+
+ return sprintf(buf, "0x%04x\n",
+ ((struct pci_dev *)dev->ib_dev->dma_device)->vendor);
+}
+static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL);
+
+struct attribute *device_attrs[] =
+{
+ &dev_attr_device.attr,
+ &dev_attr_vendor.attr,
+ NULL
+};
+
+static struct attribute_group device_group = {
+ .name = "device",
+ .attrs = device_attrs
+};
+
+static void ib_uverbs_add_one(struct ib_device *device)
+{
+ struct ib_uverbs_device *uverbs_dev;
+
+ if (!device->alloc_ucontext)
+ return;
+
+ uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
+ if (!uverbs_dev)
+ return;
+
+ kref_init(&uverbs_dev->ref);
+ init_completion(&uverbs_dev->comp);
+
+ spin_lock(&map_lock);
+ uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
+ if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
+ spin_unlock(&map_lock);
+ goto err;
+ }
+ set_bit(uverbs_dev->devnum, dev_map);
+ spin_unlock(&map_lock);
+
+ uverbs_dev->ib_dev = device;
+ uverbs_dev->num_comp_vectors = device->num_comp_vectors;
+
+ uverbs_dev->cdev = cdev_alloc();
+ if (!uverbs_dev->cdev)
+ goto err;
+ uverbs_dev->cdev->owner = THIS_MODULE;
+ uverbs_dev->cdev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
+ kobject_set_name(&uverbs_dev->cdev->kobj, "uverbs%d", uverbs_dev->devnum);
+ if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+ goto err_cdev;
+
+ uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
+ uverbs_dev->cdev->dev, uverbs_dev,
+ "uverbs%d", uverbs_dev->devnum);
+ if (IS_ERR(uverbs_dev->dev))
+ goto err_cdev;
+
+ if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
+ goto err_class;
+ if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
+ goto err_class;
+ if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group))
+ goto err_class;
+
+ spin_lock(&map_lock);
+ dev_table[uverbs_dev->devnum] = uverbs_dev;
+ spin_unlock(&map_lock);
+
+ ib_set_client_data(device, &uverbs_client, uverbs_dev);
+
+ return;
+
+err_class:
+ device_destroy(uverbs_class, uverbs_dev->cdev->dev);
+
+err_cdev:
+ cdev_del(uverbs_dev->cdev);
+ clear_bit(uverbs_dev->devnum, dev_map);
+
+err:
+ kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
+ wait_for_completion(&uverbs_dev->comp);
+ kfree(uverbs_dev);
+ return;
+}
+
+static void ib_uverbs_remove_one(struct ib_device *device)
+{
+ struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
+
+ if (!uverbs_dev)
+ return;
+
+ sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group);
+ dev_set_drvdata(uverbs_dev->dev, NULL);
+ device_destroy(uverbs_class, uverbs_dev->cdev->dev);
+ cdev_del(uverbs_dev->cdev);
+
+ spin_lock(&map_lock);
+ dev_table[uverbs_dev->devnum] = NULL;
+ spin_unlock(&map_lock);
+
+ clear_bit(uverbs_dev->devnum, dev_map);
+
+ kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
+ wait_for_completion(&uverbs_dev->comp);
+ kfree(uverbs_dev);
+}
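+
+/*
+ * Teardown note: ib_uverbs_release_dev() signals dev->comp when the
+ * last reference goes away, so the wait_for_completion() above does
+ * not return until every open file on the device has been released.
+ */
+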
+#ifdef __linux__
+static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data,
+ struct vfsmount *mnt)
+{
+ return get_sb_pseudo(fs_type, "infinibandevent:", NULL,
+ INFINIBANDEVENTFS_MAGIC, mnt);
+}
+
+static struct file_system_type uverbs_event_fs = {
+ /* No owner field so module can be unloaded */
+ .name = "infinibandeventfs",
+ .get_sb = uverbs_event_get_sb,
+ .kill_sb = kill_litter_super
+};
+#endif
+
+static int __init ib_uverbs_init(void)
+{
+ int ret;
+
+ spin_lock_init(&map_lock);
+
+ ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
+ "infiniband_verbs");
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register device number\n");
+ goto out;
+ }
+
+ uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
+ if (IS_ERR(uverbs_class)) {
+ ret = PTR_ERR(uverbs_class);
+ printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
+ goto out_chrdev;
+ }
+
+ ret = class_create_file(uverbs_class, &class_attr_abi_version);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
+ goto out_class;
+ }
+
+#ifdef __linux__
+ ret = register_filesystem(&uverbs_event_fs);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n");
+ goto out_class;
+ }
+
+ uverbs_event_mnt = kern_mount(&uverbs_event_fs);
+ if (IS_ERR(uverbs_event_mnt)) {
+ ret = PTR_ERR(uverbs_event_mnt);
+ printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n");
+ goto out_fs;
+ }
+#endif
+
+ ret = ib_register_client(&uverbs_client);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register client\n");
+ goto out_mnt;
+ }
+
+ return 0;
+
+out_mnt:
+#ifdef __linux__
+ mntput(uverbs_event_mnt);
+
+out_fs:
+ unregister_filesystem(&uverbs_event_fs);
+#endif
+
+out_class:
+ class_destroy(uverbs_class);
+
+out_chrdev:
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+
+out:
+ return ret;
+}
+
+static void __exit ib_uverbs_cleanup(void)
+{
+ ib_unregister_client(&uverbs_client);
+#ifdef __linux__
+ mntput(uverbs_event_mnt);
+ unregister_filesystem(&uverbs_event_fs);
+#endif
+ class_destroy(uverbs_class);
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+ idr_destroy(&ib_uverbs_pd_idr);
+ idr_destroy(&ib_uverbs_mr_idr);
+ idr_destroy(&ib_uverbs_mw_idr);
+ idr_destroy(&ib_uverbs_ah_idr);
+ idr_destroy(&ib_uverbs_cq_idr);
+ idr_destroy(&ib_uverbs_qp_idr);
+ idr_destroy(&ib_uverbs_srq_idr);
+}
+
+module_init(ib_uverbs_init);
+module_exit(ib_uverbs_cleanup);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/uverbs_main.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/uverbs_marshall.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/uverbs_marshall.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/uverbs_marshall.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_marshall.h>
+
+void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
+ struct ib_ah_attr *src)
+{
+ memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid);
+ dst->grh.flow_label = src->grh.flow_label;
+ dst->grh.sgid_index = src->grh.sgid_index;
+ dst->grh.hop_limit = src->grh.hop_limit;
+ dst->grh.traffic_class = src->grh.traffic_class;
+ dst->dlid = src->dlid;
+ dst->sl = src->sl;
+ dst->src_path_bits = src->src_path_bits;
+ dst->static_rate = src->static_rate;
+ dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0;
+ dst->port_num = src->port_num;
+}
+EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
+
+void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
+ struct ib_qp_attr *src)
+{
+ dst->cur_qp_state = src->cur_qp_state;
+ dst->path_mtu = src->path_mtu;
+ dst->path_mig_state = src->path_mig_state;
+ dst->qkey = src->qkey;
+ dst->rq_psn = src->rq_psn;
+ dst->sq_psn = src->sq_psn;
+ dst->dest_qp_num = src->dest_qp_num;
+ dst->qp_access_flags = src->qp_access_flags;
+
+ dst->max_send_wr = src->cap.max_send_wr;
+ dst->max_recv_wr = src->cap.max_recv_wr;
+ dst->max_send_sge = src->cap.max_send_sge;
+ dst->max_recv_sge = src->cap.max_recv_sge;
+ dst->max_inline_data = src->cap.max_inline_data;
+
+ ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
+ ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr);
+
+ dst->pkey_index = src->pkey_index;
+ dst->alt_pkey_index = src->alt_pkey_index;
+ dst->en_sqd_async_notify = src->en_sqd_async_notify;
+ dst->sq_draining = src->sq_draining;
+ dst->max_rd_atomic = src->max_rd_atomic;
+ dst->max_dest_rd_atomic = src->max_dest_rd_atomic;
+ dst->min_rnr_timer = src->min_rnr_timer;
+ dst->port_num = src->port_num;
+ dst->timeout = src->timeout;
+ dst->retry_cnt = src->retry_cnt;
+ dst->rnr_retry = src->rnr_retry;
+ dst->alt_port_num = src->alt_port_num;
+ dst->alt_timeout = src->alt_timeout;
+}
+EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
+
+void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+ struct ib_sa_path_rec *src)
+{
+ memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
+ memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);
+
+ dst->dlid = src->dlid;
+ dst->slid = src->slid;
+ dst->raw_traffic = src->raw_traffic;
+ dst->flow_label = src->flow_label;
+ dst->hop_limit = src->hop_limit;
+ dst->traffic_class = src->traffic_class;
+ dst->reversible = src->reversible;
+ dst->numb_path = src->numb_path;
+ dst->pkey = src->pkey;
+ dst->sl = src->sl;
+ dst->mtu_selector = src->mtu_selector;
+ dst->mtu = src->mtu;
+ dst->rate_selector = src->rate_selector;
+ dst->rate = src->rate;
+ dst->packet_life_time = src->packet_life_time;
+ dst->preference = src->preference;
+ dst->packet_life_time_selector = src->packet_life_time_selector;
+}
+EXPORT_SYMBOL(ib_copy_path_rec_to_user);
+
+void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
+ struct ib_user_path_rec *src)
+{
+ memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
+ memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
+
+ dst->dlid = src->dlid;
+ dst->slid = src->slid;
+ dst->raw_traffic = src->raw_traffic;
+ dst->flow_label = src->flow_label;
+ dst->hop_limit = src->hop_limit;
+ dst->traffic_class = src->traffic_class;
+ dst->reversible = src->reversible;
+ dst->numb_path = src->numb_path;
+ dst->pkey = src->pkey;
+ dst->sl = src->sl;
+ dst->mtu_selector = src->mtu_selector;
+ dst->mtu = src->mtu;
+ dst->rate_selector = src->rate_selector;
+ dst->rate = src->rate;
+ dst->packet_life_time = src->packet_life_time;
+ dst->preference = src->preference;
+ dst->packet_life_time_selector = src->packet_life_time_selector;
+}
+EXPORT_SYMBOL(ib_copy_path_rec_from_user);
Property changes on: trunk/sys/ofed/drivers/infiniband/core/uverbs_marshall.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/core/verbs.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/core/verbs.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/core/verbs.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,1073 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/string.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+
+int ib_rate_to_mult(enum ib_rate rate)
+{
+ switch (rate) {
+ case IB_RATE_2_5_GBPS: return 1;
+ case IB_RATE_5_GBPS: return 2;
+ case IB_RATE_10_GBPS: return 4;
+ case IB_RATE_20_GBPS: return 8;
+ case IB_RATE_30_GBPS: return 12;
+ case IB_RATE_40_GBPS: return 16;
+ case IB_RATE_60_GBPS: return 24;
+ case IB_RATE_80_GBPS: return 32;
+ case IB_RATE_120_GBPS: return 48;
+ default: return -1;
+ }
+}
+EXPORT_SYMBOL(ib_rate_to_mult);
+
+enum ib_rate mult_to_ib_rate(int mult)
+{
+ switch (mult) {
+ case 1: return IB_RATE_2_5_GBPS;
+ case 2: return IB_RATE_5_GBPS;
+ case 4: return IB_RATE_10_GBPS;
+ case 8: return IB_RATE_20_GBPS;
+ case 12: return IB_RATE_30_GBPS;
+ case 16: return IB_RATE_40_GBPS;
+ case 24: return IB_RATE_60_GBPS;
+ case 32: return IB_RATE_80_GBPS;
+ case 48: return IB_RATE_120_GBPS;
+ default: return IB_RATE_PORT_CURRENT;
+ }
+}
+EXPORT_SYMBOL(mult_to_ib_rate);
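+
+/*
+ * The two helpers above are inverses over the defined rates: the
+ * multiplier expresses a rate in units of the 2.5 Gb/s (SDR x1)
+ * base speed.  For example:
+ *
+ *    ib_rate_to_mult(IB_RATE_40_GBPS)  ->  16
+ *    mult_to_ib_rate(16)               ->  IB_RATE_40_GBPS
+ */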
+
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type)
+{
+ switch (node_type) {
+ case RDMA_NODE_IB_CA:
+ case RDMA_NODE_IB_SWITCH:
+ case RDMA_NODE_IB_ROUTER:
+ return RDMA_TRANSPORT_IB;
+ case RDMA_NODE_RNIC:
+ return RDMA_TRANSPORT_IWARP;
+ default:
+ BUG();
+ return 0;
+ }
+}
+EXPORT_SYMBOL(rdma_node_get_transport);
+
+enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num)
+{
+ if (device->get_link_layer)
+ return device->get_link_layer(device, port_num);
+
+ switch (rdma_node_get_transport(device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ return IB_LINK_LAYER_INFINIBAND;
+ case RDMA_TRANSPORT_IWARP:
+ return IB_LINK_LAYER_ETHERNET;
+ default:
+ return IB_LINK_LAYER_UNSPECIFIED;
+ }
+}
+EXPORT_SYMBOL(rdma_port_get_link_layer);
+
+/* Protection domains */
+
+struct ib_pd *ib_alloc_pd(struct ib_device *device)
+{
+ struct ib_pd *pd;
+
+ pd = device->alloc_pd(device, NULL, NULL);
+
+ if (!IS_ERR(pd)) {
+ pd->device = device;
+ pd->uobject = NULL;
+ atomic_set(&pd->usecnt, 0);
+ }
+
+ return pd;
+}
+EXPORT_SYMBOL(ib_alloc_pd);
+
+int ib_dealloc_pd(struct ib_pd *pd)
+{
+ if (atomic_read(&pd->usecnt))
+ return -EBUSY;
+
+ return pd->device->dealloc_pd(pd);
+}
+EXPORT_SYMBOL(ib_dealloc_pd);
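+
+/*
+ * Typical kernel-client use of the PD helpers, sketched ('device' is
+ * whatever struct ib_device the client was handed at add time):
+ *
+ *    struct ib_pd *pd = ib_alloc_pd(device);
+ *
+ *    if (IS_ERR(pd))
+ *            return PTR_ERR(pd);
+ *    ...
+ *    ib_dealloc_pd(pd);     fails with -EBUSY while pd->usecnt != 0
+ */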
+
+/* Address handles */
+
+struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+ struct ib_ah *ah;
+
+ ah = pd->device->create_ah(pd, ah_attr);
+
+ if (!IS_ERR(ah)) {
+ ah->device = pd->device;
+ ah->pd = pd;
+ ah->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ }
+
+ return ah;
+}
+EXPORT_SYMBOL(ib_create_ah);
+
+int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
+ struct ib_grh *grh, struct ib_ah_attr *ah_attr)
+{
+ u32 flow_class;
+ u16 gid_index;
+ int ret;
+
+ memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->dlid = wc->slid;
+ ah_attr->sl = wc->sl;
+ ah_attr->src_path_bits = wc->dlid_path_bits;
+ ah_attr->port_num = port_num;
+
+ if (wc->wc_flags & IB_WC_GRH) {
+ ah_attr->ah_flags = IB_AH_GRH;
+ ah_attr->grh.dgid = grh->sgid;
+
+ ret = ib_find_cached_gid(device, &grh->dgid, &port_num,
+ &gid_index);
+ if (ret)
+ return ret;
+
+ ah_attr->grh.sgid_index = (u8) gid_index;
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ ah_attr->grh.flow_label = flow_class & 0xFFFFF;
+ ah_attr->grh.hop_limit = 0xFF;
+ ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ib_init_ah_from_wc);
+
+struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
+ struct ib_grh *grh, u8 port_num)
+{
+ struct ib_ah_attr ah_attr;
+ int ret;
+
+ ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return ib_create_ah(pd, &ah_attr);
+}
+EXPORT_SYMBOL(ib_create_ah_from_wc);
+
+int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+ return ah->device->modify_ah ?
+ ah->device->modify_ah(ah, ah_attr) :
+ -ENOSYS;
+}
+EXPORT_SYMBOL(ib_modify_ah);
+
+int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+ return ah->device->query_ah ?
+ ah->device->query_ah(ah, ah_attr) :
+ -ENOSYS;
+}
+EXPORT_SYMBOL(ib_query_ah);
+
+int ib_destroy_ah(struct ib_ah *ah)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ pd = ah->pd;
+ ret = ah->device->destroy_ah(ah);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_destroy_ah);
+
+/* Shared receive queues */
+
+struct ib_srq *ib_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr)
+{
+ struct ib_srq *srq;
+
+ if (!pd->device->create_srq)
+ return ERR_PTR(-ENOSYS);
+
+ srq = pd->device->create_srq(pd, srq_init_attr, NULL);
+
+ if (!IS_ERR(srq)) {
+ srq->device = pd->device;
+ srq->pd = pd;
+ srq->uobject = NULL;
+ srq->event_handler = srq_init_attr->event_handler;
+ srq->srq_context = srq_init_attr->srq_context;
+ srq->ext.xrc.cq = NULL;
+ srq->ext.xrc.xrcd = NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&srq->usecnt, 0);
+ }
+
+ return srq;
+}
+EXPORT_SYMBOL(ib_create_srq);
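+
+/*
+ * Client-side sketch of SRQ creation (the attribute values are
+ * illustrative only):
+ *
+ *    struct ib_srq_init_attr init_attr = {
+ *            .attr = { .max_wr = 128, .max_sge = 1 },
+ *    };
+ *    struct ib_srq *srq = ib_create_srq(pd, &init_attr);
+ *
+ *    if (IS_ERR(srq))
+ *            return PTR_ERR(srq);
+ */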
+
+struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd,
+ struct ib_cq *xrc_cq,
+ struct ib_xrcd *xrcd,
+ struct ib_srq_init_attr *srq_init_attr)
+{
+ struct ib_srq *srq;
+
+ if (!pd->device->create_xrc_srq)
+ return ERR_PTR(-ENOSYS);
+
+ srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, srq_init_attr, NULL);
+
+ if (!IS_ERR(srq)) {
+ srq->device = pd->device;
+ srq->pd = pd;
+ srq->uobject = NULL;
+ srq->event_handler = srq_init_attr->event_handler;
+ srq->srq_context = srq_init_attr->srq_context;
+ srq->ext.xrc.cq = xrc_cq;
+ srq->ext.xrc.xrcd = xrcd;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&xrcd->usecnt);
+ atomic_inc(&xrc_cq->usecnt);
+ atomic_set(&srq->usecnt, 0);
+ }
+
+ return srq;
+}
+EXPORT_SYMBOL(ib_create_xrc_srq);
+
+int ib_modify_srq(struct ib_srq *srq,
+ struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask)
+{
+ return srq->device->modify_srq ?
+ srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
+ -ENOSYS;
+}
+EXPORT_SYMBOL(ib_modify_srq);
+
+int ib_query_srq(struct ib_srq *srq,
+ struct ib_srq_attr *srq_attr)
+{
+ return srq->device->query_srq ?
+ srq->device->query_srq(srq, srq_attr) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_query_srq);
+
+int ib_destroy_srq(struct ib_srq *srq)
+{
+ struct ib_pd *pd;
+ struct ib_cq *xrc_cq;
+ struct ib_xrcd *xrcd;
+ int ret;
+
+ if (atomic_read(&srq->usecnt))
+ return -EBUSY;
+
+ pd = srq->pd;
+ xrc_cq = srq->ext.xrc.cq;
+ xrcd = srq->ext.xrc.xrcd;
+
+ ret = srq->device->destroy_srq(srq);
+ if (!ret) {
+ atomic_dec(&pd->usecnt);
+ if (xrc_cq)
+ atomic_dec(&xrc_cq->usecnt);
+ if (xrcd)
+ atomic_dec(&xrcd->usecnt);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_destroy_srq);
+
+/* Queue pairs */
+
+struct ib_qp *ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct ib_qp *qp;
+
+ qp = pd->device->create_qp(pd, qp_init_attr, NULL);
+
+ if (!IS_ERR(qp)) {
+ qp->device = pd->device;
+ qp->pd = pd;
+ qp->send_cq = qp_init_attr->send_cq;
+ qp->recv_cq = qp_init_attr->recv_cq;
+ qp->srq = qp_init_attr->srq;
+ qp->uobject = NULL;
+ qp->event_handler = qp_init_attr->event_handler;
+ qp->qp_context = qp_init_attr->qp_context;
+ qp->qp_type = qp_init_attr->qp_type;
+ qp->xrcd = qp->qp_type == IB_QPT_XRC ?
+ qp_init_attr->xrcd : NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&qp_init_attr->send_cq->usecnt);
+ atomic_inc(&qp_init_attr->recv_cq->usecnt);
+ if (qp_init_attr->srq)
+ atomic_inc(&qp_init_attr->srq->usecnt);
+ if (qp->qp_type == IB_QPT_XRC)
+ atomic_inc(&qp->xrcd->usecnt);
+ }
+
+ return qp;
+}
+EXPORT_SYMBOL(ib_create_qp);
+
+static const struct {
+ int valid;
+ enum ib_qp_attr_mask req_param[IB_QPT_RAW_PACKET + 1];
+ enum ib_qp_attr_mask opt_param[IB_QPT_RAW_PACKET + 1];
+} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_INIT] = {
+ .valid = 1,
+ .req_param = {
+ [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY),
+ [IB_QPT_RAW_PACKET] = IB_QP_PORT,
+ [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_XRC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ }
+ },
+ },
+ [IB_QPS_INIT] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_INIT] = {
+ .valid = 1,
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_XRC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ }
+ },
+ [IB_QPS_RTR] = {
+ .valid = 1,
+ .req_param = {
+ [IB_QPT_UC] = (IB_QP_AV |
+ IB_QP_PATH_MTU |
+ IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN),
+ [IB_QPT_RC] = (IB_QP_AV |
+ IB_QP_PATH_MTU |
+ IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN |
+ IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_MIN_RNR_TIMER),
+ [IB_QPT_XRC] = (IB_QP_AV |
+ IB_QP_PATH_MTU |
+ IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN |
+ IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_MIN_RNR_TIMER),
+ },
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX),
+ [IB_QPT_RC] = (IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX),
+ [IB_QPT_XRC] = (IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX),
+ [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ }
+ }
+ },
+ [IB_QPS_RTR] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .req_param = {
+ [IB_QPT_UD] = IB_QP_SQ_PSN,
+ [IB_QPT_UC] = IB_QP_SQ_PSN,
+ [IB_QPT_RC] = (IB_QP_TIMEOUT |
+ IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_SQ_PSN |
+ IB_QP_MAX_QP_RD_ATOMIC),
+ [IB_QPT_XRC] = (IB_QP_TIMEOUT |
+ IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_SQ_PSN |
+ IB_QP_MAX_QP_RD_ATOMIC),
+ [IB_QPT_SMI] = IB_QP_SQ_PSN,
+ [IB_QPT_GSI] = IB_QP_SQ_PSN,
+ },
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_RC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_SMI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ }
+ }
+ },
+ [IB_QPS_RTS] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_CUR_STATE |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_ALT_PATH |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_RC] = (IB_QP_CUR_STATE |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_ALT_PATH |
+ IB_QP_PATH_MIG_STATE |
+ IB_QP_MIN_RNR_TIMER),
+ [IB_QPT_XRC] = (IB_QP_CUR_STATE |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_ALT_PATH |
+ IB_QP_PATH_MIG_STATE |
+ IB_QP_MIN_RNR_TIMER),
+ [IB_QPT_SMI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ }
+ },
+ [IB_QPS_SQD] = {
+ .valid = 1,
+ .opt_param = {
+ [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_XRC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
+ }
+ },
+ },
+ [IB_QPS_SQD] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_RC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_SMI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ }
+ },
+ [IB_QPS_SQD] = {
+ .valid = 1,
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_AV |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_RC] = (IB_QP_PORT |
+ IB_QP_AV |
+ IB_QP_TIMEOUT |
+ IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_MAX_QP_RD_ATOMIC |
+ IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC] = (IB_QP_PORT |
+ IB_QP_AV |
+ IB_QP_TIMEOUT |
+ IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_MAX_QP_RD_ATOMIC |
+ IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ }
+ }
+ },
+ [IB_QPS_SQE] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_CUR_STATE |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_SMI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ }
+ }
+ },
+ [IB_QPS_ERR] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 }
+ }
+};
+
+int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
+ enum ib_qp_type type, enum ib_qp_attr_mask mask)
+{
+ enum ib_qp_attr_mask req_param, opt_param;
+
+ if (cur_state < 0 || cur_state > IB_QPS_ERR ||
+ next_state < 0 || next_state > IB_QPS_ERR)
+ return 0;
+
+ if (mask & IB_QP_CUR_STATE &&
+ cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
+ cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
+ return 0;
+
+ if (!qp_state_table[cur_state][next_state].valid)
+ return 0;
+
+ req_param = qp_state_table[cur_state][next_state].req_param[type];
+ opt_param = qp_state_table[cur_state][next_state].opt_param[type];
+
+ if ((mask & req_param) != req_param)
+ return 0;
+
+ if (mask & ~(req_param | opt_param | IB_QP_STATE))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL(ib_modify_qp_is_ok);
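+
+/*
+ * Driver-side sketch of the intended use: validate the caller's mask
+ * against the state table before touching hardware (cur_state and
+ * new_state are assumed to have been derived from qp_attr):
+ *
+ *    if (!ib_modify_qp_is_ok(cur_state, new_state,
+ *                            qp->qp_type, qp_attr_mask))
+ *            return -EINVAL;
+ */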
+
+int ib_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask)
+{
+ return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
+}
+EXPORT_SYMBOL(ib_modify_qp);
+
+int ib_query_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ return qp->device->query_qp ?
+ qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) :
+ -ENOSYS;
+}
+EXPORT_SYMBOL(ib_query_qp);
+
+int ib_destroy_qp(struct ib_qp *qp)
+{
+ struct ib_pd *pd;
+ struct ib_cq *scq, *rcq;
+ struct ib_srq *srq;
+ struct ib_xrcd *xrcd;
+ enum ib_qp_type qp_type = qp->qp_type;
+ int ret;
+
+ pd = qp->pd;
+ scq = qp->send_cq;
+ rcq = qp->recv_cq;
+ srq = qp->srq;
+ xrcd = qp->xrcd;
+
+ ret = qp->device->destroy_qp(qp);
+ if (!ret) {
+ atomic_dec(&pd->usecnt);
+ atomic_dec(&scq->usecnt);
+ atomic_dec(&rcq->usecnt);
+ if (srq)
+ atomic_dec(&srq->usecnt);
+ if (qp_type == IB_QPT_XRC)
+ atomic_dec(&xrcd->usecnt);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_destroy_qp);
+
+/* Completion queues */
+
+struct ib_cq *ib_create_cq(struct ib_device *device,
+ ib_comp_handler comp_handler,
+ void (*event_handler)(struct ib_event *, void *),
+ void *cq_context, int cqe, int comp_vector)
+{
+ struct ib_cq *cq;
+
+ cq = device->create_cq(device, cqe, comp_vector, NULL, NULL);
+
+ if (!IS_ERR(cq)) {
+ cq->device = device;
+ cq->uobject = NULL;
+ cq->comp_handler = comp_handler;
+ cq->event_handler = event_handler;
+ cq->cq_context = cq_context;
+ atomic_set(&cq->usecnt, 0);
+ }
+
+ return cq;
+}
+EXPORT_SYMBOL(ib_create_cq);
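+
+/*
+ * Sketch of a client creating a CQ with a completion callback
+ * ('my_comp_handler' and the 256-entry size are illustrative;
+ * comp_vector 0 is always valid):
+ *
+ *    struct ib_cq *cq = ib_create_cq(device, my_comp_handler, NULL,
+ *                                    ctx, 256, 0);
+ *
+ *    if (IS_ERR(cq))
+ *            return PTR_ERR(cq);
+ */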
+
+int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+ return cq->device->modify_cq ?
+ cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_modify_cq);
+
+int ib_destroy_cq(struct ib_cq *cq)
+{
+ if (atomic_read(&cq->usecnt))
+ return -EBUSY;
+
+ return cq->device->destroy_cq(cq);
+}
+EXPORT_SYMBOL(ib_destroy_cq);
+
+int ib_resize_cq(struct ib_cq *cq, int cqe)
+{
+ return cq->device->resize_cq ?
+ cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_resize_cq);
+
+/* Memory regions */
+
+struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
+{
+ struct ib_mr *mr;
+
+ mr = pd->device->get_dma_mr(pd, mr_access_flags);
+
+ if (!IS_ERR(mr)) {
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
+ }
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_get_dma_mr);
+
+struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start)
+{
+ struct ib_mr *mr;
+
+ if (!pd->device->reg_phys_mr)
+ return ERR_PTR(-ENOSYS);
+
+ mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf,
+ mr_access_flags, iova_start);
+
+ if (!IS_ERR(mr)) {
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
+ }
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_reg_phys_mr);
+
+int ib_rereg_phys_mr(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start)
+{
+ struct ib_pd *old_pd;
+ int ret;
+
+ if (!mr->device->rereg_phys_mr)
+ return -ENOSYS;
+
+ if (atomic_read(&mr->usecnt))
+ return -EBUSY;
+
+ old_pd = mr->pd;
+
+ ret = mr->device->rereg_phys_mr(mr, mr_rereg_mask, pd,
+ phys_buf_array, num_phys_buf,
+ mr_access_flags, iova_start);
+
+ if (!ret && (mr_rereg_mask & IB_MR_REREG_PD)) {
+ atomic_dec(&old_pd->usecnt);
+ atomic_inc(&pd->usecnt);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_rereg_phys_mr);
+
+int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
+{
+ return mr->device->query_mr ?
+ mr->device->query_mr(mr, mr_attr) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_query_mr);
+
+int ib_dereg_mr(struct ib_mr *mr)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ if (atomic_read(&mr->usecnt))
+ return -EBUSY;
+
+ pd = mr->pd;
+ ret = mr->device->dereg_mr(mr);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_dereg_mr);
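+
+/*
+ * DMA MR sketch (the access flags are illustrative):
+ *
+ *    struct ib_mr *mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
+ *
+ *    if (IS_ERR(mr))
+ *            return PTR_ERR(mr);
+ *    ...
+ *    ib_dereg_mr(mr);       fails with -EBUSY while mr->usecnt != 0
+ */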
+
+struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
+{
+ struct ib_mr *mr;
+
+ if (!pd->device->alloc_fast_reg_mr)
+ return ERR_PTR(-ENOSYS);
+
+ mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len);
+
+ if (!IS_ERR(mr)) {
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
+ }
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_alloc_fast_reg_mr);
+
+struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
+ int max_page_list_len)
+{
+ struct ib_fast_reg_page_list *page_list;
+
+ if (!device->alloc_fast_reg_page_list)
+ return ERR_PTR(-ENOSYS);
+
+ page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
+
+ if (!IS_ERR(page_list)) {
+ page_list->device = device;
+ page_list->max_page_list_len = max_page_list_len;
+ }
+
+ return page_list;
+}
+EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
+
+void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+ page_list->device->free_fast_reg_page_list(page_list);
+}
+EXPORT_SYMBOL(ib_free_fast_reg_page_list);
+
+/* Memory windows */
+
+struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
+{
+ struct ib_mw *mw;
+
+ if (!pd->device->alloc_mw)
+ return ERR_PTR(-ENOSYS);
+
+ mw = pd->device->alloc_mw(pd);
+ if (!IS_ERR(mw)) {
+ mw->device = pd->device;
+ mw->pd = pd;
+ mw->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ }
+
+ return mw;
+}
+EXPORT_SYMBOL(ib_alloc_mw);
+
+int ib_dealloc_mw(struct ib_mw *mw)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ pd = mw->pd;
+ ret = mw->device->dealloc_mw(mw);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_dealloc_mw);
+
+/* "Fast" memory regions */
+
+struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
+ int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr)
+{
+ struct ib_fmr *fmr;
+
+ if (!pd->device->alloc_fmr)
+ return ERR_PTR(-ENOSYS);
+
+ fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
+ if (!IS_ERR(fmr)) {
+ fmr->device = pd->device;
+ fmr->pd = pd;
+ atomic_inc(&pd->usecnt);
+ }
+
+ return fmr;
+}
+EXPORT_SYMBOL(ib_alloc_fmr);
+
+int ib_unmap_fmr(struct list_head *fmr_list)
+{
+ struct ib_fmr *fmr;
+
+ if (list_empty(fmr_list))
+ return 0;
+
+ fmr = list_entry(fmr_list->next, struct ib_fmr, list);
+ return fmr->device->unmap_fmr(fmr_list);
+}
+EXPORT_SYMBOL(ib_unmap_fmr);
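+
+/*
+ * Note: ib_unmap_fmr() dispatches through the first entry's device,
+ * so every FMR on the list is assumed to come from the same device.
+ * Batched unmapping, sketched:
+ *
+ *    LIST_HEAD(to_unmap);
+ *
+ *    list_add_tail(&fmr->list, &to_unmap);
+ *    ib_unmap_fmr(&to_unmap);
+ */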
+
+int ib_dealloc_fmr(struct ib_fmr *fmr)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ pd = fmr->pd;
+ ret = fmr->device->dealloc_fmr(fmr);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_dealloc_fmr);
+
+/* Multicast groups */
+
+int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
+{
+ if (!qp->device->attach_mcast)
+ return -ENOSYS;
+
+ switch (rdma_node_get_transport(qp->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ if (qp->qp_type == IB_QPT_RAW_PACKET) {
+ /* In raw Ethernet MGIDs, the 63 MSBs should be 0 */
+ if (gid->global.subnet_prefix & cpu_to_be64(~1ULL))
+ return -EINVAL;
+ } else if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ return -EINVAL;
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ if (qp->qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
+ break;
+ }
+ return qp->device->attach_mcast(qp, gid, lid);
+}
+EXPORT_SYMBOL(ib_attach_mcast);
+
+int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
+{
+ if (!qp->device->detach_mcast)
+ return -ENOSYS;
+
+ switch (rdma_node_get_transport(qp->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ if (qp->qp_type == IB_QPT_RAW_PACKET) {
+ /* In raw Ethernet MGIDs, the 63 MSBs should be 0 */
+ if (gid->global.subnet_prefix & cpu_to_be64(~1ULL))
+ return -EINVAL;
+ } else if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ return -EINVAL;
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ if (qp->qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
+ break;
+ }
+ return qp->device->detach_mcast(qp, gid, lid);
+}
+EXPORT_SYMBOL(ib_detach_mcast);
+
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+ if (atomic_read(&xrcd->usecnt))
+ return -EBUSY;
+
+ return xrcd->device->dealloc_xrcd(xrcd);
+}
+EXPORT_SYMBOL(ib_dealloc_xrcd);
+
+struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
+{
+ struct ib_xrcd *xrcd;
+
+ if (!device->alloc_xrcd)
+ return ERR_PTR(-ENOSYS);
+
+ xrcd = device->alloc_xrcd(device, NULL, NULL);
+ if (!IS_ERR(xrcd)) {
+ xrcd->device = device;
+ xrcd->inode = NULL;
+ xrcd->uobject = NULL;
+ atomic_set(&xrcd->usecnt, 0);
+ }
+ return xrcd;
+}
+EXPORT_SYMBOL(ib_alloc_xrcd);
+
Property changes on: trunk/sys/ofed/drivers/infiniband/core/verbs.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mlx4/Kconfig
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/Kconfig 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/Kconfig 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,7 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
+ depends on NETDEVICES && ETHERNET && PCI
+ select NET_VENDOR_MELLANOX
select MLX4_CORE
---help---
This driver provides low-level InfiniBand support for
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/Kconfig
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mlx4/Makefile
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/Makefile 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/Makefile 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,4 +1,31 @@
-obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
+# $FreeBSD$
+#.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4
+#.PATH: ${.CURDIR}/../../../../include/linux
-mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
-mlx4_ib-y += wc.o
+.include <bsd.own.mk>
+
+KMOD = mlx4ib
+SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
+#SRCS+= linux_compat.c linux_radix.c
+SRCS+= ah.c cq.c doorbell.c mad.c main.c mr.c qp.c srq.c wc.c
+SRCS+= opt_inet.h opt_inet6.h
+
+#CFLAGS+= -I${.CURDIR}/../../ofed/include/
+CFLAGS+= -I${.CURDIR}/../../../../include
+CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
+
+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+ @echo "#define INET 1" > ${.TARGET}
+.endif
+
+.if ${MK_INET6_SUPPORT} != "no"
+opt_inet6.h:
+ @echo "#define INET6 1" > ${.TARGET}
+.endif
+.endif
+
+.include <bsd.kmod.mk>
+
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith -fms-extensions
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/Makefile
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mlx4/ah.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/ah.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/ah.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -30,25 +30,24 @@
* SOFTWARE.
*/
-#include "mlx4_ib.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
#include <rdma/ib_addr.h>
-#include <linux/inet.h>
-#include <linux/string.h>
#include <rdma/ib_cache.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "mlx4_ib.h"
+
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port)
{
- struct mlx4_ib_iboe *iboe = &dev->iboe;
struct in6_addr in6;
*is_mcast = 0;
- spin_lock(&iboe->lock);
- if (!iboe->netdevs[port - 1]) {
- spin_unlock(&iboe->lock);
- return -EINVAL;
- }
- spin_unlock(&iboe->lock);
memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
if (rdma_link_local_addr(&in6))
@@ -92,15 +91,15 @@
}
static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct mlx4_ib_ah *ah)
+ struct mlx4_ib_ah *ah)
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
struct mlx4_dev *dev = ibdev->dev;
+ union ib_gid sgid;
u8 mac[6];
int err;
int is_mcast;
u16 vlan_tag;
- union ib_gid sgid;
err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
if (err)
@@ -130,7 +129,7 @@
ah->av.ib.dlid = cpu_to_be16(0xc000);
memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
- ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+ ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29);
return &ah->ibah;
}
@@ -147,25 +146,24 @@
if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
if (!(ah_attr->ah_flags & IB_AH_GRH)) {
ret = ERR_PTR(-EINVAL);
- goto out;
} else {
- /* TBD: need to handle the case when we get called
- in an atomic context and there we might sleep. We
- don't expect this currently since we're working with
- link local addresses which we can translate without
- going to sleep */
+ /*
+ * TBD: need to handle the case when we get
+ * called in an atomic context and there we
+ * might sleep. We don't expect this
+ * currently since we're working with link
+ * local addresses which we can translate
+ * without going to sleep.
+ */
ret = create_iboe_ah(pd, ah_attr, ah);
- if (IS_ERR(ret))
- goto out;
- else
- return ret;
}
+
+ if (IS_ERR(ret))
+ kfree(ah);
+
+ return ret;
} else
return create_ib_ah(pd, ah_attr, ah); /* never fails */
-
-out:
- kfree(ah);
- return ret;
}
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
@@ -202,4 +200,3 @@
kfree(to_mah(ah));
return 0;
}
-
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/ah.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
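A note on the shift change above (sl << 28 becoming sl << 29): on the Ethernet (RoCE) link layer the SL is a 3-bit field at the top of sl_tclass_flowlabel, versus the 4-bit IB SL, which is consistent with the cq.c hunk further down where the poll path reads sl_vid >> 13 on Ethernet ports and sl_vid >> 12 on IB ports. A self-contained sketch of the packing; pack_eth_sl is a hypothetical name, not a driver symbol:

    #include <stdint.h>

    /* Hypothetical illustration: place a 3-bit SL in bits 31:29 of the
     * 32-bit sl_tclass_flowlabel word of an Ethernet address vector. */
    static inline uint32_t pack_eth_sl(uint8_t sl)
    {
            return (uint32_t)(sl & 0x7) << 29;
    }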
Added: trunk/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,687 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*************************************************************/
+/* This file supports the handling of the Alias GUID feature. */
+/*************************************************************/
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_pack.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/delay.h>
+#include "mlx4_ib.h"
+
+/*
+ * The driver keeps the current state of all GUIDs as they are in the HW.
+ * Whenever an SMP MAD GUIDInfo record is received, the data is cached.
+ */
+
+struct mlx4_alias_guid_work_context {
+ u8 port;
+ struct mlx4_ib_dev *dev;
+ struct ib_sa_query *sa_query;
+ struct completion done;
+ int query_id;
+ struct list_head list;
+ int block_num;
+};
+
+struct mlx4_next_alias_guid_work {
+ u8 port;
+ u8 block_num;
+ struct mlx4_sriov_alias_guid_info_rec_det rec_det;
+};
+
+
+void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
+ u8 port_num, u8 *p_data)
+{
+ int i;
+ u64 guid_indexes;
+ int slave_id;
+ int port_index = port_num - 1;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+
+ guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
+ ports_guid[port_num - 1].
+ all_rec_per_port[block_num].guid_indexes);
+ pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, (long long)guid_indexes);
+
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ /* The location of the specific index runs from bit 4
+ * through bit 11 */
+ if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
+ slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i;
+ if (slave_id >= dev->dev->num_slaves) {
+ pr_debug("The last slave: %d\n", slave_id);
+ return;
+ }
+
+ /* cache the guid: */
+ memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
+ &p_data[i * GUID_REC_SIZE],
+ GUID_REC_SIZE);
+ } else
+ pr_debug("Guid number: %d in block: %d"
+ " was not updated\n", i, block_num);
+ }
+}
+
+static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
+{
+ if (index >= NUM_ALIAS_GUID_PER_PORT) {
+ pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
+ return (__force __be64) -1;
+ }
+ return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
+}
+
+
+ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
+{
+ return IB_SA_COMP_MASK(4 + index);
+}
+
+/*
+ * Whenever a new GUID is set/unset (GUID table change), create an event
+ * and notify the relevant slave (the master should also be notified).
+ * If the GUID value is not as we have in the cache the slave will not be
+ * updated; in this case it waits for the smp_snoop or the port management
+ * event to call the function and to update the slave.
+ * block_number - the index of the block (16 blocks available)
+ * port_number - 1 or 2
+ */
+void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
+ int block_num, u8 port_num,
+ u8 *p_data)
+{
+ int i;
+ u64 guid_indexes;
+ int slave_id;
+ enum slave_port_state new_state;
+ enum slave_port_state prev_state;
+ __be64 tmp_cur_ag, form_cache_ag;
+ enum slave_port_gen_event gen_event;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+
+ guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
+ ports_guid[port_num - 1].
+ all_rec_per_port[block_num].guid_indexes);
+ pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, (long long)guid_indexes);
+
+ /*calculate the slaves and notify them*/
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ /* the location of the specific index runs from bits 4..11 */
+ if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
+ continue;
+
+ slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i;
+ if (slave_id >= dev->dev->num_slaves)
+ return;
+ tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
+ form_cache_ag = get_cached_alias_guid(dev, port_num,
+ (NUM_ALIAS_GUID_IN_REC * block_num) + i);
+ /*
+ * Check whether the GUID is the same as in the cache; if it
+ * differs, wait for smp_snoop or the port management change
+ * event to update the slave on its port state change.
+ */
+ if (tmp_cur_ag != form_cache_ag)
+ continue;
+ mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
+
+ /* Two cases: valid GUID, and invalid GUID */
+
+ if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
+ prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
+ new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
+ MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
+ &gen_event);
+ pr_debug("slave: %d, port: %d prev_port_state: %d,"
+ " new_port_state: %d, gen_event: %d\n",
+ slave_id, port_num, prev_state, new_state, gen_event);
+ if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
+ pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
+ slave_id, port_num);
+ mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
+ port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
+ }
+ } else { /* request to invalidate GUID */
+ set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
+ MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
+ &gen_event);
+ pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
+ slave_id, port_num);
+ mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
+ MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+ }
+ }
+}
+
+static void aliasguid_query_handler(int status,
+ struct ib_sa_guidinfo_rec *guid_rec,
+ void *context)
+{
+ struct mlx4_ib_dev *dev;
+ struct mlx4_alias_guid_work_context *cb_ctx = context;
+ u8 port_index;
+ int i;
+ struct mlx4_sriov_alias_guid_info_rec_det *rec;
+ unsigned long flags, flags1;
+
+ if (!context)
+ return;
+
+ dev = cb_ctx->dev;
+ port_index = cb_ctx->port - 1;
+ rec = &dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[cb_ctx->block_num];
+
+ if (status) {
+ rec->status = MLX4_GUID_INFO_STATUS_IDLE;
+ pr_debug("(port: %d) failed: status = %d\n",
+ cb_ctx->port, status);
+ goto out;
+ }
+
+ if (guid_rec->block_num != cb_ctx->block_num) {
+ pr_err("block num mismatch: %d != %d\n",
+ cb_ctx->block_num, guid_rec->block_num);
+ goto out;
+ }
+
+ pr_debug("lid/port: %d/%d, block_num: %d\n",
+ be16_to_cpu(guid_rec->lid), cb_ctx->port,
+ guid_rec->block_num);
+
+ rec = &dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[guid_rec->block_num];
+
+ rec->status = MLX4_GUID_INFO_STATUS_SET;
+ rec->method = MLX4_GUID_INFO_RECORD_SET;
+
+ for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ __be64 tmp_cur_ag;
+ tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
+ /* Check whether the SM failed to assign one of the records.
+ * If so, and it was not a sysadmin request, ask the SM to
+ * give a new GUID (instead of the driver-requested one).
+ */
+ if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
+ mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
+ "block_num: %d was declined by SM, "
+ "ownership by %d (0 = driver, 1=sysAdmin,"
+ " 2=None)\n", __func__, i,
+ guid_rec->block_num, rec->ownership);
+ if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
+ /* if it is driver-assigned, ask the SM for a new GUID */
+ *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
+ MLX4_NOT_SET_GUID;
+
+ /* Mark the record as not assigned, and let it
+ * be sent again in the next scheduled work. */
+ rec->status = MLX4_GUID_INFO_STATUS_IDLE;
+ rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+ }
+ } else {
+ /* Properly assigned record: save the GUID we just
+ * got from the SM in admin_guid so that it persists;
+ * in subsequent requests to the SM the process will
+ * ask for the same GUID. */
+ if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
+ tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
+ /* the sysadmin assignment failed.*/
+ mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
+ " admin guid after SysAdmin "
+ "configuration. "
+ "Record num %d in block_num:%d "
+ "was declined by SM, "
+ "new val(0x%llx) was kept\n",
+ __func__, i,
+ guid_rec->block_num,
+ (long long)be64_to_cpu(*(__be64 *) &
+ rec->all_recs[i * GUID_REC_SIZE]));
+ } else {
+ memcpy(&rec->all_recs[i * GUID_REC_SIZE],
+ &guid_rec->guid_info_list[i * GUID_REC_SIZE],
+ GUID_REC_SIZE);
+ }
+ }
+ }
+ /*
+ * The function is called here to cover the case where the
+ * SM does not send an SMP; the driver then notifies the
+ * slave from the SA response.
+ */
+ mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
+ cb_ctx->port,
+ guid_rec->guid_info_list);
+out:
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ if (!dev->sriov.is_going_down)
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
+ &dev->sriov.alias_guid.ports_guid[port_index].
+ alias_guid_work, 0);
+ if (cb_ctx->sa_query) {
+ list_del(&cb_ctx->list);
+ kfree(cb_ctx);
+ } else
+ complete(&cb_ctx->done);
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
+{
+ int i;
+ u64 cur_admin_val;
+ ib_sa_comp_mask comp_mask = 0;
+
+ dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
+ = MLX4_GUID_INFO_STATUS_IDLE;
+ dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
+ = MLX4_GUID_INFO_RECORD_SET;
+
+ /* calculate the comp_mask for that record.*/
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ cur_admin_val =
+ *(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
+ all_rec_per_port[index].all_recs[GUID_REC_SIZE * i];
+ /*
+ * Check the admin value: if it is marked for delete (~00LL),
+ * or it is the first GUID of the first record (the HW GUID),
+ * or the record is not owned by the sysadmin and the SM does
+ * not need to assign GUIDs, then do not put it up for
+ * assignment.
+ */
+ if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
+ (!index && !i) ||
+ MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
+ ports_guid[port - 1].all_rec_per_port[index].ownership)
+ continue;
+ comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+ }
+ dev->sriov.alias_guid.ports_guid[port - 1].
+ all_rec_per_port[index].guid_indexes = comp_mask;
+}
+
+static int set_guid_rec(struct ib_device *ibdev,
+ u8 port, int index,
+ struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+{
+ int err;
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct ib_sa_guidinfo_rec guid_info_rec;
+ ib_sa_comp_mask comp_mask;
+ struct ib_port_attr attr;
+ struct mlx4_alias_guid_work_context *callback_context;
+ unsigned long resched_delay, flags, flags1;
+ struct list_head *head =
+ &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
+
+ err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
+ if (err) {
+ pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
+ err, port);
+ return err;
+ }
+ /* Check that the port was configured by the SM; otherwise there is no need to send. */
+ if (attr.state != IB_PORT_ACTIVE) {
+ pr_debug("port %d not active...rescheduling\n", port);
+ resched_delay = 5 * HZ;
+ err = -EAGAIN;
+ goto new_schedule;
+ }
+
+ callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL);
+ if (!callback_context) {
+ err = -ENOMEM;
+ resched_delay = HZ * 5;
+ goto new_schedule;
+ }
+ callback_context->port = port;
+ callback_context->dev = dev;
+ callback_context->block_num = index;
+
+ memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
+
+ guid_info_rec.lid = cpu_to_be16(attr.lid);
+ guid_info_rec.block_num = index;
+
+ memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
+ GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC);
+ comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM |
+ rec_det->guid_indexes;
+
+ init_completion(&callback_context->done);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ list_add_tail(&callback_context->list, head);
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+
+ callback_context->query_id =
+ ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
+ ibdev, port, &guid_info_rec,
+ comp_mask, rec_det->method, 1000,
+ GFP_KERNEL, aliasguid_query_handler,
+ callback_context,
+ &callback_context->sa_query);
+ if (callback_context->query_id < 0) {
+ pr_debug("ib_sa_guid_info_rec_query failed, query_id: "
+ "%d. will reschedule to the next 1 sec.\n",
+ callback_context->query_id);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ list_del(&callback_context->list);
+ kfree(callback_context);
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ resched_delay = 1 * HZ;
+ err = -EAGAIN;
+ goto new_schedule;
+ }
+ err = 0;
+ goto out;
+
+new_schedule:
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ invalidate_guid_record(dev, port, index);
+ if (!dev->sriov.is_going_down) {
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
+ &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
+ resched_delay);
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+
+out:
+ return err;
+}
+
+void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
+{
+ int i;
+ unsigned long flags, flags1;
+
+ pr_debug("port %d\n", port);
+
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
+ invalidate_guid_record(dev, port, i);
+
+ if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) {
+ /*
+ * Make sure no work is waiting in the queue; if the work is
+ * already queued (not on the timer) the cancel will fail.
+ * That is not a problem because we just want the work started.
+ */
+ cancel_delayed_work(&dev->sriov.alias_guid.
+ ports_guid[port - 1].alias_guid_work);
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
+ &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
+ 0);
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+/* The function returns the next record that was
+ * not configured (or failed to be configured) */
+static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
+ struct mlx4_next_alias_guid_work *rec)
+{
+ int j;
+ unsigned long flags;
+
+ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+ if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
+ MLX4_GUID_INFO_STATUS_IDLE) {
+ memcpy(&rec->rec_det,
+ &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
+ sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
+ rec->port = port;
+ rec->block_num = j;
+ dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
+ MLX4_GUID_INFO_STATUS_PENDING;
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+ return 0;
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+ }
+ return -ENOENT;
+}
+
+static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
+ int rec_index,
+ struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+{
+ dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
+ rec_det->guid_indexes;
+ memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
+ rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
+ dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
+ rec_det->status;
+}
+
+static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
+{
+ int j;
+ struct mlx4_sriov_alias_guid_info_rec_det rec_det;
+
+ for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
+ memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
+ rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
+ IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
+ IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
+ IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
+ IB_SA_GUIDINFO_REC_GID7;
+ rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
+ set_administratively_guid_record(dev, port, j, &rec_det);
+ }
+}
+
+static void alias_guid_work(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ int ret = 0;
+ struct mlx4_next_alias_guid_work *rec;
+ struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port =
+ container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det,
+ alias_guid_work);
+ struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent;
+ struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid,
+ struct mlx4_ib_sriov,
+ alias_guid);
+ struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
+
+ rec = kzalloc(sizeof *rec, GFP_KERNEL);
+ if (!rec) {
+ pr_err("alias_guid_work: No Memory\n");
+ return;
+ }
+
+ pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
+ ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
+ if (ret) {
+ pr_debug("No more records to update.\n");
+ goto out;
+ }
+
+ set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
+ &rec->rec_det);
+
+out:
+ kfree(rec);
+}
+
+
+void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
+{
+ unsigned long flags, flags1;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ if (!dev->sriov.is_going_down) {
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
+ &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
+{
+ int i;
+ struct mlx4_ib_sriov *sriov = &dev->sriov;
+ struct mlx4_alias_guid_work_context *cb_ctx;
+ struct mlx4_sriov_alias_guid_port_rec_det *det;
+ struct ib_sa_query *sa_query;
+ unsigned long flags;
+
+ for (i = 0 ; i < dev->num_ports; i++) {
+ cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work);
+ det = &sriov->alias_guid.ports_guid[i];
+ spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
+ while (!list_empty(&det->cb_list)) {
+ cb_ctx = list_entry(det->cb_list.next,
+ struct mlx4_alias_guid_work_context,
+ list);
+ sa_query = cb_ctx->sa_query;
+ cb_ctx->sa_query = NULL;
+ list_del(&cb_ctx->list);
+ spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
+ ib_sa_cancel_query(cb_ctx->query_id, sa_query);
+ wait_for_completion(&cb_ctx->done);
+ kfree(cb_ctx);
+ spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
+ }
+ spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
+ }
+ for (i = 0 ; i < dev->num_ports; i++) {
+ flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ }
+ ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
+ kfree(dev->sriov.alias_guid.sa_client);
+}
+
+int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
+{
+ char alias_wq_name[15];
+ int ret = 0;
+ int i, j, k;
+ union ib_gid gid;
+
+ if (!mlx4_is_master(dev->dev))
+ return 0;
+ dev->sriov.alias_guid.sa_client =
+ kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL);
+ if (!dev->sriov.alias_guid.sa_client)
+ return -ENOMEM;
+
+ ib_sa_register_client(dev->sriov.alias_guid.sa_client);
+
+ spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
+
+ for (i = 1; i <= dev->num_ports; ++i) {
+ if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) {
+ ret = -EFAULT;
+ goto err_unregister;
+ }
+ }
+
+ for (i = 0 ; i < dev->num_ports; i++) {
+ memset(&dev->sriov.alias_guid.ports_guid[i], 0,
+ sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
+ /*Check if the SM doesn't need to assign the GUIDs*/
+ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+ if (mlx4_ib_sm_guid_assign) {
+ dev->sriov.alias_guid.ports_guid[i].
+ all_rec_per_port[j].
+ ownership = MLX4_GUID_DRIVER_ASSIGN;
+ continue;
+ }
+ dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
+ ownership = MLX4_GUID_NONE_ASSIGN;
+ /* Mark each value as deleted until the
+ * sysadmin provides a valid value. */
+ for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
+ *(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
+ all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
+ cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
+ }
+ }
+ INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
+ /* Prepare the records; set them to be allocated by the SM. */
+ for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
+ invalidate_guid_record(dev, i + 1, j);
+
+ dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
+ dev->sriov.alias_guid.ports_guid[i].port = i;
+ if (mlx4_ib_sm_guid_assign)
+ set_all_slaves_guids(dev, i);
+
+ snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
+ dev->sriov.alias_guid.ports_guid[i].wq =
+ create_singlethread_workqueue(alias_wq_name);
+ if (!dev->sriov.alias_guid.ports_guid[i].wq) {
+ ret = -ENOMEM;
+ goto err_thread;
+ }
+ INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work,
+ alias_guid_work);
+ }
+ return 0;
+
+err_thread:
+ for (--i; i >= 0; i--) {
+ destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ dev->sriov.alias_guid.ports_guid[i].wq = NULL;
+ }
+
+err_unregister:
+ ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
+ kfree(dev->sriov.alias_guid.sa_client);
+ dev->sriov.alias_guid.sa_client = NULL;
+ pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret);
+ return ret;
+}
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
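A note on the index arithmetic used throughout alias_GUID.c above: each GUIDInfo record holds NUM_ALIAS_GUID_IN_REC (eight) GUIDs, and SA component-mask bits 4 through 11 select entries within the record, which is why mlx4_ib_get_aguid_comp_mask_from_ix() returns IB_SA_COMP_MASK(4 + index) and the loops call test_bit(i + 4, ...). A self-contained sketch of the mapping from (block, bit) to slave index; only the bit layout is taken from the code above, the helper names are made up:

    #include <stdint.h>
    #include <stdio.h>

    #define NUM_ALIAS_GUID_IN_REC 8

    /* Component-mask bit covering GUID 'index' of a GUIDInfo record. */
    static uint64_t aguid_comp_mask(int index)
    {
            return 1ULL << (4 + index);
    }

    int main(void)
    {
            uint64_t guid_indexes = aguid_comp_mask(0) | aguid_comp_mask(5);
            int block_num = 2;
            int i;

            for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++)
                    if (guid_indexes & aguid_comp_mask(i))
                            printf("slave %d selected\n",
                                   block_num * NUM_ALIAS_GUID_IN_REC + i);
            return 0;
    }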
Added: trunk/sys/ofed/drivers/infiniband/hw/mlx4/cm.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/cm.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/cm.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/idr.h>
+#include <rdma/ib_cm.h>
+
+#include "mlx4_ib.h"
+
+#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ)
+
+struct id_map_entry {
+ struct rb_node node;
+
+ u32 sl_cm_id;
+ u32 pv_cm_id;
+ int slave_id;
+ int scheduled_delete;
+ struct mlx4_ib_dev *dev;
+
+ struct list_head list;
+ struct delayed_work timeout;
+};
+
+struct cm_generic_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 remote_comm_id;
+};
+
+struct cm_req_msg {
+ unsigned char unused[0x60];
+ union ib_gid primary_path_sgid;
+};
+
+
+static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
+{
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ msg->local_comm_id = cpu_to_be32(cm_id);
+}
+
+static u32 get_local_comm_id(struct ib_mad *mad)
+{
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+
+ return be32_to_cpu(msg->local_comm_id);
+}
+
+static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
+{
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ msg->remote_comm_id = cpu_to_be32(cm_id);
+}
+
+static u32 get_remote_comm_id(struct ib_mad *mad)
+{
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+
+ return be32_to_cpu(msg->remote_comm_id);
+}
+
+static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
+{
+ struct cm_req_msg *msg = (struct cm_req_msg *)mad;
+
+ return msg->primary_path_sgid;
+}
+
+/* Lock should be held when calling this */
+static struct id_map_entry *
+id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id)
+{
+ struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
+ struct rb_node *node = sl_id_map->rb_node;
+
+ while (node) {
+ struct id_map_entry *id_map_entry =
+ rb_entry(node, struct id_map_entry, node);
+
+ if (id_map_entry->sl_cm_id > sl_cm_id)
+ node = node->rb_left;
+ else if (id_map_entry->sl_cm_id < sl_cm_id)
+ node = node->rb_right;
+ else if (id_map_entry->slave_id > slave_id)
+ node = node->rb_left;
+ else if (id_map_entry->slave_id < slave_id)
+ node = node->rb_right;
+ else
+ return id_map_entry;
+ }
+ return NULL;
+}
+
+static void id_map_ent_timeout(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout);
+ struct id_map_entry *db_ent, *found_ent;
+ struct mlx4_ib_dev *dev = ent->dev;
+ struct mlx4_ib_sriov *sriov = &dev->sriov;
+ struct rb_root *sl_id_map = &sriov->sl_id_map;
+ int pv_id = (int) ent->pv_cm_id;
+
+ spin_lock(&sriov->id_map_lock);
+ db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id);
+ if (!db_ent)
+ goto out;
+ found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id);
+ if (found_ent && found_ent == ent)
+ rb_erase(&found_ent->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, pv_id);
+
+out:
+ list_del(&ent->list);
+ spin_unlock(&sriov->id_map_lock);
+ kfree(ent);
+}
+
+static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
+{
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+ struct rb_root *sl_id_map = &sriov->sl_id_map;
+ struct id_map_entry *ent, *found_ent;
+
+ spin_lock(&sriov->id_map_lock);
+ ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id);
+ if (!ent)
+ goto out;
+ found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
+ if (found_ent && found_ent == ent)
+ rb_erase(&found_ent->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, pv_cm_id);
+out:
+ spin_unlock(&sriov->id_map_lock);
+}
+
+static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
+{
+ struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
+ struct rb_node **link = &sl_id_map->rb_node, *parent = NULL;
+ struct id_map_entry *ent;
+ int slave_id = new->slave_id;
+ int sl_cm_id = new->sl_cm_id;
+
+ ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
+ if (ent) {
+ pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n",
+ sl_cm_id);
+
+ rb_replace_node(&ent->node, &new->node, sl_id_map);
+ return;
+ }
+
+ /* Go to the bottom of the tree */
+ while (*link) {
+ parent = *link;
+ ent = rb_entry(parent, struct id_map_entry, node);
+
+ if (ent->sl_cm_id > sl_cm_id || (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id))
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ rb_link_node(&new->node, parent, link);
+ rb_insert_color(&new->node, sl_id_map);
+}
+
+static struct id_map_entry *
+id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
+{
+ int ret, id;
+ static int next_id;
+ struct id_map_entry *ent;
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+
+ ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL);
+ if (!ent) {
+ mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ent->sl_cm_id = sl_cm_id;
+ ent->slave_id = slave_id;
+ ent->scheduled_delete = 0;
+ ent->dev = to_mdev(ibdev);
+ INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);
+
+ do {
+ spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
+ ret = idr_get_new_above(&sriov->pv_id_table, ent,
+ next_id, &id);
+ if (!ret) {
+ next_id = ((unsigned) id + 1) & MAX_IDR_MASK;
+ ent->pv_cm_id = (u32)id;
+ sl_id_map_add(ibdev, ent);
+ }
+
+ spin_unlock(&sriov->id_map_lock);
+ } while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL));
+ /* idr_get_new_above can return -ENOSPC; don't insert in that case. */
+ if (!ret) {
+ spin_lock(&sriov->id_map_lock);
+ list_add_tail(&ent->list, &sriov->cm_list);
+ spin_unlock(&sriov->id_map_lock);
+ return ent;
+ }
+ /*error flow*/
+ kfree(ent);
+ mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret);
+ return ERR_PTR(-ENOMEM);
+}
+
+static struct id_map_entry *
+id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id)
+{
+ struct id_map_entry *ent;
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+
+ spin_lock(&sriov->id_map_lock);
+ if (*pv_cm_id == -1) {
+ ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id);
+ if (ent)
+ *pv_cm_id = (int) ent->pv_cm_id;
+ } else
+ ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id);
+ spin_unlock(&sriov->id_map_lock);
+
+ return ent;
+}
+
+static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
+{
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+ unsigned long flags;
+
+ spin_lock(&sriov->id_map_lock);
+ spin_lock_irqsave(&sriov->going_down_lock, flags);
+ /* Make sure no new work gets scheduled from inside the scheduled work. */
+ if (!sriov->is_going_down) {
+ id->scheduled_delete = 1;
+ schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ }
+ spin_unlock_irqrestore(&sriov->going_down_lock, flags);
+ spin_unlock(&sriov->id_map_lock);
+}
+
+int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
+ struct ib_mad *mad)
+{
+ struct id_map_entry *id;
+ u32 sl_cm_id;
+ int pv_cm_id = -1;
+
+ sl_cm_id = get_local_comm_id(mad);
+
+ if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
+ id = id_map_alloc(ibdev, slave_id, sl_cm_id);
+ if (IS_ERR(id)) {
+ mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
+ __func__, slave_id, sl_cm_id);
+ return PTR_ERR(id);
+ }
+ } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
+ return 0;
+ } else {
+ id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
+ }
+
+ if (!id) {
+ pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
+ slave_id, sl_cm_id);
+ return -EINVAL;
+ }
+
+ set_local_comm_id(mad, id->pv_cm_id);
+
+ if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
+ schedule_delayed(ibdev, id);
+ else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
+ id_map_find_del(ibdev, pv_cm_id);
+
+ return 0;
+}
+
+int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
+ struct ib_mad *mad, int is_eth)
+{
+ u32 pv_cm_id;
+ struct id_map_entry *id;
+
+ if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
+ union ib_gid gid;
+
+ if (is_eth)
+ return 0;
+
+ gid = gid_from_req_msg(ibdev, mad);
+ *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
+ if (*slave < 0) {
+ mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n",
+ (long long)gid.global.interface_id);
+ return -ENOENT;
+ }
+ return 0;
+ }
+
+ pv_cm_id = get_remote_comm_id(mad);
+ id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
+
+ if (!id) {
+ pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
+ return -ENOENT;
+ }
+
+ if (!is_eth)
+ *slave = id->slave_id;
+ set_remote_comm_id(mad, id->sl_cm_id);
+
+ if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
+ schedule_delayed(ibdev, id);
+ else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
+ id_map_find_del(ibdev, (int) pv_cm_id);
+ }
+
+ return 0;
+}
+
+void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
+{
+ spin_lock_init(&dev->sriov.id_map_lock);
+ INIT_LIST_HEAD(&dev->sriov.cm_list);
+ dev->sriov.sl_id_map = RB_ROOT;
+ idr_init(&dev->sriov.pv_id_table);
+ idr_pre_get(&dev->sriov.pv_id_table, GFP_KERNEL);
+}
+
+/* slave = -1 ==> all slaves */
+/* TBD -- call paravirt clean for single slave. Need for slave RESET event */
+void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
+{
+ struct mlx4_ib_sriov *sriov = &dev->sriov;
+ struct rb_root *sl_id_map = &sriov->sl_id_map;
+ struct list_head lh;
+ struct rb_node *nd;
+ int need_flush = 1;
+ struct id_map_entry *map, *tmp_map;
+ /* cancel all delayed work queue entries */
+ INIT_LIST_HEAD(&lh);
+ spin_lock(&sriov->id_map_lock);
+ list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
+ if (slave < 0 || slave == map->slave_id) {
+ if (map->scheduled_delete)
+ need_flush &= !!cancel_delayed_work(&map->timeout);
+ }
+ }
+
+ spin_unlock(&sriov->id_map_lock);
+
+ if (!need_flush)
+ flush_scheduled_work(); /* make sure all timers were flushed */
+
+ /* now, remove all leftover entries from the databases */
+ spin_lock(&sriov->id_map_lock);
+ if (slave < 0) {
+ while (rb_first(sl_id_map)) {
+ struct id_map_entry *ent =
+ rb_entry(rb_first(sl_id_map),
+ struct id_map_entry, node);
+
+ rb_erase(&ent->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id);
+ }
+ list_splice_init(&dev->sriov.cm_list, &lh);
+ } else {
+ /* first, move nodes belonging to slave to db remove list */
+ nd = rb_first(sl_id_map);
+ while (nd) {
+ struct id_map_entry *ent =
+ rb_entry(nd, struct id_map_entry, node);
+ nd = rb_next(nd);
+ if (ent->slave_id == slave)
+ list_move_tail(&ent->list, &lh);
+ }
+ /* remove those nodes from databases */
+ list_for_each_entry_safe(map, tmp_map, &lh, list) {
+ rb_erase(&map->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id);
+ }
+
+ /* add remaining nodes from cm_list */
+ list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
+ if (slave == map->slave_id)
+ list_move_tail(&map->list, &lh);
+ }
+ }
+
+ spin_unlock(&sriov->id_map_lock);
+
+ /* free any map entries left behind due to cancel_delayed_work above */
+ list_for_each_entry_safe(map, tmp_map, &lh, list) {
+ list_del(&map->list);
+ kfree(map);
+ }
+}
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/cm.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
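The sl_id_map red-black tree in cm.c above is keyed by (sl_cm_id, slave_id), with sl_cm_id as the primary key; id_map_find_by_sl_id() and sl_id_map_add() walk the tree with the same ordering. A minimal sketch of that comparator logic, with a hypothetical function name:

    /* Mirrors the walk in id_map_find_by_sl_id(): sl_cm_id first,
     * slave_id as the tie-breaker.  Returns <0, 0 or >0. */
    static int id_map_cmp(unsigned int a_sl, int a_slave,
                          unsigned int b_sl, int b_slave)
    {
            if (a_sl != b_sl)
                    return a_sl < b_sl ? -1 : 1;
            if (a_slave != b_slave)
                    return a_slave < b_slave ? -1 : 1;
            return 0;
    }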
Modified: trunk/sys/ofed/drivers/infiniband/hw/mlx4/cq.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/cq.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/cq.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -33,7 +33,7 @@
#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>
-#include <linux/mlx4/srq.h>
+#include <linux/slab.h>
#include "mlx4_ib.h"
#include "user.h"
@@ -40,6 +40,7 @@
/* Which firmware version adds support for Resize CQ */
#define MLX4_FW_VER_RESIZE_CQ mlx4_fw_ver(2, 5, 0)
+#define MLX4_FW_VER_IGNORE_OVERRUN_CQ mlx4_fw_ver(2, 7, 8200)
static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
{
@@ -53,7 +54,7 @@
struct ib_cq *ibcq;
if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
- printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ pr_warn("Unexpected event type %d "
"on CQ %06x\n", type, cq->cqn);
return;
}
@@ -69,7 +70,7 @@
static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
{
- return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
+ return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
}
static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -80,8 +81,9 @@
static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
{
struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+ struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
- return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+ return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
}
@@ -102,12 +104,13 @@
{
int err;
- err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
+ err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
PAGE_SIZE * 2, &buf->buf);
if (err)
goto out;
+ buf->entry_size = dev->dev->caps.cqe_size;
err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
&buf->mtt);
if (err)
@@ -123,8 +126,7 @@
mlx4_mtt_cleanup(dev->dev, &buf->mtt);
err_buf:
- mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
- &buf->buf);
+ mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf);
out:
return err;
@@ -132,7 +134,7 @@
static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
{
- mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+ mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
}
static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@@ -140,14 +142,19 @@
u64 buf_addr, int cqe)
{
int err;
+ int cqe_size = dev->dev->caps.cqe_size;
+ int shift;
+ int n;
- *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+ *umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
- err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem),
- ilog2((*umem)->page_size), &buf->mtt);
+ n = ib_umem_page_count(*umem);
+ shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
+ err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
+
if (err)
goto err_buf;
@@ -175,12 +182,10 @@
struct mlx4_uar *uar;
int err;
- if (entries < 1 || entries > dev->dev->caps.max_cqes) {
- mlx4_ib_dbg("invalid num of entries: %d", entries);
+ if (entries < 1 || entries > dev->dev->caps.max_cqes)
return ERR_PTR(-EINVAL);
- }
- cq = kzalloc(sizeof *cq, GFP_KERNEL);
+ cq = kmalloc(sizeof *cq, GFP_KERNEL);
if (!cq)
return ERR_PTR(-ENOMEM);
@@ -227,10 +232,11 @@
uar = &dev->priv_uar;
}
+ if (dev->eq_table)
+ vector = dev->eq_table[vector % ibdev->num_comp_vectors];
+
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
- cq->db.dma, &cq->mcq,
- vector == IB_CQ_VECTOR_LEAST_ATTACHED ?
- MLX4_LEAST_ATTACHED_VECTOR : vector, 0);
+ cq->db.dma, &cq->mcq, vector, 0, 0);
if (err)
goto err_dbmap;
@@ -335,16 +341,23 @@
{
struct mlx4_cqe *cqe, *new_cqe;
int i;
+ int cqe_size = cq->buf.entry_size;
+ int cqe_inc = cqe_size == 64 ? 1 : 0;
i = cq->mcq.cons_index;
cqe = get_cqe(cq, i & cq->ibcq.cqe);
+ cqe += cqe_inc;
+
while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
(i + 1) & cq->resize_buf->cqe);
- memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+ memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
+ new_cqe += cqe_inc;
+
new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+ cqe += cqe_inc;
}
++cq->mcq.cons_index;
}
@@ -409,7 +422,7 @@
} else {
struct mlx4_ib_cq_buf tmp_buf;
int tmp_cqe = 0;
-
+
spin_lock_irq(&cq->lock);
if (cq->resize_buf) {
mlx4_ib_cq_resize_copy_cqes(cq);
@@ -445,9 +458,21 @@
out:
mutex_unlock(&cq->resize_mutex);
+
return err;
}
+int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibcq->device);
+ struct mlx4_ib_cq *cq = to_mcq(ibcq);
+
+ if (dev->dev->caps.fw_ver < MLX4_FW_VER_IGNORE_OVERRUN_CQ)
+ return -ENOSYS;
+
+ return mlx4_cq_ignore_overrun(dev->dev, &cq->mcq);
+}
+
int mlx4_ib_destroy_cq(struct ib_cq *cq)
{
struct mlx4_ib_dev *dev = to_mdev(cq->device);
@@ -473,7 +498,7 @@
{
__be32 *buf = cqe;
- printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ pr_debug("CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
@@ -483,7 +508,7 @@
struct ib_wc *wc)
{
if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
- printk(KERN_DEBUG "local QP operation err "
+ pr_debug("local QP operation err "
"(QPN %06x, WQE index %x, vendor syndrome %02x, "
"opcode = %02x)\n",
be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
@@ -554,6 +579,26 @@
checksum == cpu_to_be16(0xffff);
}
+static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
+ unsigned tail, struct mlx4_cqe *cqe)
+{
+ struct mlx4_ib_proxy_sqp_hdr *hdr;
+
+ ib_dma_sync_single_for_cpu(qp->ibqp.device,
+ qp->sqp_proxy_rcv[tail].map,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
+ wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
+ wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
+ wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
+ wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
+ wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
+ wc->dlid_path_bits = 0;
+
+ return 0;
+}
+
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_ib_qp **cur_qp,
struct ib_wc *wc)
@@ -562,12 +607,11 @@
struct mlx4_qp *mqp;
struct mlx4_ib_wq *wq;
struct mlx4_ib_srq *srq;
- struct mlx4_srq *msrq;
int is_send;
int is_error;
u32 g_mlpath_rqpn;
- int is_xrc_recv = 0;
u16 wqe_ctr;
+ unsigned tail = 0;
repoll:
cqe = next_cqe_sw(cq);
@@ -574,6 +618,9 @@
if (!cqe)
return -EAGAIN;
+ if (cq->buf.entry_size == 64)
+ cqe++;
+
++cq->mcq.cons_index;
/*
@@ -588,7 +635,7 @@
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
is_send)) {
- printk(KERN_WARNING "Completion for NOP opcode detected!\n");
+ pr_warn("Completion for NOP opcode detected!\n");
return -EINVAL;
}
@@ -608,24 +655,7 @@
goto repoll;
}
- if ((be32_to_cpu(cqe->vlan_my_qpn) & (1 << 23)) && !is_send) {
- /*
- * We do not have to take the XRC SRQ table lock here,
- * because CQs will be locked while XRC SRQs are removed
- * from the table.
- */
- msrq = __mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
- be32_to_cpu(cqe->g_mlpath_rqpn) &
- 0xffffff);
- if (unlikely(!msrq)) {
- printk(KERN_WARNING "CQ %06x with entry for unknown "
- "XRC SRQ %06x\n", cq->mcq.cqn,
- be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff);
- return -EINVAL;
- }
- is_xrc_recv = 1;
- srq = to_mibsrq(msrq);
- } else if (!*cur_qp ||
+ if (!*cur_qp ||
(be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) {
/*
* We do not have to take the QP table lock here,
@@ -635,7 +665,7 @@
mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
be32_to_cpu(cqe->vlan_my_qpn));
if (unlikely(!mqp)) {
- printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
+ pr_warn("CQ %06x with entry for unknown QPN %06x\n",
cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
return -EINVAL;
}
@@ -643,7 +673,7 @@
*cur_qp = to_mibqp(mqp);
}
- wc->qp = is_xrc_recv ? NULL: &(*cur_qp)->ibqp;
+ wc->qp = &(*cur_qp)->ibqp;
if (is_send) {
wq = &(*cur_qp)->sq;
@@ -653,10 +683,6 @@
}
wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
++wq->tail;
- } else if (is_xrc_recv) {
- wqe_ctr = be16_to_cpu(cqe->wqe_index);
- wc->wr_id = srq->wrid[wqe_ctr];
- mlx4_ib_free_srq_wqe(srq, wqe_ctr);
} else if ((*cur_qp)->ibqp.srq) {
srq = to_msrq((*cur_qp)->ibqp.srq);
wqe_ctr = be16_to_cpu(cqe->wqe_index);
@@ -664,7 +690,8 @@
mlx4_ib_free_srq_wqe(srq, wqe_ctr);
} else {
wq = &(*cur_qp)->rq;
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ tail = wq->tail & (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[tail];
++wq->tail;
}
@@ -747,14 +774,26 @@
break;
}
+ if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
+ if ((*cur_qp)->mlx4_ib_qp_type &
+ (MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
+ return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
+ }
+
wc->slid = be16_to_cpu(cqe->rlid);
- wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
wc->src_qp = g_mlpath_rqpn & 0xffffff;
wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
- wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum);
+ wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
+ cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
+ if (rdma_port_get_link_layer(wc->qp->device,
+ (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET)
+ wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
+ else
+ wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
}
return 0;
@@ -776,8 +815,7 @@
break;
}
- if (npolled)
- mlx4_cq_set_ci(&cq->mcq);
+ mlx4_cq_set_ci(&cq->mcq);
spin_unlock_irqrestore(&cq->lock, flags);
@@ -804,11 +842,8 @@
int nfreed = 0;
struct mlx4_cqe *cqe, *dest;
u8 owner_bit;
- int is_xrc_srq = 0;
+ int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
- if (srq && srq->ibsrq.xrc_cq)
- is_xrc_srq = 1;
-
/*
* First we need to find the current producer index, so we
* know where to start cleaning from. It doesn't matter if HW
@@ -826,15 +861,16 @@
*/
while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
- if (((be32_to_cpu(cqe->vlan_my_qpn) & 0xffffff) == qpn) ||
- (is_xrc_srq &&
- (be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff) ==
- srq->msrq.srqn)) {
+ cqe += cqe_inc;
+
+ if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
++nfreed;
} else if (nfreed) {
dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+ dest += cqe_inc;
+
owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
memcpy(dest, cqe, sizeof *cqe);
dest->owner_sr_opcode = owner_bit |
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/cq.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
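The recurring cqe += cqe_inc / cqe++ pattern in the cq.c hunks above handles the variable CQE stride introduced via dev->dev->caps.cqe_size: with 64-byte entries the valid 32-byte hardware CQE sits in the second half of each entry, so readers step past one struct mlx4_cqe first. A standalone sketch of the adjustment, using a 32-byte stand-in for the real structure:

    /* Stand-in for the driver's 32-byte CQE layout. */
    struct mlx4_cqe {
            unsigned char raw[32];
    };

    /* Return the valid CQE within an entry that may be 32 or 64 bytes. */
    static inline struct mlx4_cqe *
    cqe_payload(struct mlx4_cqe *cqe, int entry_size)
    {
            return entry_size == 64 ? cqe + 1 : cqe;
    }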
Index: trunk/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mlx4/mad.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/mad.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/mad.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -32,8 +32,13 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_cache.h>
+#include <linux/random.h>
#include <linux/mlx4/cmd.h>
+#include <linux/gfp.h>
+#include <rdma/ib_pma.h>
#include "mlx4_ib.h"
@@ -42,7 +47,62 @@
MLX4_IB_VENDOR_CLASS2 = 0xa
};
-int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+#define MLX4_TUN_SEND_WRID_SHIFT 34
+#define MLX4_TUN_QPN_SHIFT 32
+#define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
+#define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
+
+#define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
+#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
+
+ /* Port mgmt change event handling */
+
+#define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr)
+#define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask)
+#define NUM_IDX_IN_PKEY_TBL_BLK 32
+#define GUID_TBL_ENTRY_SIZE 8 /* size in bytes */
+#define GUID_TBL_BLK_NUM_ENTRIES 8
+#define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES)
+
+struct mlx4_mad_rcv_buf {
+ struct ib_grh grh;
+ u8 payload[256];
+} __packed;
+
+struct mlx4_mad_snd_buf {
+ u8 payload[256];
+} __packed;
+
+struct mlx4_tunnel_mad {
+ struct ib_grh grh;
+ struct mlx4_ib_tunnel_header hdr;
+ struct ib_mad mad;
+} __packed;
+
+struct mlx4_rcv_tunnel_mad {
+ struct mlx4_rcv_tunnel_hdr hdr;
+ struct ib_grh grh;
+ struct ib_mad mad;
+} __packed;
+
+static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
+static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num);
+static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
+ int block, u32 change_bitmap);
+
+__be64 mlx4_ib_gen_node_guid(void)
+{
+#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
+ return cpu_to_be64(NODE_GUID_HI | random());
+}
+
+__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
+{
+ return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
+ cpu_to_be64(0xff00000000000000LL);
+}
+
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad)
{
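The MLX4_TUN_* macros in the hunk above pack tunnel bookkeeping into the 64-bit work-request ID: bit 34 flags a receive completion and bits 33:32 carry a 2-bit tunnel QP number, leaving the low 32 bits for the WQE index. A self-contained sketch of the encode/decode round trip; the helper names are hypothetical:

    #include <stdint.h>

    #define TUN_SEND_WRID_SHIFT 34
    #define TUN_QPN_SHIFT       32
    #define TUN_WRID_RECV       (((uint64_t)1) << TUN_SEND_WRID_SHIFT)

    static inline uint64_t tun_wrid(int is_recv, uint64_t qpn, uint64_t idx)
    {
            return (is_recv ? TUN_WRID_RECV : 0) |
                   ((qpn & 0x3) << TUN_QPN_SHIFT) | (idx & 0xffffffffULL);
    }

    static inline int tun_is_recv(uint64_t wrid)
    {
            return (int)((wrid >> TUN_SEND_WRID_SHIFT) & 0x1);
    }

    static inline int tun_wrid_qpn(uint64_t wrid)
    {
            return (int)((wrid >> TUN_QPN_SHIFT) & 0x3);
    }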
@@ -69,10 +129,13 @@
* Key check traps can't be generated unless we have in_wc to
* tell us where to send the trap.
*/
- if (ignore_mkey || !in_wc)
+ if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc)
op_modifier |= 0x1;
- if (ignore_bkey || !in_wc)
+ if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc)
op_modifier |= 0x2;
+ if (mlx4_is_mfunc(dev->dev) &&
+ (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc))
+ op_modifier |= 0x8;
if (in_wc) {
struct {
@@ -105,9 +168,10 @@
in_modifier |= in_wc->slid << 16;
}
- err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
- in_modifier, op_modifier,
- MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+ err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
+ mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+ (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED);
if (!err)
memcpy(response_mad, outmailbox->buf, 256);
@@ -122,6 +186,7 @@
{
struct ib_ah *new_ah;
struct ib_ah_attr ah_attr;
+ unsigned long flags;
if (!dev->send_agent[port_num - 1][0])
return;
@@ -136,53 +201,134 @@
if (IS_ERR(new_ah))
return;
- spin_lock(&dev->sm_lock);
+ spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
ib_destroy_ah(dev->sm_ah[port_num - 1]);
dev->sm_ah[port_num - 1] = new_ah;
- spin_unlock(&dev->sm_lock);
+ spin_unlock_irqrestore(&dev->sm_lock, flags);
}
/*
- * Snoop SM MADs for port info and P_Key table sets, so we can
- * synthesize LID change and P_Key change events.
+ * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can
+ * synthesize LID change, Client-Rereg, GID change, and P_Key change events.
*/
static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
- u16 prev_lid)
+ u16 prev_lid)
{
- struct ib_event event;
+ struct ib_port_info *pinfo;
+ u16 lid;
+ __be16 *base;
+ u32 bn, pkey_change_bitmap;
+ int i;
+
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
- mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
- if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
- struct ib_port_info *pinfo =
- (struct ib_port_info *) ((struct ib_smp *) mad)->data;
- u16 lid = be16_to_cpu(pinfo->lid);
+ mad->mad_hdr.method == IB_MGMT_METHOD_SET)
+ switch (mad->mad_hdr.attr_id) {
+ case IB_SMP_ATTR_PORT_INFO:
+ pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data;
+ lid = be16_to_cpu(pinfo->lid);
- update_sm_ah(to_mdev(ibdev), port_num,
+ update_sm_ah(dev, port_num,
be16_to_cpu(pinfo->sm_lid),
pinfo->neighbormtu_mastersmsl & 0xf);
- event.device = ibdev;
- event.element.port_num = port_num;
+ if (pinfo->clientrereg_resv_subnetto & 0x80)
+ handle_client_rereg_event(dev, port_num);
- if (pinfo->clientrereg_resv_subnetto & 0x80) {
- event.event = IB_EVENT_CLIENT_REREGISTER;
- ib_dispatch_event(&event);
+ if (prev_lid != lid)
+ handle_lid_change_event(dev, port_num);
+ break;
+
+ case IB_SMP_ATTR_PKEY_TABLE:
+ if (!mlx4_is_mfunc(dev->dev)) {
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_PKEY_CHANGE);
+ break;
}
- if (prev_lid != lid) {
- event.event = IB_EVENT_LID_CHANGE;
- ib_dispatch_event(&event);
+ /* at this point, we are running in the master.
+ * Slaves do not receive SMPs.
+ */
+ bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF;
+ base = (__be16 *) &(((struct ib_smp *)mad)->data[0]);
+ pkey_change_bitmap = 0;
+ for (i = 0; i < 32; i++) {
+ pr_debug("PKEY[%d] = x%x\n",
+ i + bn*32, be16_to_cpu(base[i]));
+ if (be16_to_cpu(base[i]) !=
+ dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) {
+ pkey_change_bitmap |= (1 << i);
+ dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] =
+ be16_to_cpu(base[i]);
+ }
}
+ pr_debug("PKEY Change event: port=%d, "
+ "block=0x%x, change_bitmap=0x%x\n",
+ port_num, bn, pkey_change_bitmap);
+
+ if (pkey_change_bitmap) {
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_PKEY_CHANGE);
+ if (!dev->sriov.is_going_down)
+ __propagate_pkey_ev(dev, port_num, bn,
+ pkey_change_bitmap);
+ }
+ break;
+
+ case IB_SMP_ATTR_GUID_INFO:
+ /* paravirtualized master's guid is guid 0 -- does not change */
+ if (!mlx4_is_master(dev->dev))
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_GID_CHANGE);
+ /* if master, notify relevant slaves */
+ if (mlx4_is_master(dev->dev) &&
+ !dev->sriov.is_going_down) {
+ bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod);
+ mlx4_ib_update_cache_on_guid_change(dev, bn, port_num,
+ (u8 *)(&((struct ib_smp *)mad)->data));
+ mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num,
+ (u8 *)(&((struct ib_smp *)mad)->data));
+ }
+ break;
+
+ default:
+ break;
}
+}
- if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
- event.device = ibdev;
- event.event = IB_EVENT_PKEY_CHANGE;
- event.element.port_num = port_num;
- ib_dispatch_event(&event);
+static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
+ int block, u32 change_bitmap)
+{
+ int i, ix, slave, err;
+ int have_event = 0;
+
+ for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) {
+ if (slave == mlx4_master_func_num(dev->dev))
+ continue;
+ if (!mlx4_is_slave_active(dev->dev, slave))
+ continue;
+
+ have_event = 0;
+ for (i = 0; i < 32; i++) {
+ if (!(change_bitmap & (1 << i)))
+ continue;
+ for (ix = 0;
+ ix < dev->dev->caps.pkey_table_len[port_num]; ix++) {
+ if (dev->pkeys.virt2phys_pkey[slave][port_num - 1]
+ [ix] == i + 32 * block) {
+ err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num);
+ pr_debug("propagate_pkey_ev: slave %d,"
+ " port %d, ix %d (%d)\n",
+ slave, port_num, ix, err);
+ have_event = 1;
+ break;
+ }
+ }
+ if (have_event)
+ break;
}
}
}
@@ -190,13 +336,15 @@
static void node_desc_override(struct ib_device *dev,
struct ib_mad *mad)
{
+ unsigned long flags;
+
if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
- spin_lock(&to_mdev(dev)->sm_lock);
+ spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags);
memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
- spin_unlock(&to_mdev(dev)->sm_lock);
+ spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags);
}
}
@@ -206,10 +354,13 @@
struct ib_mad_send_buf *send_buf;
struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
int ret;
+ unsigned long flags;
if (agent) {
send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
IB_MGMT_MAD_DATA, GFP_ATOMIC);
+ if (IS_ERR(send_buf))
+ return;
/*
* We rely here on the fact that MLX QPs don't use the
* address handle after the send is posted (this is
@@ -216,13 +367,13 @@
* wrong following the IB spec strictly, but we know
* it's OK for our devices).
*/
- spin_lock(&dev->sm_lock);
+ spin_lock_irqsave(&dev->sm_lock, flags);
memcpy(send_buf->mad, mad, sizeof *mad);
if ((send_buf->ah = dev->sm_ah[port_num - 1]))
ret = ib_post_send_mad(send_buf, NULL);
else
ret = -EINVAL;
- spin_unlock(&dev->sm_lock);
+ spin_unlock_irqrestore(&dev->sm_lock, flags);
if (ret)
ib_free_send_mad(send_buf);
@@ -229,24 +380,331 @@
}
}
-static int is_vendor_id(__be16 attr_id)
+static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
+ struct ib_sa_mad *sa_mad)
{
- return (attr_id & IB_SMP_ATTR_VENDOR_MASK) == IB_SMP_ATTR_VENDOR_MASK;
+ int ret = 0;
+
+ /* dispatch to different sa handlers */
+ switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
+ case IB_SA_ATTR_MC_MEMBER_REC:
+ ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad);
+ break;
+ default:
+ break;
+ }
+ return ret;
}
-static int supported_vendor_id(__be16 attr_id)
+int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
{
- return 1;
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int i;
+
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ if (dev->sriov.demux[port - 1].guid_cache[i] == guid)
+ return i;
+ }
+ return -1;
}
+
+static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
+ u8 port, u16 pkey, u16 *ix)
+{
+ int i, ret;
+ u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF;
+ u16 slot_pkey;
+
+ if (slave == mlx4_master_func_num(dev->dev))
+ return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix);
+
+ unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1;
+
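+ /* Walk the slave's virtual P_Key table, skipping entries mapped to
+ * the "unassigned" sentinel (the last physical index). Matching
+ * ignores the membership bit (bit 15): a full-membership slot is
+ * returned at once, else the first partial-membership match wins. */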
+ for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
+ if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix)
+ continue;
+
+ pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i];
+
+ ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey);
+ if (ret)
+ continue;
+ if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) {
+ if (slot_pkey & 0x8000) {
+ *ix = (u16) pkey_ix;
+ return 0;
+ } else {
+ /* take first partial pkey index found */
+ if (partial_ix == 0xFF)
+ partial_ix = pkey_ix;
+ }
+ }
+ }
+
+ if (partial_ix < 0xFF) {
+ *ix = (u16) partial_ix;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
+ enum ib_qp_type dest_qpt, struct ib_wc *wc,
+ struct ib_grh *grh, struct ib_mad *mad)
+{
+ struct ib_sge list;
+ struct ib_send_wr wr, *bad_wr;
+ struct mlx4_ib_demux_pv_ctx *tun_ctx;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ struct mlx4_rcv_tunnel_mad *tun_mad;
+ struct ib_ah_attr attr;
+ struct ib_ah *ah;
+ struct ib_qp *src_qp = NULL;
+ unsigned tun_tx_ix = 0;
+ int dqpn;
+ int ret = 0;
+ u16 tun_pkey_ix;
+ u16 cached_pkey;
+ u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
+
+ if (dest_qpt > IB_QPT_GSI)
+ return -EINVAL;
+
+ tun_ctx = dev->sriov.demux[port-1].tun[slave];
+
+ /* check if proxy qp created */
+ if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
+ return -EAGAIN;
+
+ /* QP0 forwarding only for Dom0 */
+ if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
+ return -EINVAL;
+
+ if (!dest_qpt)
+ tun_qp = &tun_ctx->qp[0];
+ else
+ tun_qp = &tun_ctx->qp[1];
+
+ /* compute P_Key index to put in tunnel header for slave */
+ if (dest_qpt) {
+ u16 pkey_ix;
+ ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
+ if (ret)
+ return -EINVAL;
+
+ ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
+ if (ret)
+ return -EINVAL;
+ tun_pkey_ix = pkey_ix;
+ } else
+ tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
+
+ dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1;
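+ /* Each slave owns a block of eight proxy QPs starting at
+ * base_proxy_sqpn; the expression above selects the QP for this
+ * port and destination QP type within the slave's block. */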
+
+ /* get tunnel tx data buf for slave */
+ src_qp = tun_qp->qp;
+
+ /* create ah. Just need an empty one with the port num for the post send.
+ * The driver will set the force loopback bit in post_send */
+ memset(&attr, 0, sizeof attr);
+ attr.port_num = port;
+ if (is_eth) {
+ memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
+ attr.ah_flags = IB_AH_GRH;
+ }
+ ah = ib_create_ah(tun_ctx->pd, &attr);
+ if (IS_ERR(ah))
+ return -ENOMEM;
+
+ /* all early-failure checks have passed; reserve a tunnel tx buffer slot */
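+ /* tx_ix_head and tx_ix_tail are free-running counters; since
+ * MLX4_NUM_TUNNEL_BUFS is a power of two, masking with size - 1
+ * yields the ring slot, and the ring is full once the head runs
+ * MLX4_NUM_TUNNEL_BUFS - 1 entries ahead of the tail. */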
+ spin_lock(&tun_qp->tx_lock);
+ if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >=
+ (MLX4_NUM_TUNNEL_BUFS - 1))
+ ret = -EAGAIN;
+ else
+ tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+ spin_unlock(&tun_qp->tx_lock);
+ if (ret)
+ goto out;
+
+ tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
+ if (tun_qp->tx_ring[tun_tx_ix].ah)
+ ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
+ tun_qp->tx_ring[tun_tx_ix].ah = ah;
+ ib_dma_sync_single_for_cpu(&dev->ib_dev,
+ tun_qp->tx_ring[tun_tx_ix].buf.map,
+ sizeof (struct mlx4_rcv_tunnel_mad),
+ DMA_TO_DEVICE);
+
+ /* copy over to tunnel buffer */
+ if (grh)
+ memcpy(&tun_mad->grh, grh, sizeof *grh);
+ memcpy(&tun_mad->mad, mad, sizeof *mad);
+
+ /* adjust tunnel data */
+ tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
+ tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
+ tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
+ tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
+ tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
+
+ ib_dma_sync_single_for_device(&dev->ib_dev,
+ tun_qp->tx_ring[tun_tx_ix].buf.map,
+ sizeof (struct mlx4_rcv_tunnel_mad),
+ DMA_TO_DEVICE);
+
+ list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
+ list.length = sizeof (struct mlx4_rcv_tunnel_mad);
+ list.lkey = tun_ctx->mr->lkey;
+
+ wr.wr.ud.ah = ah;
+ wr.wr.ud.port_num = port;
+ wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
+ wr.wr.ud.remote_qpn = dqpn;
+ wr.next = NULL;
+ wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
+ wr.sg_list = &list;
+ wr.num_sge = 1;
+ wr.opcode = IB_WR_SEND;
+ wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = ib_post_send(src_qp, &wr, &bad_wr);
+out:
+ if (ret)
+ ib_destroy_ah(ah);
+ return ret;
+}
+
+static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
+ struct ib_wc *wc, struct ib_grh *grh,
+ struct ib_mad *mad)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int err;
+ int slave;
+ u8 *slave_id;
+ int is_eth = 0;
+
+ if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
+ is_eth = 0;
+ else
+ is_eth = 1;
+
+ if (is_eth) {
+ if (!(wc->wc_flags & IB_WC_GRH)) {
+ mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
+ return -EINVAL;
+ }
+ if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
+ mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
+ return -EINVAL;
+ }
+ if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
+ mlx4_ib_warn(ibdev, "failed matching grh\n");
+ return -ENOENT;
+ }
+ if (slave >= dev->dev->caps.sqp_demux) {
+ mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+ slave, dev->dev->caps.sqp_demux);
+ return -ENOENT;
+ }
+
+ if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad, is_eth))
+ return 0;
+
+ err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+ if (err)
+ pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ slave, err);
+ return 0;
+ }
+
+ /* Initially assume that this mad is for us */
+ slave = mlx4_master_func_num(dev->dev);
+
+ /* See if the slave id is encoded in a response mad */
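+ /* (the master encodes the initiating slave in the MSB of the
+ * transaction id when it multiplexes a request onto the wire,
+ * so the response carries it back here; 255 marks dom0) */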
+ if (mad->mad_hdr.method & 0x80) {
+ slave_id = (u8 *) &mad->mad_hdr.tid;
+ slave = *slave_id;
+ if (slave != 255) /* 255 indicates the dom0 */
+ *slave_id = 0; /* remap tid */
+ }
+
+ /* If a grh is present, we demux according to it */
+ if (wc->wc_flags & IB_WC_GRH) {
+ slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
+ if (slave < 0) {
+ mlx4_ib_warn(ibdev, "failed matching grh\n");
+ return -ENOENT;
+ }
+ }
+ /* Class-specific handling */
+ switch (mad->mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_ADM:
+ if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
+ (struct ib_sa_mad *) mad))
+ return 0;
+ break;
+ case IB_MGMT_CLASS_CM:
+ if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad, is_eth))
+ return 0;
+ break;
+ case IB_MGMT_CLASS_DEVICE_MGMT:
+ if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP)
+ return 0;
+ break;
+ default:
+ /* Drop unsupported classes for slaves in tunnel mode */
+ if (slave != mlx4_master_func_num(dev->dev)) {
+ pr_debug("dropping unsupported ingress mad from class:%d "
+ "for slave:%d\n", mad->mad_hdr.mgmt_class, slave);
+ return 0;
+ }
+ }
+ /* catch an unhandled slave id of 255 (dom0) or any other out-of-range index */
+ if (slave >= dev->dev->caps.sqp_demux) {
+ mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+ slave, dev->dev->caps.sqp_demux);
+ return -ENOENT;
+ }
+
+ err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+ if (err)
+ pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ slave, err);
+ return 0;
+}
+
static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
- struct ib_wc *in_wc, struct ib_grh *in_grh,
- struct ib_mad *in_mad, struct ib_mad *out_mad)
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
{
u16 slid, prev_lid = 0;
int err;
struct ib_port_attr pattr;
+ if (in_wc && in_wc->qp->qp_num) {
+ pr_debug("received MAD: slid:%d sqpn:%d "
+ "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
+ in_wc->slid, in_wc->src_qp,
+ in_wc->dlid_path_bits,
+ in_wc->qp->qp_num,
+ in_wc->wc_flags,
+ in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
+ be16_to_cpu(in_mad->mad_hdr.attr_id));
+ if (in_wc->wc_flags & IB_WC_GRH) {
+ pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
+ (long long)be64_to_cpu(in_grh->sgid.global.subnet_prefix),
+ (long long)
+ be64_to_cpu(in_grh->sgid.global.interface_id));
+ pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
+ (long long)be64_to_cpu(in_grh->dgid.global.subnet_prefix),
+ (long long)be64_to_cpu(in_grh->dgid.global.interface_id));
+ }
+ }
+
slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
@@ -262,12 +720,9 @@
return IB_MAD_RESULT_SUCCESS;
/*
- * Don't process SMInfo queries or vendor-specific
- * MADs -- the SMA can't handle them.
+ * Don't process SMInfo queries -- the SMA can't handle them.
*/
- if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
- (is_vendor_id(in_mad->mad_hdr.attr_id) &&
- !supported_vendor_id(in_mad->mad_hdr.attr_id)))
+ if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
return IB_MAD_RESULT_SUCCESS;
} else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
@@ -287,15 +742,19 @@
prev_lid = pattr.lid;
err = mlx4_MAD_IFC(to_mdev(ibdev),
- mad_flags & IB_MAD_IGNORE_MKEY,
- mad_flags & IB_MAD_IGNORE_BKEY,
+ (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
+ (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) |
+ MLX4_MAD_IFC_NET_VIEW,
port_num, in_wc, in_grh, in_mad, out_mad);
if (err)
return IB_MAD_RESULT_FAILURE;
if (!out_mad->mad_hdr.status) {
- smp_snoop(ibdev, port_num, in_mad, prev_lid);
- node_desc_override(ibdev, out_mad);
+ if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV))
+ smp_snoop(ibdev, port_num, in_mad, prev_lid);
+ /* slaves get node desc from FW */
+ if (!mlx4_is_slave(to_mdev(ibdev)->dev))
+ node_desc_override(ibdev, out_mad);
}
/* set return bit in status of directed route responses */
@@ -309,60 +768,224 @@
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
-static __be32 be64_to_be32(__be64 b64)
+static void edit_counter_ext(struct mlx4_if_stat_extended *cnt, void *counters,
+ __be16 attr_id)
{
- return cpu_to_be32(be64_to_cpu(b64) & 0xffffffff);
+ switch (attr_id) {
+ case IB_PMA_PORT_COUNTERS:
+ {
+ struct ib_pma_portcounters *pma_cnt =
+ (struct ib_pma_portcounters *)counters;
+ pma_cnt->port_xmit_data =
+ cpu_to_be32((be64_to_cpu(cnt->counters[0].
+ IfTxUnicastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxMulticastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxBroadcastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxDroppedOctets)) >> 2);
+ pma_cnt->port_rcv_data =
+ cpu_to_be32((be64_to_cpu(cnt->counters[0].
+ IfRxUnicastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxMulticastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxBroadcastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxNoBufferOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxErrorOctets)) >> 2);
+ pma_cnt->port_xmit_packets =
+ cpu_to_be32(be64_to_cpu(cnt->counters[0].
+ IfTxUnicastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxMulticastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxBroadcastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxDroppedFrames));
+ pma_cnt->port_rcv_packets =
+ cpu_to_be32(be64_to_cpu(cnt->counters[0].
+ IfRxUnicastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxMulticastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxBroadcastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxNoBufferFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxErrorFrames));
+ pma_cnt->port_rcv_errors = cpu_to_be32(be64_to_cpu(cnt->
+ counters[0].
+ IfRxErrorFrames));
+ break;
+ }
+
+ case IB_PMA_PORT_COUNTERS_EXT:
+ {
+ struct ib_pma_portcounters_ext *pma_cnt_ext =
+ (struct ib_pma_portcounters_ext *)counters;
+
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64((be64_to_cpu(cnt->counters[0].
+ IfTxUnicastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxMulticastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxBroadcastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxDroppedOctets)) >> 2);
+ pma_cnt_ext->port_rcv_data =
+ cpu_to_be64((be64_to_cpu(cnt->counters[0].
+ IfRxUnicastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxMulticastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxBroadcastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxNoBufferOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxErrorOctets)) >> 2);
+ pma_cnt_ext->port_xmit_packets =
+ cpu_to_be64(be64_to_cpu(cnt->counters[0].
+ IfTxUnicastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxMulticastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxBroadcastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxDroppedFrames));
+ pma_cnt_ext->port_rcv_packets =
+ cpu_to_be64(be64_to_cpu(cnt->counters[0].
+ IfRxUnicastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxMulticastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxBroadcastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxNoBufferFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxErrorFrames));
+ pma_cnt_ext->port_unicast_xmit_packets = cnt->counters[0].
+ IfTxUnicastFrames;
+ pma_cnt_ext->port_unicast_rcv_packets = cnt->counters[0].
+ IfRxUnicastFrames;
+ pma_cnt_ext->port_multicast_xmit_packets =
+ cpu_to_be64(be64_to_cpu(cnt->counters[0].
+ IfTxMulticastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxBroadcastFrames));
+ pma_cnt_ext->port_multicast_rcv_packets =
+ cpu_to_be64(be64_to_cpu(cnt->counters[0].
+ IfRxMulticastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxBroadcastFrames));
+
+ break;
+ }
+
+ default:
+ pr_warn("Unsupported attr_id 0x%x\n", attr_id);
+ break;
+ }
+
}
-static void edit_counters(struct mlx4_counters *cnt, void *data)
+static void edit_counter(struct mlx4_if_stat_basic *cnt, void *counters,
+ __be16 attr_id)
{
- *(__be32 *)(data + 40 + 24) = be64_to_be32(cnt->tx_bytes);
- *(__be32 *)(data + 40 + 28) = be64_to_be32(cnt->rx_bytes);
- *(__be32 *)(data + 40 + 32) = be64_to_be32(cnt->tx_frames);
- *(__be32 *)(data + 40 + 36) = be64_to_be32(cnt->rx_frames);
-}
+ switch (attr_id) {
+ case IB_PMA_PORT_COUNTERS:
+ {
+ struct ib_pma_portcounters *pma_cnt =
+ (struct ib_pma_portcounters *) counters;
+ pma_cnt->port_xmit_data =
+ cpu_to_be32(be64_to_cpu(
+ cnt->counters[0].IfTxOctets) >> 2);
+ pma_cnt->port_rcv_data =
+ cpu_to_be32(be64_to_cpu(
+ cnt->counters[0].IfRxOctets) >> 2);
+ pma_cnt->port_xmit_packets =
+ cpu_to_be32(be64_to_cpu(cnt->counters[0].IfTxFrames));
+ pma_cnt->port_rcv_packets =
+ cpu_to_be32(be64_to_cpu(cnt->counters[0].IfRxFrames));
+ break;
+ }
+ case IB_PMA_PORT_COUNTERS_EXT:
+ {
+ struct ib_pma_portcounters_ext *pma_cnt_ext =
+ (struct ib_pma_portcounters_ext *) counters;
-static void edit_ext_counters(struct mlx4_counters_ext *cnt, void *data)
-{
- *(__be32 *)(data + 40 + 24) = be64_to_be32(cnt->tx_uni_bytes);
- *(__be32 *)(data + 40 + 28) = be64_to_be32(cnt->rx_uni_bytes);
- *(__be32 *)(data + 40 + 32) = be64_to_be32(cnt->tx_uni_frames);
- *(__be32 *)(data + 40 + 36) = be64_to_be32(cnt->rx_uni_frames);
- *(__be32 *)(data + 40 + 8) = be64_to_be32(cnt->rx_err_frames);
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64((be64_to_cpu(cnt->counters[0].
+ IfTxOctets) >> 2));
+ pma_cnt_ext->port_rcv_data =
+ cpu_to_be64((be64_to_cpu(cnt->counters[0].
+ IfRxOctets) >> 2));
+ pma_cnt_ext->port_xmit_packets = cnt->counters[0].IfTxFrames;
+ pma_cnt_ext->port_rcv_packets = cnt->counters[0].IfRxFrames;
+ break;
+ }
+ default:
+ pr_warn("Unsupported attr_id 0x%x\n", attr_id);
+ break;
+ }
}
-static int rdmaoe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
- struct ib_wc *in_wc, struct ib_grh *in_grh,
- struct ib_mad *in_mad, struct ib_mad *out_mad)
+int mlx4_ib_query_if_stat(struct mlx4_ib_dev *dev, u32 counter_index,
+ union mlx4_counter *counter, u8 clear)
{
struct mlx4_cmd_mailbox *mailbox;
- struct mlx4_ib_dev *dev = to_mdev(ibdev);
int err;
- u32 inmod = dev->counters[port_num - 1] & 0xffff;
- int mode;
+ u32 inmod = counter_index | ((clear & 1) << 31);
- if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
- return -EINVAL;
-
mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
if (IS_ERR(mailbox))
return IB_MAD_RESULT_FAILURE;
err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0,
- MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C);
- if (err)
+ MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_WRAPPED);
+ if (!err)
+ memcpy(counter, mailbox->buf, MLX4_IF_STAT_SZ(1));
+
+ mlx4_free_cmd_mailbox(dev->dev, mailbox);
+
+ return err;
+}
+
+static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int err;
+ u32 counter_index = dev->counters[port_num - 1] & 0xffff;
+ u8 mode;
+ char counter_buf[MLX4_IF_STAT_SZ(1)];
+ union mlx4_counter *counter = (union mlx4_counter *)
+ counter_buf;
+
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+ return -EINVAL;
+
+ if (mlx4_ib_query_if_stat(dev, counter_index, counter, 0)) {
err = IB_MAD_RESULT_FAILURE;
- else {
+ } else {
memset(out_mad->data, 0, sizeof out_mad->data);
- mode = be32_to_cpu(((struct mlx4_counters *)mailbox->buf)->counter_mode) & 0xf;
- switch (mode) {
+ mode = counter->control.cnt_mode & 0xFF;
+ err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ switch (mode & 0xf) {
case 0:
- edit_counters(mailbox->buf, out_mad->data);
- err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ edit_counter((void *)counter,
+ (void *)(out_mad->data + 40),
+ in_mad->mad_hdr.attr_id);
break;
case 1:
- edit_ext_counters(mailbox->buf, out_mad->data);
- err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ edit_counter_ext((void *)counter,
+ (void *)(out_mad->data + 40),
+ in_mad->mad_hdr.attr_id);
break;
default:
err = IB_MAD_RESULT_FAILURE;
@@ -369,12 +992,11 @@
}
}
- mlx4_free_cmd_mailbox(dev->dev, mailbox);
return err;
}
-int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
struct ib_wc *in_wc, struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad)
{
@@ -383,7 +1005,7 @@
return ib_process_mad(ibdev, mad_flags, port_num, in_wc,
in_grh, in_mad, out_mad);
case IB_LINK_LAYER_ETHERNET:
- return rdmaoe_process_mad(ibdev, mad_flags, port_num, in_wc,
+ return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
in_grh, in_mad, out_mad);
default:
return -EINVAL;
@@ -393,6 +1015,8 @@
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
+ if (mad_send_wc->send_buf->context[0])
+ ib_destroy_ah(mad_send_wc->send_buf->context[0]);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -450,3 +1074,1221 @@
ib_destroy_ah(dev->sm_ah[p]);
}
}
+
+static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
+{
+ mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE);
+
+ if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
+ mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
+ MLX4_EQ_PORT_INFO_LID_CHANGE_MASK, 0, 0);
+}
+
+static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
+{
+ /* re-configure the alias-guid and mcg's */
+ if (mlx4_is_master(dev->dev)) {
+ mlx4_ib_invalidate_all_guid_record(dev, port_num);
+
+ if (!dev->sriov.is_going_down) {
+ mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0);
+ mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
+ MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK, 0, 0);
+ }
+ }
+ mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
+}
+
+static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
+ struct mlx4_eqe *eqe)
+{
+ __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe),
+ GET_MASK_FROM_EQE(eqe));
+}
+
+static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num,
+ u32 guid_tbl_blk_num, u32 change_bitmap)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ u16 i;
+
+ if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev))
+ return;
+
+ in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad) {
+ mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n");
+ goto out;
+ }
+
+ guid_tbl_blk_num *= 4;
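+ /* The EQE block number covers four consecutive GUID-info MAD blocks;
+ * change_bitmap holds one byte per sub-block, and sub-blocks whose
+ * byte is zero are skipped below when a bitmap was supplied. */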
+
+ for (i = 0; i < 4; i++) {
+ if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff)))
+ continue;
+ memset(in_mad, 0, sizeof *in_mad);
+ memset(out_mad, 0, sizeof *out_mad);
+
+ in_mad->base_version = 1;
+ in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ in_mad->class_version = 1;
+ in_mad->method = IB_MGMT_METHOD_GET;
+ in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
+ in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i);
+
+ if (mlx4_MAD_IFC(dev,
+ MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW,
+ port_num, NULL, NULL, in_mad, out_mad)) {
+ mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n");
+ goto out;
+ }
+
+ mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i,
+ port_num,
+ (u8 *)(&((struct ib_smp *)out_mad)->data));
+ mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i,
+ port_num,
+ (u8 *)(&((struct ib_smp *)out_mad)->data));
+ }
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return;
+}
+
+void handle_port_mgmt_change_event(struct work_struct *work)
+{
+ struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
+ struct mlx4_ib_dev *dev = ew->ib_dev;
+ struct mlx4_eqe *eqe = &(ew->ib_eqe);
+ u8 port = eqe->event.port_mgmt_change.port;
+ u32 changed_attr;
+ u32 tbl_block;
+ u32 change_bitmap;
+
+ switch (eqe->subtype) {
+ case MLX4_DEV_PMC_SUBTYPE_PORT_INFO:
+ changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr);
+
+ /* Update the SM ah - this should be done before handling
+ * the other changed attributes so that MADs can be sent to the SM */
+ if (changed_attr & MSTR_SM_CHANGE_MASK) {
+ u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid);
+ u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf;
+ update_sm_ah(dev, port, lid, sl);
+ }
+
+ /* Check if it is a lid change event */
+ if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK)
+ handle_lid_change_event(dev, port);
+
+ /* Generate GUID changed event */
+ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
+ /* if master, notify all slaves */
+ if (mlx4_is_master(dev->dev))
+ mlx4_gen_slaves_port_mgt_ev(dev->dev, port,
+ MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK, 0, 0);
+ }
+
+ if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
+ handle_client_rereg_event(dev, port);
+ break;
+
+ case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE);
+ if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
+ propagate_pkey_ev(dev, port, eqe);
+ break;
+ case MLX4_DEV_PMC_SUBTYPE_GUID_INFO:
+ /* paravirtualized master's guid is guid 0 -- does not change */
+ if (!mlx4_is_master(dev->dev))
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
+ /* if master, notify relevant slaves */
+ else if (!dev->sriov.is_going_down) {
+ tbl_block = GET_BLK_PTR_FROM_EQE(eqe);
+ change_bitmap = GET_MASK_FROM_EQE(eqe);
+ handle_slaves_guid_change(dev, port, tbl_block, change_bitmap);
+ }
+ break;
+ default:
+ pr_warn("Unsupported subtype 0x%x for "
+ "Port Management Change event\n", eqe->subtype);
+ }
+
+ kfree(ew);
+}
+
+void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
+ enum ib_event_type type)
+{
+ struct ib_event event;
+
+ event.device = &dev->ib_dev;
+ event.element.port_num = port_num;
+ event.event = type;
+
+ ib_dispatch_event(&event);
+}
+
+static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
+{
+ unsigned long flags;
+ struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
+ queue_work(ctx->wq, &ctx->work);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
+ struct mlx4_ib_demux_pv_qp *tun_qp,
+ int index)
+{
+ struct ib_sge sg_list;
+ struct ib_recv_wr recv_wr, *bad_recv_wr;
+ int size;
+
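+ /* Tunnel QPs are plain UD and their packets carry the tunnel header
+ * (mlx4_tunnel_mad); real SQPs receive bare MADs (mlx4_mad_rcv_buf). */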
+ size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
+ sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf);
+
+ sg_list.addr = tun_qp->ring[index].map;
+ sg_list.length = size;
+ sg_list.lkey = ctx->mr->lkey;
+
+ recv_wr.next = NULL;
+ recv_wr.sg_list = &sg_list;
+ recv_wr.num_sge = 1;
+ recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV |
+ MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt);
+ ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map,
+ size, DMA_FROM_DEVICE);
+ return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
+}
+
+static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
+ int slave, struct ib_sa_mad *sa_mad)
+{
+ int ret = 0;
+
+ /* dispatch to different sa handlers */
+ switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
+ case IB_SA_ATTR_MC_MEMBER_REC:
+ ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad);
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
+{
+ int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
+
+ return (qpn >= proxy_start && qpn <= proxy_start + 1);
+}
+
+
+int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
+ enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
+ u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
+{
+ struct ib_sge list;
+ struct ib_send_wr wr, *bad_wr;
+ struct mlx4_ib_demux_pv_ctx *sqp_ctx;
+ struct mlx4_ib_demux_pv_qp *sqp;
+ struct mlx4_mad_snd_buf *sqp_mad;
+ struct ib_ah *ah;
+ struct ib_qp *send_qp = NULL;
+ unsigned wire_tx_ix = 0;
+ int ret = 0;
+ u16 wire_pkey_ix;
+ int src_qpnum;
+ u8 sgid_index;
+
+
+ sqp_ctx = dev->sriov.sqps[port-1];
+
+ /* check if proxy qp created */
+ if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
+ return -EAGAIN;
+
+ /* QP0 forwarding only for Dom0 */
+ if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
+ return -EINVAL;
+
+ if (dest_qpt == IB_QPT_SMI) {
+ src_qpnum = 0;
+ sqp = &sqp_ctx->qp[0];
+ wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
+ } else {
+ src_qpnum = 1;
+ sqp = &sqp_ctx->qp[1];
+ wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index];
+ }
+
+ send_qp = sqp->qp;
+
+ /* create ah */
+ sgid_index = attr->grh.sgid_index;
+ attr->grh.sgid_index = 0;
+ ah = ib_create_ah(sqp_ctx->pd, attr);
+ if (IS_ERR(ah))
+ return -ENOMEM;
+ attr->grh.sgid_index = sgid_index;
+ to_mah(ah)->av.ib.gid_index = sgid_index;
+ /* get rid of force-loopback bit */
+ to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
+ spin_lock(&sqp->tx_lock);
+ if (sqp->tx_ix_head - sqp->tx_ix_tail >=
+ (MLX4_NUM_TUNNEL_BUFS - 1))
+ ret = -EAGAIN;
+ else
+ wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+ spin_unlock(&sqp->tx_lock);
+ if (ret)
+ goto out;
+
+ sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
+ if (sqp->tx_ring[wire_tx_ix].ah)
+ ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
+ sqp->tx_ring[wire_tx_ix].ah = ah;
+ ib_dma_sync_single_for_cpu(&dev->ib_dev,
+ sqp->tx_ring[wire_tx_ix].buf.map,
+ sizeof (struct mlx4_mad_snd_buf),
+ DMA_TO_DEVICE);
+
+ memcpy(&sqp_mad->payload, mad, sizeof *mad);
+
+ ib_dma_sync_single_for_device(&dev->ib_dev,
+ sqp->tx_ring[wire_tx_ix].buf.map,
+ sizeof (struct mlx4_mad_snd_buf),
+ DMA_TO_DEVICE);
+
+ list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
+ list.length = sizeof (struct mlx4_mad_snd_buf);
+ list.lkey = sqp_ctx->mr->lkey;
+
+ wr.wr.ud.ah = ah;
+ wr.wr.ud.port_num = port;
+ wr.wr.ud.pkey_index = wire_pkey_ix;
+ wr.wr.ud.remote_qkey = qkey;
+ wr.wr.ud.remote_qpn = remote_qpn;
+ wr.next = NULL;
+ wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
+ wr.sg_list = &list;
+ wr.num_sge = 1;
+ wr.opcode = IB_WR_SEND;
+ wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = ib_post_send(send_qp, &wr, &bad_wr);
+out:
+ if (ret)
+ ib_destroy_ah(ah);
+ return ret;
+}
+
+static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
+{
+ int gids;
+ int vfs;
+
+ if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
+ return slave;
+
+ gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
+ vfs = dev->dev->num_vfs;
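+ /* RoCE GID layout: the first MLX4_ROCE_PF_GIDS entries belong to the
+ * PF; the remainder is split as evenly as possible among the VFs,
+ * with the first (gids % vfs) slaves getting one extra entry each. */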
+
+ if (slave == 0)
+ return 0;
+ if (slave <= gids % vfs)
+ return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1);
+
+ return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1));
+}
+
+static int get_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
+ struct ib_ah_attr *ah_attr)
+{
+ if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) {
+ ah_attr->grh.sgid_index = slave;
+ return 0;
+ }
+ ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
+ return 0;
+}
+
+static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+ struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)];
+ int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
+ struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
+ struct mlx4_ib_ah ah;
+ struct ib_ah_attr ah_attr;
+ u8 *slave_id;
+ int slave;
+
+ /* Get slave that sent this packet */
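+ /* The low three bits of a proxy QP number appear to encode its
+ * role: bit 0 is the port (port - 1), bit 1 distinguishes QP0
+ * from QP1 traffic, and bit 2 must be clear for a proxy source,
+ * hence the sanity checks below. */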
+ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
+ wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX ||
+ (wc->src_qp & 0x1) != ctx->port - 1 ||
+ wc->src_qp & 0x4) {
+ mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
+ return;
+ }
+ slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8;
+ if (slave != ctx->slave) {
+ mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
+ "belongs to another slave\n", wc->src_qp);
+ return;
+ }
+ if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
+ mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
+ "non-master trying to send QP0 packets\n", wc->src_qp);
+ return;
+ }
+
+ /* Map transaction ID */
+ ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
+ sizeof (struct mlx4_tunnel_mad),
+ DMA_FROM_DEVICE);
+ switch (tunnel->mad.mad_hdr.method) {
+ case IB_MGMT_METHOD_SET:
+ case IB_MGMT_METHOD_GET:
+ case IB_MGMT_METHOD_REPORT:
+ case IB_SA_METHOD_GET_TABLE:
+ case IB_SA_METHOD_DELETE:
+ case IB_SA_METHOD_GET_MULTI:
+ case IB_SA_METHOD_GET_TRACE_TBL:
+ slave_id = (u8 *) &tunnel->mad.mad_hdr.tid;
+ if (*slave_id) {
+ mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d "
+ "class:%d slave:%d\n", *slave_id,
+ tunnel->mad.mad_hdr.mgmt_class, slave);
+ return;
+ } else
+ *slave_id = slave;
+ default:
+ /* nothing */;
+ }
+
+ /* Class-specific handling */
+ switch (tunnel->mad.mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_ADM:
+ if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
+ (struct ib_sa_mad *) &tunnel->mad))
+ return;
+ break;
+ case IB_MGMT_CLASS_CM:
+ if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave,
+ (struct ib_mad *) &tunnel->mad))
+ return;
+ break;
+ case IB_MGMT_CLASS_DEVICE_MGMT:
+ if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET &&
+ tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET)
+ return;
+ break;
+ default:
+ /* Drop unsupported classes for slaves in tunnel mode */
+ if (slave != mlx4_master_func_num(dev->dev)) {
+ mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d "
+ "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave);
+ return;
+ }
+ }
+
+ /* We are using standard ib_core services to send the mad, so generate a
+ * standard address handle by decoding the tunnelled mlx4_ah fields */
+ memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
+ ah.ibah.device = ctx->ib_dev;
+ mlx4_ib_query_ah(&ah.ibah, &ah_attr);
+ if (ah_attr.ah_flags & IB_AH_GRH)
+ if (get_real_sgid_index(dev, slave, ctx->port, &ah_attr))
+ return;
+
+ mlx4_ib_send_to_wire(dev, slave, ctx->port,
+ is_proxy_qp0(dev, wc->src_qp, slave) ?
+ IB_QPT_SMI : IB_QPT_GSI,
+ be16_to_cpu(tunnel->hdr.pkey_index),
+ be32_to_cpu(tunnel->hdr.remote_qpn),
+ be32_to_cpu(tunnel->hdr.qkey),
+ &ah_attr, &tunnel->mad);
+}
+
+static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
+ enum ib_qp_type qp_type, int is_tun)
+{
+ int i;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ int rx_buf_size, tx_buf_size;
+
+ if (qp_type > IB_QPT_GSI)
+ return -EINVAL;
+
+ tun_qp = &ctx->qp[qp_type];
+
+ tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS,
+ GFP_KERNEL);
+ if (!tun_qp->ring)
+ return -ENOMEM;
+
+ tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+ sizeof (struct mlx4_ib_tun_tx_buf),
+ GFP_KERNEL);
+ if (!tun_qp->tx_ring) {
+ kfree(tun_qp->ring);
+ tun_qp->ring = NULL;
+ return -ENOMEM;
+ }
+
+ if (is_tun) {
+ rx_buf_size = sizeof (struct mlx4_tunnel_mad);
+ tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
+ } else {
+ rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
+ tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
+ }
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
+ if (!tun_qp->ring[i].addr)
+ goto err;
+ tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev,
+ tun_qp->ring[i].addr,
+ rx_buf_size,
+ DMA_FROM_DEVICE);
+ }
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ tun_qp->tx_ring[i].buf.addr =
+ kmalloc(tx_buf_size, GFP_KERNEL);
+ if (!tun_qp->tx_ring[i].buf.addr)
+ goto tx_err;
+ tun_qp->tx_ring[i].buf.map =
+ ib_dma_map_single(ctx->ib_dev,
+ tun_qp->tx_ring[i].buf.addr,
+ tx_buf_size,
+ DMA_TO_DEVICE);
+ tun_qp->tx_ring[i].ah = NULL;
+ }
+ spin_lock_init(&tun_qp->tx_lock);
+ tun_qp->tx_ix_head = 0;
+ tun_qp->tx_ix_tail = 0;
+ tun_qp->proxy_qpt = qp_type;
+
+ return 0;
+
+tx_err:
+ while (i > 0) {
+ --i;
+ ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
+ tx_buf_size, DMA_TO_DEVICE);
+ kfree(tun_qp->tx_ring[i].buf.addr);
+ }
+ kfree(tun_qp->tx_ring);
+ tun_qp->tx_ring = NULL;
+ i = MLX4_NUM_TUNNEL_BUFS;
+err:
+ while (i > 0) {
+ --i;
+ ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
+ rx_buf_size, DMA_FROM_DEVICE);
+ kfree(tun_qp->ring[i].addr);
+ }
+ kfree(tun_qp->ring);
+ tun_qp->ring = NULL;
+ return -ENOMEM;
+}
+
+static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
+ enum ib_qp_type qp_type, int is_tun)
+{
+ int i;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ int rx_buf_size, tx_buf_size;
+
+ if (qp_type > IB_QPT_GSI)
+ return;
+
+ tun_qp = &ctx->qp[qp_type];
+ if (is_tun) {
+ rx_buf_size = sizeof (struct mlx4_tunnel_mad);
+ tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
+ } else {
+ rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
+ tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
+ }
+
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
+ rx_buf_size, DMA_FROM_DEVICE);
+ kfree(tun_qp->ring[i].addr);
+ }
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
+ tx_buf_size, DMA_TO_DEVICE);
+ kfree(tun_qp->tx_ring[i].buf.addr);
+ if (tun_qp->tx_ring[i].ah)
+ ib_destroy_ah(tun_qp->tx_ring[i].ah);
+ }
+ kfree(tun_qp->tx_ring);
+ kfree(tun_qp->ring);
+}
+
+static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
+{
+ struct mlx4_ib_demux_pv_ctx *ctx;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ struct ib_wc wc;
+ int ret;
+ ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
+ ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+
+ while (ib_poll_cq(ctx->cq, 1, &wc) == 1) {
+ tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
+ if (wc.status == IB_WC_SUCCESS) {
+ switch (wc.opcode) {
+ case IB_WC_RECV:
+ mlx4_ib_multiplex_mad(ctx, &wc);
+ ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp,
+ wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1));
+ if (ret)
+ pr_err("Failed reposting tunnel "
+ "buf:%lld\n", (long long)wc.wr_id);
+ break;
+ case IB_WC_SEND:
+ pr_debug("received tunnel send completion:"
+ "wrid=0x%llx, status=0x%x\n",
+ (long long)wc.wr_id, wc.status);
+ ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ = NULL;
+ spin_lock(&tun_qp->tx_lock);
+ tun_qp->tx_ix_tail++;
+ spin_unlock(&tun_qp->tx_lock);
+
+ break;
+ default:
+ break;
+ }
+ } else {
+ pr_debug("mlx4_ib: completion error in tunnel: %d."
+ " status = %d, wrid = 0x%llx\n",
+ ctx->slave, wc.status, (long long)wc.wr_id);
+ if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
+ ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ = NULL;
+ spin_lock(&tun_qp->tx_lock);
+ tun_qp->tx_ix_tail++;
+ spin_unlock(&tun_qp->tx_lock);
+ }
+ }
+ }
+}
+
+static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
+{
+ struct mlx4_ib_demux_pv_ctx *sqp = qp_context;
+
+ /* It's worse than that! He's dead, Jim! */
+ pr_err("Fatal error (%d) on a MAD QP on port %d\n",
+ event->event, sqp->port);
+}
+
+static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
+ enum ib_qp_type qp_type, int create_tun)
+{
+ int i, ret;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
+ struct ib_qp_attr attr;
+ int qp_attr_mask_INIT;
+
+ if (qp_type > IB_QPT_GSI)
+ return -EINVAL;
+
+ tun_qp = &ctx->qp[qp_type];
+
+ memset(&qp_init_attr, 0, sizeof qp_init_attr);
+ qp_init_attr.init_attr.send_cq = ctx->cq;
+ qp_init_attr.init_attr.recv_cq = ctx->cq;
+ qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
+ qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
+ qp_init_attr.init_attr.cap.max_send_sge = 1;
+ qp_init_attr.init_attr.cap.max_recv_sge = 1;
+ if (create_tun) {
+ qp_init_attr.init_attr.qp_type = IB_QPT_UD;
+ qp_init_attr.init_attr.create_flags = (enum ib_qp_create_flags)MLX4_IB_SRIOV_TUNNEL_QP;
+ qp_init_attr.port = ctx->port;
+ qp_init_attr.slave = ctx->slave;
+ qp_init_attr.proxy_qp_type = qp_type;
+ qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX |
+ IB_QP_QKEY | IB_QP_PORT;
+ } else {
+ qp_init_attr.init_attr.qp_type = qp_type;
+ qp_init_attr.init_attr.create_flags = (enum ib_qp_create_flags)MLX4_IB_SRIOV_SQP;
+ qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
+ }
+ qp_init_attr.init_attr.port_num = ctx->port;
+ qp_init_attr.init_attr.qp_context = ctx;
+ qp_init_attr.init_attr.event_handler = pv_qp_event_handler;
+ tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
+ if (IS_ERR(tun_qp->qp)) {
+ ret = PTR_ERR(tun_qp->qp);
+ tun_qp->qp = NULL;
+ pr_err("Couldn't create %s QP (%d)\n",
+ create_tun ? "tunnel" : "special", ret);
+ return ret;
+ }
+
+ memset(&attr, 0, sizeof attr);
+ attr.qp_state = IB_QPS_INIT;
+ attr.pkey_index =
+ to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
+ attr.qkey = IB_QP1_QKEY;
+ attr.port_num = ctx->port;
+ ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
+ if (ret) {
+ pr_err("Couldn't change %s qp state to INIT (%d)\n",
+ create_tun ? "tunnel" : "special", ret);
+ goto err_qp;
+ }
+ attr.qp_state = IB_QPS_RTR;
+ ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE);
+ if (ret) {
+ pr_err("Couldn't change %s qp state to RTR (%d)\n",
+ create_tun ? "tunnel" : "special", ret);
+ goto err_qp;
+ }
+ attr.qp_state = IB_QPS_RTS;
+ attr.sq_psn = 0;
+ ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
+ if (ret) {
+ pr_err("Couldn't change %s qp state to RTS (%d)\n",
+ create_tun ? "tunnel" : "special", ret);
+ goto err_qp;
+ }
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
+ if (ret) {
+ pr_err(" mlx4_ib_post_pv_buf error"
+ " (err = %d, i = %d)\n", ret, i);
+ goto err_qp;
+ }
+ }
+ return 0;
+
+err_qp:
+ ib_destroy_qp(tun_qp->qp);
+ tun_qp->qp = NULL;
+ return ret;
+}
+
+/*
+ * IB MAD completion callback for real SQPs
+ */
+static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
+{
+ struct mlx4_ib_demux_pv_ctx *ctx;
+ struct mlx4_ib_demux_pv_qp *sqp;
+ struct ib_wc wc;
+ struct ib_grh *grh;
+ struct ib_mad *mad;
+
+ ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
+ ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+
+ while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) {
+ sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
+ if (wc.status == IB_WC_SUCCESS) {
+ switch (wc.opcode) {
+ case IB_WC_SEND:
+ ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ = NULL;
+ spin_lock(&sqp->tx_lock);
+ sqp->tx_ix_tail++;
+ spin_unlock(&sqp->tx_lock);
+ break;
+ case IB_WC_RECV:
+ mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
+ (sqp->ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
+ grh = &(((struct mlx4_mad_rcv_buf *)
+ (sqp->ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
+ mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
+ if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)))
+ pr_err("Failed reposting SQP "
+ "buf:%lld\n", (long long)wc.wr_id);
+ break;
+ default:
+ BUG_ON(1);
+ break;
+ }
+ } else {
+ pr_debug("mlx4_ib: completion error in tunnel: %d."
+ " status = %d, wrid = 0x%llx\n",
+ ctx->slave, wc.status, (long long)wc.wr_id);
+ if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
+ ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ = NULL;
+ spin_lock(&sqp->tx_lock);
+ sqp->tx_ix_tail++;
+ spin_unlock(&sqp->tx_lock);
+ }
+ }
+ }
+}
+
+static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
+ struct mlx4_ib_demux_pv_ctx **ret_ctx)
+{
+ struct mlx4_ib_demux_pv_ctx *ctx;
+
+ *ret_ctx = NULL;
+ ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
+ if (!ctx) {
+ pr_err("failed allocating pv resource context "
+ "for port %d, slave %d\n", port, slave);
+ return -ENOMEM;
+ }
+
+ ctx->ib_dev = &dev->ib_dev;
+ ctx->port = port;
+ ctx->slave = slave;
+ *ret_ctx = ctx;
+ return 0;
+}
+
+static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port)
+{
+ if (dev->sriov.demux[port - 1].tun[slave]) {
+ kfree(dev->sriov.demux[port - 1].tun[slave]);
+ dev->sriov.demux[port - 1].tun[slave] = NULL;
+ }
+}
+
+static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
+ int create_tun, struct mlx4_ib_demux_pv_ctx *ctx)
+{
+ int ret, cq_size;
+
+ if (ctx->state != DEMUX_PV_STATE_DOWN)
+ return -EEXIST;
+
+ ctx->state = DEMUX_PV_STATE_STARTING;
+ /* have QP0 only on port owner, and only if link layer is IB */
+ if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
+ rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
+ ctx->has_smi = 1;
+
+ if (ctx->has_smi) {
+ ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun);
+ if (ret) {
+ pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret);
+ goto err_out;
+ }
+ }
+
+ ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun);
+ if (ret) {
+ pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret);
+ goto err_out_qp0;
+ }
+
+ cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
+ if (ctx->has_smi)
+ cq_size *= 2;
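+ /* The CQ serves the send and receive rings of the GSI tunnel QP,
+ * MLX4_NUM_TUNNEL_BUFS entries each; double it when a QP0 (SMI)
+ * context exists as well. */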
+
+ ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
+ NULL, ctx, cq_size, 0);
+ if (IS_ERR(ctx->cq)) {
+ ret = PTR_ERR(ctx->cq);
+ pr_err("Couldn't create tunnel CQ (%d)\n", ret);
+ goto err_buf;
+ }
+
+ ctx->pd = ib_alloc_pd(ctx->ib_dev);
+ if (IS_ERR(ctx->pd)) {
+ ret = PTR_ERR(ctx->pd);
+ pr_err("Couldn't create tunnel PD (%d)\n", ret);
+ goto err_cq;
+ }
+
+ ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(ctx->mr)) {
+ ret = PTR_ERR(ctx->mr);
+ pr_err("Couldn't get tunnel DMA MR (%d)\n", ret);
+ goto err_pd;
+ }
+
+ if (ctx->has_smi) {
+ ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
+ if (ret) {
+ pr_err("Couldn't create %s QP0 (%d)\n",
+ create_tun ? "tunnel for" : "", ret);
+ goto err_mr;
+ }
+ }
+
+ ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun);
+ if (ret) {
+ pr_err("Couldn't create %s QP1 (%d)\n",
+ create_tun ? "tunnel for" : "", ret);
+ goto err_qp0;
+ }
+
+ if (create_tun)
+ INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker);
+ else
+ INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
+
+ ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
+
+ ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+ if (ret) {
+ pr_err("Couldn't arm tunnel cq (%d)\n", ret);
+ goto err_wq;
+ }
+ ctx->state = DEMUX_PV_STATE_ACTIVE;
+ return 0;
+
+err_wq:
+ ctx->wq = NULL;
+ ib_destroy_qp(ctx->qp[1].qp);
+ ctx->qp[1].qp = NULL;
+
+
+err_qp0:
+ if (ctx->has_smi)
+ ib_destroy_qp(ctx->qp[0].qp);
+ ctx->qp[0].qp = NULL;
+
+err_mr:
+ ib_dereg_mr(ctx->mr);
+ ctx->mr = NULL;
+
+err_pd:
+ ib_dealloc_pd(ctx->pd);
+ ctx->pd = NULL;
+
+err_cq:
+ ib_destroy_cq(ctx->cq);
+ ctx->cq = NULL;
+
+err_buf:
+ mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun);
+
+err_out_qp0:
+ if (ctx->has_smi)
+ mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun);
+err_out:
+ ctx->state = DEMUX_PV_STATE_DOWN;
+ return ret;
+}
+
+static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
+ struct mlx4_ib_demux_pv_ctx *ctx, int flush)
+{
+ if (!ctx)
+ return;
+ if (ctx->state > DEMUX_PV_STATE_DOWN) {
+ ctx->state = DEMUX_PV_STATE_DOWNING;
+ if (flush)
+ flush_workqueue(ctx->wq);
+ if (ctx->has_smi) {
+ ib_destroy_qp(ctx->qp[0].qp);
+ ctx->qp[0].qp = NULL;
+ mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1);
+ }
+ ib_destroy_qp(ctx->qp[1].qp);
+ ctx->qp[1].qp = NULL;
+ mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
+ ib_dereg_mr(ctx->mr);
+ ctx->mr = NULL;
+ ib_dealloc_pd(ctx->pd);
+ ctx->pd = NULL;
+ ib_destroy_cq(ctx->cq);
+ ctx->cq = NULL;
+ ctx->state = DEMUX_PV_STATE_DOWN;
+ }
+}
+
+static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
+ int port, int do_init)
+{
+ int ret = 0;
+
+ if (!do_init) {
+ clean_vf_mcast(&dev->sriov.demux[port - 1], slave);
+ /* for master, destroy real sqp resources */
+ if (slave == mlx4_master_func_num(dev->dev))
+ destroy_pv_resources(dev, slave, port,
+ dev->sriov.sqps[port - 1], 1);
+ /* destroy the tunnel qp resources */
+ destroy_pv_resources(dev, slave, port,
+ dev->sriov.demux[port - 1].tun[slave], 1);
+ return 0;
+ }
+
+ /* create the tunnel qp resources */
+ ret = create_pv_resources(&dev->ib_dev, slave, port, 1,
+ dev->sriov.demux[port - 1].tun[slave]);
+
+ /* for master, create the real sqp resources */
+ if (!ret && slave == mlx4_master_func_num(dev->dev))
+ ret = create_pv_resources(&dev->ib_dev, slave, port, 0,
+ dev->sriov.sqps[port - 1]);
+ return ret;
+}
+
+void mlx4_ib_tunnels_update_work(struct work_struct *work)
+{
+ struct mlx4_ib_demux_work *dmxw;
+
+ dmxw = container_of(work, struct mlx4_ib_demux_work, work);
+ mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port,
+ dmxw->do_init);
+ kfree(dmxw);
+ return;
+}
+
+static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
+ struct mlx4_ib_demux_ctx *ctx,
+ int port)
+{
+ char name[12];
+ int ret = 0;
+ int i;
+
+ ctx->tun = kcalloc(dev->dev->caps.sqp_demux,
+ sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL);
+ if (!ctx->tun)
+ return -ENOMEM;
+
+ ctx->dev = dev;
+ ctx->port = port;
+ ctx->ib_dev = &dev->ib_dev;
+
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
+ if (ret) {
+ ret = -ENOMEM;
+ goto err_mcg;
+ }
+ }
+
+ ret = mlx4_ib_mcg_port_init(ctx);
+ if (ret) {
+ pr_err("Failed initializing mcg para-virt (%d)\n", ret);
+ goto err_mcg;
+ }
+
+ snprintf(name, sizeof name, "mlx4_ibt%d", port);
+ ctx->wq = create_singlethread_workqueue(name);
+ if (!ctx->wq) {
+ pr_err("Failed to create tunnelling WQ for port %d\n", port);
+ ret = -ENOMEM;
+ goto err_wq;
+ }
+
+ snprintf(name, sizeof name, "mlx4_ibud%d", port);
+ ctx->ud_wq = create_singlethread_workqueue(name);
+ if (!ctx->ud_wq) {
+ pr_err("Failed to create up/down WQ for port %d\n", port);
+ ret = -ENOMEM;
+ goto err_udwq;
+ }
+
+ return 0;
+
+err_udwq:
+ destroy_workqueue(ctx->wq);
+ ctx->wq = NULL;
+
+err_wq:
+ mlx4_ib_mcg_port_cleanup(ctx, 1);
+err_mcg:
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++)
+ free_pv_object(dev, i, port);
+ kfree(ctx->tun);
+ ctx->tun = NULL;
+ return ret;
+}
+
+static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
+{
+ if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) {
+ sqp_ctx->state = DEMUX_PV_STATE_DOWNING;
+ flush_workqueue(sqp_ctx->wq);
+ if (sqp_ctx->has_smi) {
+ ib_destroy_qp(sqp_ctx->qp[0].qp);
+ sqp_ctx->qp[0].qp = NULL;
+ mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0);
+ }
+ ib_destroy_qp(sqp_ctx->qp[1].qp);
+ sqp_ctx->qp[1].qp = NULL;
+ mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
+ ib_dereg_mr(sqp_ctx->mr);
+ sqp_ctx->mr = NULL;
+ ib_dealloc_pd(sqp_ctx->pd);
+ sqp_ctx->pd = NULL;
+ ib_destroy_cq(sqp_ctx->cq);
+ sqp_ctx->cq = NULL;
+ sqp_ctx->state = DEMUX_PV_STATE_DOWN;
+ }
+}
+
+static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
+{
+ int i;
+ if (ctx) {
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+ mlx4_ib_mcg_port_cleanup(ctx, 1);
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ if (!ctx->tun[i])
+ continue;
+ if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN)
+ ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
+ }
+ flush_workqueue(ctx->wq);
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
+ free_pv_object(dev, i, ctx->port);
+ }
+ kfree(ctx->tun);
+ destroy_workqueue(ctx->ud_wq);
+ destroy_workqueue(ctx->wq);
+ }
+}
+
+static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init)
+{
+ int i;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+ /* initialize or tear down tunnel QPs for the master */
+ for (i = 0; i < dev->dev->caps.num_ports; i++)
+ mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init);
+ return;
+}
+
+int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
+{
+ int i = 0;
+ int err;
+
+ if (!mlx4_is_mfunc(dev->dev))
+ return 0;
+
+ dev->sriov.is_going_down = 0;
+ spin_lock_init(&dev->sriov.going_down_lock);
+ mlx4_ib_cm_paravirt_init(dev);
+
+ mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n");
+
+ if (mlx4_is_slave(dev->dev)) {
+ mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n");
+ return 0;
+ }
+
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ if (i == mlx4_master_func_num(dev->dev))
+ mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid);
+ else
+ mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid());
+ }
+
+ err = mlx4_ib_init_alias_guid_service(dev);
+ if (err) {
+ mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n");
+ goto paravirt_err;
+ }
+ err = mlx4_ib_device_register_sysfs(dev);
+ if (err) {
+ mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n");
+ goto sysfs_err;
+ }
+
+ mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n",
+ dev->dev->caps.sqp_demux);
+ for (i = 0; i < dev->num_ports; i++) {
+ union ib_gid gid;
+ err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1);
+ if (err)
+ goto demux_err;
+ dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
+ err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
+ &dev->sriov.sqps[i]);
+ if (err)
+ goto demux_err;
+ err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
+ if (err)
+ goto demux_err;
+ }
+ mlx4_ib_master_tunnels(dev, 1);
+ return 0;
+
+demux_err:
+ while (i > 0) {
+ free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
+ mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
+ --i;
+ }
+ mlx4_ib_device_unregister_sysfs(dev);
+
+sysfs_err:
+ mlx4_ib_destroy_alias_guid_service(dev);
+
+paravirt_err:
+ mlx4_ib_cm_paravirt_clean(dev, -1);
+
+ return err;
+}
+
+void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev)
+{
+ int i;
+ unsigned long flags;
+
+ if (!mlx4_is_mfunc(dev->dev))
+ return;
+
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ dev->sriov.is_going_down = 1;
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+ if (mlx4_is_master(dev->dev)) {
+ for (i = 0; i < dev->num_ports; i++) {
+ flush_workqueue(dev->sriov.demux[i].ud_wq);
+ mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]);
+ kfree(dev->sriov.sqps[i]);
+ dev->sriov.sqps[i] = NULL;
+ mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
+ }
+
+ mlx4_ib_cm_paravirt_clean(dev, -1);
+ mlx4_ib_destroy_alias_guid_service(dev);
+ mlx4_ib_device_unregister_sysfs(dev);
+ }
+}
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/mad.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mlx4/main.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/main.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/main.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -32,12 +32,19 @@
*/
#include <linux/module.h>
-#include <linux/init.h>
+
+#ifdef __linux__
+#include <linux/proc_fs.h>
+#endif
+
+#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
-#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
+#include <linux/bitops.h>
+#include <linux/if_ether.h>
+#include <linux/fs.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
@@ -45,45 +52,63 @@
#include <linux/mlx4/driver.h>
#include <linux/mlx4/cmd.h>
-
+#include <linux/sched.h>
#include "mlx4_ib.h"
#include "user.h"
#include "wc.h"
#define DRV_NAME MLX4_IB_DRV_NAME
-#define DRV_VERSION "1.0-ofed1.5.2"
-#define DRV_RELDATE "August 4, 2010"
+#define DRV_VERSION "1.0"
+#define DRV_RELDATE "April 4, 2008"
+#define MLX4_IB_DRIVER_PROC_DIR_NAME "driver/mlx4_ib"
+#define MLX4_IB_MRS_PROC_DIR_NAME "mrs"
+
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
-#ifdef CONFIG_MLX4_DEBUG
+int mlx4_ib_sm_guid_assign = 1;
-int mlx4_ib_debug_level = 0;
-module_param_named(debug_level, mlx4_ib_debug_level, int, 0644);
-MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+#ifdef __linux__
+struct proc_dir_entry *mlx4_mrs_dir_entry;
+static struct proc_dir_entry *mlx4_ib_driver_dir_entry;
+#endif
-#endif /* CONFIG_MLX4_DEBUG */
+module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
+MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
+static char dev_assign_str[512];
+//module_param_string(dev_assign_str, dev_assign_str, sizeof(dev_assign_str), 0644);
+MODULE_PARM_DESC(dev_assign_str, "Map all device function numbers to "
+ "IB device numbers following the pattern: "
+ "bb:dd.f-0,bb:dd.f-1,... (all numbers are hexadecimal)."
+ " At most 32 devices are supported");
+
static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
-static void *get_ibdev(struct mlx4_dev *dev, void *ctx, u8 port)
-{
- struct mlx4_ib_dev *mlxibdev = ctx;
- return &mlxibdev->ib_dev;
-}
-
struct update_gid_work {
- struct work_struct work;
- union ib_gid gids[128];
- int port;
- struct mlx4_ib_dev *dev;
+ struct work_struct work;
+ union ib_gid gids[128];
+ struct mlx4_ib_dev *dev;
+ int port;
};
+struct dev_rec {
+ int bus;
+ int dev;
+ int func;
+ int nr;
+};
+
+#define MAX_DR 32
+static struct dev_rec dr[MAX_DR];
+
+static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
+
static struct workqueue_struct *wq;
static void init_query_mad(struct ib_smp *mad)
@@ -112,7 +137,8 @@
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
+ 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -123,7 +149,9 @@
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ IB_DEVICE_BLOCK_MULTICAST_LOOPBACK |
+ IB_DEVICE_SHARED_MR;
+
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -144,42 +172,45 @@
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
props->device_cap_flags |= IB_DEVICE_XRC;
- if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_RAW_ETY)
- props->max_raw_ethy_qp = dev->ib_dev.phys_port_cnt;
+ props->device_cap_flags |= IB_DEVICE_QPG;
+ if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
+ props->device_cap_flags |= IB_DEVICE_UD_RSS;
+ props->max_rss_tbl_sz = dev->dev->caps.max_rss_tbl_sz;
+ }
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
- props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
+ props->vendor_part_id = dev->dev->pdev->device;
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
props->max_mr_size = ~0ull;
props->page_size_cap = dev->dev->caps.page_size_cap;
- props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
+ props->max_qp = dev->dev->quotas.qp;
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg);
- props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
+ props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
- props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
+ props->max_mr = dev->dev->quotas.mpt;
props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
- props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
+ props->max_srq = dev->dev->quotas.srq;
props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
props->max_srq_sge = dev->dev->caps.max_srq_sge;
- props->max_fast_reg_page_list_len = MAX_FAST_REG_PAGES;
+ props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
- props->masked_atomic_cap = IB_ATOMIC_HCA;
+ props->masked_atomic_cap = props->atomic_cap;
props->max_pkeys = dev->dev->caps.pkey_table_len[1];
props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
- props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1;
+ props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
out:
kfree(in_mad);
@@ -197,10 +228,33 @@
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
}
-static void ib_link_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props,
- struct ib_smp *out_mad)
+static int ib_link_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props, int netw_view)
{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int ext_active_speed;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
+ in_mad, out_mad);
+ if (err)
+ goto out;
+
+
props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
props->lmc = out_mad->data[34] & 0x7;
props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
@@ -208,7 +262,10 @@
props->state = out_mad->data[32] & 0xf;
props->phys_state = out_mad->data[33] >> 4;
props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
- props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
+ if (netw_view)
+ props->gid_tbl_len = out_mad->data[50];
+ else
+ props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
@@ -220,39 +277,46 @@
props->subnet_timeout = out_mad->data[51] & 0x1f;
props->max_vl_num = out_mad->data[37] >> 4;
props->init_type_reply = out_mad->data[41] >> 4;
- props->link_layer = IB_LINK_LAYER_INFINIBAND;
-}
-#ifdef notyet
-static int eth_to_ib_width(int w)
-{
- switch (w) {
- case 4:
- return IB_WIDTH_4X;
- case 8:
- case 16:
- return IB_WIDTH_8X;
- case 32:
- return IB_WIDTH_12X;
- default:
- return IB_WIDTH_1X;
+ /* Check if extended speeds (EDR/FDR/...) are supported */
+ if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
+ ext_active_speed = out_mad->data[62] >> 4;
+
+ switch (ext_active_speed) {
+ case 1:
+ props->active_speed = IB_SPEED_FDR;
+ break;
+ case 2:
+ props->active_speed = IB_SPEED_EDR;
+ break;
+ }
}
-}
-static int eth_to_ib_speed(int s)
-{
- switch (s) {
- case 256:
- return 1;
- case 512:
- return 2;
- case 1024:
- return 4;
- default:
- return 1;
+ /* If the reported active speed is QDR, check if it is FDR-10 */
+ if (props->active_speed == IB_SPEED_QDR) {
+ init_query_mad(in_mad);
+ in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
+ NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ /* Checking LinkSpeedActive for FDR-10 */
+ if (out_mad->data[15] & 0x1)
+ props->active_speed = IB_SPEED_FDR10;
}
+
+ /* Avoid wrong speed value returned by FW if the IB link is down. */
+ if (props->state == IB_PORT_DOWN)
+ props->active_speed = IB_SPEED_SDR;
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
}
-#endif
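In the hunk above, the extended active speed is the high nibble of PortInfo byte 62 (1 meaning FDR, 2 meaning EDR), and a QDR report is refined to FDR-10 when bit 0 of byte 15 of the extended port info is set. A standalone decode sketch; the byte values are hypothetical stand-ins for real MAD data:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t data62 = 0x20;          /* hypothetical PortInfo byte 62 */
        uint8_t ext_data15 = 0x01;      /* hypothetical ext-port-info byte 15 */
        int ext_speed = data62 >> 4;

        if (ext_speed == 1)
            puts("active speed: FDR");
        else if (ext_speed == 2)
            puts("active speed: EDR");
        else if (ext_data15 & 0x1)      /* QDR reported, FDR-10 bit set */
            puts("active speed: FDR-10");
        else
            puts("active speed: QDR or lower");
        return 0;
    }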
static u8 state_to_phys_state(enum ib_port_state state)
{
@@ -260,26 +324,39 @@
}
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props,
- struct ib_smp *out_mad)
+ struct ib_port_attr *props, int netw_view)
{
- struct mlx4_ib_iboe *iboe = &to_mdev(ibdev)->iboe;
+
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+ struct mlx4_ib_iboe *iboe = &mdev->iboe;
struct net_device *ndev;
enum ib_mtu tmp;
+ struct mlx4_cmd_mailbox *mailbox;
+ int err = 0;
- props->active_width = IB_WIDTH_4X;
- props->active_speed = 1;
+ mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
+ MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ goto out;
+
+ props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ?
+ IB_WIDTH_4X : IB_WIDTH_1X;
+ props->active_speed = IB_SPEED_QDR;
props->port_cap_flags = IB_PORT_CM_SUP;
- props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
- props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
+ if (netw_view)
+ props->gid_tbl_len = MLX4_ROCE_MAX_GIDS;
+ else
+ props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
+
+ props->max_msg_sz = mdev->dev->caps.max_msg_sz;
props->pkey_tbl_len = 1;
- props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
- props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
- props->max_mtu = IB_MTU_2048;
- props->subnet_timeout = 0;
- props->max_vl_num = out_mad->data[37] >> 4;
- props->init_type_reply = 0;
- props->link_layer = IB_LINK_LAYER_ETHERNET;
+ props->max_mtu = IB_MTU_4096;
+ props->max_vl_num = 2;
props->state = IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
props->active_mtu = IB_MTU_256;
@@ -286,62 +363,51 @@
spin_lock(&iboe->lock);
ndev = iboe->netdevs[port - 1];
if (!ndev)
- goto out;
+ goto out_unlock;
-#ifdef __linux__
- tmp = iboe_get_mtu(ndev->mtu);
-#else
tmp = iboe_get_mtu(ndev->if_mtu);
-#endif
props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
- props->state = netif_carrier_ok(ndev) && netif_oper_up(ndev) ?
+
+ props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
IB_PORT_ACTIVE : IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
-
+out_unlock:
+ spin_unlock(&iboe->lock);
out:
- spin_unlock(&iboe->lock);
- return 0;
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return err;
}
-static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props)
+int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props, int netw_view)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ int err;
- in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
- out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
- if (!in_mad || !out_mad)
- goto out;
-
memset(props, 0, sizeof *props);
- init_query_mad(in_mad);
- in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
- in_mad->attr_mod = cpu_to_be32(port);
+ err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
+ ib_link_query_port(ibdev, port, props, netw_view) :
+ eth_link_query_port(ibdev, port, props, netw_view);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
- if (err)
- goto out;
-
- mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
- ib_link_query_port(ibdev, port, props, out_mad) :
- eth_link_query_port(ibdev, port, props, out_mad);
-
-out:
- kfree(in_mad);
- kfree(out_mad);
-
return err;
}
-static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
- union ib_gid *gid)
+static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
{
+ /* returns host view */
+ return __mlx4_ib_query_port(ibdev, port, props, 0);
+}
+
+int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid, int netw_view)
+{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int clear = 0;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -352,17 +418,30 @@
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (mlx4_is_mfunc(dev->dev) && netw_view)
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
+
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(gid->raw, out_mad->data + 8, 8);
+ if (mlx4_is_mfunc(dev->dev) && !netw_view) {
+ if (index) {
+ /* For any index > 0, return the null guid */
+ err = 0;
+ clear = 1;
+ goto out;
+ }
+ }
+
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
+ NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -369,6 +448,8 @@
memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
out:
+ if (clear)
+ memset(gid->raw + 8, 0, 8);
kfree(in_mad);
kfree(out_mad);
return err;
@@ -375,7 +456,7 @@
}
static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index,
- union ib_gid *gid)
+ union ib_gid *gid)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
@@ -388,16 +469,17 @@
union ib_gid *gid)
{
if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
- return __mlx4_ib_query_gid(ibdev, port, index, gid);
+ return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
else
return iboe_query_gid(ibdev, port, index, gid);
}
-static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
+int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -409,7 +491,11 @@
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
+ in_mad, out_mad);
if (err)
goto out;
@@ -421,11 +507,16 @@
return err;
}
+static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+ return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
+}
+
static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
struct ib_device_modify *props)
{
struct mlx4_cmd_mailbox *mailbox;
- int err;
+ unsigned long flags;
if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
return -EOPNOTSUPP;
@@ -433,12 +524,16 @@
if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
return 0;
- spin_lock(&to_mdev(ibdev)->sm_lock);
+ if (mlx4_is_slave(to_mdev(ibdev)->dev))
+ return -EOPNOTSUPP;
+
+ spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
memcpy(ibdev->node_desc, props->node_desc, 64);
- spin_unlock(&to_mdev(ibdev)->sm_lock);
+ spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
- /* if possible, pass node desc to FW, so it can generate
- * a 144 trap. If cmd fails, just ignore.
+ /*
+ * If possible, pass node desc to FW, so it can generate
+ * a 144 trap. If cmd fails, just ignore.
*/
mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
if (IS_ERR(mailbox))
@@ -446,10 +541,8 @@
memset(mailbox->buf, 0, 256);
memcpy(mailbox->buf, props->node_desc, 64);
- err = mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
- MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A);
- if (err)
- mlx4_ib_dbg("SET_NODE command failed (%d)", err);
+ mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
+ MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
@@ -478,7 +571,7 @@
}
err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev->dev, mailbox);
return err;
@@ -514,6 +607,7 @@
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_ucontext *context;
+ struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
struct mlx4_ib_alloc_ucontext_resp resp;
int err;
@@ -520,17 +614,29 @@
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
- resp.qp_tab_size = dev->dev->caps.num_qps;
-
- if (mlx4_wc_enabled()) {
- resp.bf_reg_size = dev->dev->caps.bf_reg_size;
- resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+ if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+ resp_v3.qp_tab_size = dev->dev->caps.num_qps;
+ if (mlx4_wc_enabled()) {
+ resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
+ resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+ } else {
+ resp_v3.bf_reg_size = 0;
+ resp_v3.bf_regs_per_page = 0;
+ }
} else {
- resp.bf_reg_size = 0;
- resp.bf_regs_per_page = 0;
+ resp.dev_caps = dev->dev->caps.userspace_caps;
+ resp.qp_tab_size = dev->dev->caps.num_qps;
+ if (mlx4_wc_enabled()) {
+ resp.bf_reg_size = dev->dev->caps.bf_reg_size;
+ resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+ } else {
+ resp.bf_reg_size = 0;
+ resp.bf_regs_per_page = 0;
+ }
+ resp.cqe_size = dev->dev->caps.cqe_size;
}
- context = kzalloc(sizeof *context, GFP_KERNEL);
+ context = kmalloc(sizeof *context, GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
@@ -543,7 +649,11 @@
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- err = ib_copy_to_udata(udata, &resp, sizeof resp);
+ if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+ err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
+ else
+ err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+
if (err) {
mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
kfree(context);
@@ -562,15 +672,75 @@
return 0;
}
+#ifdef __linux__
+static unsigned long mlx4_ib_get_unmapped_area(struct file *file,
+ unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned long start_addr;
+ unsigned long page_size_order;
+ unsigned long command;
+ mm = current->mm;
+ if (addr)
+ return current->mm->get_unmapped_area(file, addr, len,
+ pgoff, flags);
+
+ /* The last 8 bits hold the command; the other bits are data for that command */
+ command = pgoff & MLX4_IB_MMAP_CMD_MASK;
+ if (command != MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES)
+ return current->mm->get_unmapped_area(file, addr, len,
+ pgoff, flags);
+
+ page_size_order = pgoff >> MLX4_IB_MMAP_CMD_BITS;
+ /* code is based on the huge-pages get_unmapped_area code */
+ start_addr = mm->free_area_cache;
+
+ if (len <= mm->cached_hole_size)
+ start_addr = TASK_UNMAPPED_BASE;
+
+
+full_search:
+ addr = ALIGN(start_addr, 1 << page_size_order);
+
+ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+ /* At this point: (!vma || addr < vma->vm_end). */
+ if (TASK_SIZE - len < addr) {
+ /*
+ * Start a new search - just in case we missed
+ * some holes.
+ */
+ if (start_addr != TASK_UNMAPPED_BASE) {
+ start_addr = TASK_UNMAPPED_BASE;
+ goto full_search;
+ }
+ return -ENOMEM;
+ }
+
+ if (!vma || addr + len <= vma->vm_start)
+ return addr;
+ addr = ALIGN(vma->vm_end, 1 << page_size_order);
+ }
+}
+#endif
+
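Both mlx4_ib_get_unmapped_area() above and mlx4_ib_mmap() below decode the same vm_pgoff layout: the low MLX4_IB_MMAP_CMD_BITS (eight) bits hold the command and the remaining bits carry per-command data such as the requested page-size order. The arithmetic as a standalone sketch, with a hypothetical command value:

    #include <stdio.h>

    #define MMAP_CMD_BITS 8
    #define MMAP_CMD_MASK 0xffUL

    static unsigned long encode_pgoff(unsigned long cmd, unsigned long data)
    {
        /* low 8 bits: command; upper bits: command-specific data */
        return (data << MMAP_CMD_BITS) | (cmd & MMAP_CMD_MASK);
    }

    int main(void)
    {
        unsigned long pgoff = encode_pgoff(2, 21);  /* cmd 2, order 21 */

        printf("command = %lu, page-size order = %lu\n",
               pgoff & MMAP_CMD_MASK, pgoff >> MMAP_CMD_BITS);
        return 0;
    }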
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
+ int err;
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
- return -EINVAL;
+ /* The last 8 bits hold the command; the other bits are data for that command */
+ unsigned long command = vma->vm_pgoff & MLX4_IB_MMAP_CMD_MASK;
- if (vma->vm_pgoff == 0) {
+ if (command < MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) {
+ /* compatibility handling for commands 0 & 1 */
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+ }
+ if (command == MLX4_IB_MMAP_UAR_PAGE) {
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
@@ -577,7 +747,8 @@
to_mucontext(context)->uar.pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
- } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
+ } else if (command == MLX4_IB_MMAP_BLUE_FLAME_PAGE &&
+ dev->dev->caps.bf_reg_size != 0) {
vma->vm_page_prot = pgprot_wc(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
@@ -585,6 +756,25 @@
dev->dev->caps.num_uars,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
+ } else if (command == MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) {
+ /* Getting contiguous physical pages */
+ unsigned long total_size = vma->vm_end - vma->vm_start;
+ unsigned long page_size_order = (vma->vm_pgoff) >>
+ MLX4_IB_MMAP_CMD_BITS;
+ struct ib_cmem *ib_cmem;
+ ib_cmem = ib_cmem_alloc_contiguous_pages(context, total_size,
+ page_size_order);
+ if (IS_ERR(ib_cmem)) {
+ err = PTR_ERR(ib_cmem);
+ return err;
+ }
+
+ err = ib_cmem_map_contiguous_pages_to_vma(ib_cmem, vma);
+ if (err) {
+ ib_cmem_release_contiguous_pages(ib_cmem);
+ return err;
+ }
+ return 0;
} else
return -EINVAL;
@@ -598,7 +788,7 @@
struct mlx4_ib_pd *pd;
int err;
- pd = kzalloc(sizeof *pd, GFP_KERNEL);
+ pd = kmalloc(sizeof *pd, GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
@@ -626,11 +816,62 @@
return 0;
}
+static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_xrcd *xrcd;
+ int err;
+
+ if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return ERR_PTR(-ENOSYS);
+
+ xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
+ if (!xrcd)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
+ if (err)
+ goto err1;
+
+ xrcd->pd = ib_alloc_pd(ibdev);
+ if (IS_ERR(xrcd->pd)) {
+ err = PTR_ERR(xrcd->pd);
+ goto err2;
+ }
+
+ xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0);
+ if (IS_ERR(xrcd->cq)) {
+ err = PTR_ERR(xrcd->cq);
+ goto err3;
+ }
+
+ return &xrcd->ibxrcd;
+
+err3:
+ ib_dealloc_pd(xrcd->pd);
+err2:
+ mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
+err1:
+ kfree(xrcd);
+ return ERR_PTR(err);
+}
+
+static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+ ib_destroy_cq(to_mxrcd(xrcd)->cq);
+ ib_dealloc_pd(to_mxrcd(xrcd)->pd);
+ mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
+ kfree(xrcd);
+
+ return 0;
+}
+
static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
{
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
- struct gid_entry *ge;
+ struct mlx4_ib_gid_entry *ge;
ge = kzalloc(sizeof *ge, GFP_KERNEL);
if (!ge)
@@ -658,11 +899,13 @@
if (!mqp->port)
return 0;
+
spin_lock(&mdev->iboe.lock);
ndev = mdev->iboe.netdevs[mqp->port - 1];
if (ndev)
dev_hold(ndev);
spin_unlock(&mdev->iboe.lock);
+
if (ndev) {
rdma_get_mcast_mac((struct in6_addr *)gid, mac);
rtnl_lock();
@@ -675,38 +918,270 @@
return ret;
}
+struct mlx4_ib_steering {
+ struct list_head list;
+ u64 reg_id;
+ union ib_gid gid;
+};
+
static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ u64 reg_id;
+ struct mlx4_ib_steering *ib_steering = NULL;
- err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, !!(mqp->flags &
- MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
- (ibqp->qp_type == IB_QPT_RAW_ETH) ?
- MLX4_MCAST_PROT_EN : MLX4_MCAST_PROT_IB);
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
+ if (!ib_steering)
+ return -ENOMEM;
+ }
+
+ err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
+ !!(mqp->flags &
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
+ MLX4_PROT_IB_IPV6, &reg_id);
if (err)
- return err;
+ goto err_malloc;
err = add_gid_entry(ibqp, gid);
if (err)
goto err_add;
+ if (ib_steering) {
+ memcpy(ib_steering->gid.raw, gid->raw, 16);
+ ib_steering->reg_id = reg_id;
+ mutex_lock(&mqp->mutex);
+ list_add(&ib_steering->list, &mqp->steering_rules);
+ mutex_unlock(&mqp->mutex);
+ }
return 0;
err_add:
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- (ibqp->qp_type == IB_QPT_RAW_ETH) ?
- MLX4_MCAST_PROT_EN : MLX4_MCAST_PROT_IB);
+ MLX4_PROT_IB_IPV6, reg_id);
+err_malloc:
+ kfree(ib_steering);
+
return err;
}
-static struct gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
+enum {
+ IBV_FLOW_L4_NONE = 0,
+ IBV_FLOW_L4_OTHER = 3,
+ IBV_FLOW_L4_UDP = 5,
+ IBV_FLOW_L4_TCP = 6
+};
+
+struct mlx4_cm_steering {
+ struct list_head list;
+ u64 reg_id;
+ struct ib_flow_spec spec;
+};
+
+static int flow_spec_to_net_rule(struct ib_device *dev, struct ib_flow_spec *flow_spec,
+ struct list_head *rule_list_h)
{
- struct gid_entry *ge;
- struct gid_entry *tmp;
- struct gid_entry *ret = NULL;
+ struct mlx4_spec_list *spec_l2, *spec_l3, *spec_l4;
+ u64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16);
+ spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL);
+ if (!spec_l2)
+ return -ENOMEM;
+
+ switch (flow_spec->type) {
+ case IB_FLOW_ETH:
+ spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec_l2->eth.dst_mac, flow_spec->l2_id.eth.mac, ETH_ALEN);
+ memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN);
+ spec_l2->eth.ether_type = flow_spec->l2_id.eth.ethertype;
+ if (flow_spec->l2_id.eth.vlan_present) {
+ spec_l2->eth.vlan_id = flow_spec->l2_id.eth.vlan;
+ spec_l2->eth.vlan_id_msk = cpu_to_be16(0x0fff);
+ }
+ break;
+ case IB_FLOW_IB_UC:
+ spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB;
+ if(flow_spec->l2_id.ib_uc.qpn) {
+ spec_l2->ib.l3_qpn = cpu_to_be32(flow_spec->l2_id.ib_uc.qpn);
+ spec_l2->ib.qpn_msk = cpu_to_be32(0xffffff);
+ }
+ break;
+ case IB_FLOW_IB_MC_IPV4:
+ case IB_FLOW_IB_MC_IPV6:
+ spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB;
+ memcpy(spec_l2->ib.dst_gid, flow_spec->l2_id.ib_mc.mgid, 16);
+ memset(spec_l2->ib.dst_gid_msk, 0xff, 16);
+ break;
+ }
+
+
+ list_add_tail(&spec_l2->list, rule_list_h);
+
+ if (flow_spec->l2_id.eth.ethertype == cpu_to_be16(ETH_P_IP) ||
+ flow_spec->type != IB_FLOW_ETH) {
+ spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
+ if (!spec_l3)
+ return -ENOMEM;
+
+ spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
+ spec_l3->ipv4.src_ip = flow_spec->src_ip;
+ if (flow_spec->type != IB_FLOW_IB_MC_IPV4 &&
+ flow_spec->type != IB_FLOW_IB_MC_IPV6)
+ spec_l3->ipv4.dst_ip = flow_spec->dst_ip;
+
+ if (spec_l3->ipv4.src_ip)
+ spec_l3->ipv4.src_ip_msk = MLX4_BE_WORD_MASK;
+ if (spec_l3->ipv4.dst_ip)
+ spec_l3->ipv4.dst_ip_msk = MLX4_BE_WORD_MASK;
+
+ list_add_tail(&spec_l3->list, rule_list_h);
+ }
+
+ if (flow_spec->l4_protocol) {
+ spec_l4 = kzalloc(sizeof(*spec_l4), GFP_KERNEL);
+ if (!spec_l4)
+ return -ENOMEM;
+
+ spec_l4->tcp_udp.src_port = flow_spec->src_port;
+ spec_l4->tcp_udp.dst_port = flow_spec->dst_port;
+ if (spec_l4->tcp_udp.src_port)
+ spec_l4->tcp_udp.src_port_msk =
+ MLX4_BE_SHORT_MASK;
+ if (spec_l4->tcp_udp.dst_port)
+ spec_l4->tcp_udp.dst_port_msk =
+ MLX4_BE_SHORT_MASK;
+
+ switch (flow_spec->l4_protocol) {
+ case IBV_FLOW_L4_UDP:
+ spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP;
+ break;
+ case IBV_FLOW_L4_TCP:
+ spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP;
+ break;
+ default:
+ dev_err(dev->dma_device,
+ "Unsupported l4 protocol.\n");
+ kfree(spec_l4);
+ return -EPROTONOSUPPORT;
+ }
+ list_add_tail(&spec_l4->list, rule_list_h);
+ }
+ return 0;
+}
+
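flow_spec_to_net_rule() above builds the hardware rule as a layered list: an L2 spec is always appended, an IPv4 L3 spec follows when the ethertype is IPv4 or the spec is not raw Ethernet, and a TCP/UDP L4 spec is added only when an L4 protocol is given. A condensed sketch of that decision chain; the flags are stand-ins, not the real driver structures:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        const char *type = "IB_FLOW_ETH";   /* hypothetical spec type */
        int ethertype_is_ipv4 = 1;
        int l4_protocol = 6;                /* IBV_FLOW_L4_TCP */

        puts("append L2 spec");             /* always present */
        if (ethertype_is_ipv4 || strcmp(type, "IB_FLOW_ETH") != 0)
            puts("append L3 (IPv4) spec");
        if (l4_protocol)
            puts("append L4 (TCP/UDP) spec");
        return 0;
    }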
+static int __mlx4_ib_flow_attach(struct mlx4_ib_dev *mdev,
+ struct mlx4_ib_qp *mqp,
+ struct ib_flow_spec *flow_spec,
+ int priority, int lock_qp)
+{
+ u64 reg_id = 0;
+ int err = 0;
+ struct mlx4_cm_steering *cm_flow;
+ struct mlx4_spec_list *spec, *tmp_spec;
+
+ struct mlx4_net_trans_rule rule =
+ { .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ };
+
+ rule.promisc_mode = flow_spec->rule_type;
+ rule.port = mqp->port;
+ rule.qpn = mqp->mqp.qpn;
+ INIT_LIST_HEAD(&rule.list);
+
+ cm_flow = kmalloc(sizeof(*cm_flow), GFP_KERNEL);
+ if (!cm_flow)
+ return -ENOMEM;
+
+ if (rule.promisc_mode == MLX4_FS_REGULAR) {
+ rule.allow_loopback = !flow_spec->block_mc_loopback;
+ rule.priority = MLX4_DOMAIN_UVERBS | priority;
+ err = flow_spec_to_net_rule(&mdev->ib_dev, flow_spec,
+ &rule.list);
+ if (err)
+ goto free_list;
+ }
+
+ err = mlx4_flow_attach(mdev->dev, &rule, &reg_id);
+ if (err)
+ goto free_list;
+
+ memcpy(&cm_flow->spec, flow_spec, sizeof(*flow_spec));
+ cm_flow->reg_id = reg_id;
+
+ if (lock_qp)
+ mutex_lock(&mqp->mutex);
+ list_add(&cm_flow->list, &mqp->rules_list);
+ if (lock_qp)
+ mutex_unlock(&mqp->mutex);
+
+free_list:
+ list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) {
+ list_del(&spec->list);
+ kfree(spec);
+ }
+ if (err) {
+ kfree(cm_flow);
+ dev_err(mdev->ib_dev.dma_device,
+ "Fail to attach flow steering rule\n");
+ }
+ return err;
+}
+
+static int __mlx4_ib_flow_detach(struct mlx4_ib_dev *mdev,
+ struct mlx4_ib_qp *mqp,
+ struct ib_flow_spec *spec, int priority,
+ int lock_qp)
+{
+ struct mlx4_cm_steering *cm_flow;
+ int ret;
+
+ if (lock_qp)
+ mutex_lock(&mqp->mutex);
+ list_for_each_entry(cm_flow, &mqp->rules_list, list) {
+ if (!memcmp(&cm_flow->spec, spec, sizeof(*spec))) {
+ list_del(&cm_flow->list);
+ break;
+ }
+ }
+ if (lock_qp)
+ mutex_unlock(&mqp->mutex);
+
+ if (&cm_flow->list == &mqp->rules_list) {
+ dev_err(mdev->ib_dev.dma_device, "Couldn't find reg_id for flow spec. "
+ "Steering rule is left attached\n");
+ return -EINVAL;
+ }
+
+ ret = mlx4_flow_detach(mdev->dev, cm_flow->reg_id);
+
+ kfree(cm_flow);
+ return ret;
+}
+
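The not-found test above, &cm_flow->list == &mqp->rules_list, relies on a property of list_for_each_entry(): when the loop runs to completion without a break, the cursor's embedded list node is the head sentinel itself. A self-contained illustration of the same test, using a simplified singly linked list rather than the kernel's list.h:

    #include <stdio.h>
    #include <stddef.h>

    struct list_head { struct list_head *next; };
    struct item { int key; struct list_head list; };

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    int main(void)
    {
        struct list_head head;
        struct item a = { .key = 1 }, b = { .key = 2 };
        struct item *cur;

        /* circular sketch: head -> a -> b -> head */
        head.next = &a.list; a.list.next = &b.list; b.list.next = &head;

        for (cur = container_of(head.next, struct item, list);
             &cur->list != &head;
             cur = container_of(cur->list.next, struct item, list))
            if (cur->key == 3)
                break;

        /* loop ran to completion: the cursor's node is the head sentinel */
        if (&cur->list == &head)
            puts("key 3 not found");
        return 0;
    }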
+static int mlx4_ib_flow_attach(struct ib_qp *qp, struct ib_flow_spec *flow_spec,
+ int priority)
+{
+ return __mlx4_ib_flow_attach(to_mdev(qp->device), to_mqp(qp),
+ flow_spec, priority, 1);
+}
+
+static int mlx4_ib_flow_detach(struct ib_qp *qp, struct ib_flow_spec *spec,
+ int priority)
+{
+ return __mlx4_ib_flow_detach(to_mdev(qp->device), to_mqp(qp),
+ spec, priority, 1);
+}
+
+static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
+{
+ struct mlx4_ib_gid_entry *ge;
+ struct mlx4_ib_gid_entry *tmp;
+ struct mlx4_ib_gid_entry *ret = NULL;
+
list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
if (!memcmp(raw, ge->gid.raw, 16)) {
ret = ge;
@@ -724,11 +1199,31 @@
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
u8 mac[6];
struct net_device *ndev;
- struct gid_entry *ge;
+ struct mlx4_ib_gid_entry *ge;
+ u64 reg_id = 0;
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ struct mlx4_ib_steering *ib_steering;
+
+ mutex_lock(&mqp->mutex);
+ list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
+ if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
+ list_del(&ib_steering->list);
+ break;
+ }
+ }
+ mutex_unlock(&mqp->mutex);
+ if (&ib_steering->list == &mqp->steering_rules) {
+ pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
+ return -EINVAL;
+ }
+ reg_id = ib_steering->reg_id;
+ kfree(ib_steering);
+ }
+
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- (ibqp->qp_type == IB_QPT_RAW_ETH) ?
- MLX4_MCAST_PROT_EN : MLX4_MCAST_PROT_IB);
+ MLX4_PROT_IB_IPV6, reg_id);
if (err)
return err;
@@ -750,7 +1245,7 @@
list_del(&ge->list);
kfree(ge);
} else
- printk(KERN_WARNING "could not find mgid entry\n");
+ pr_warn("could not find mgid entry\n");
mutex_unlock(&mqp->mutex);
@@ -757,84 +1252,11 @@
return 0;
}
-static void mlx4_dummy_comp_handler(struct ib_cq *cq, void *cq_context)
-{
-}
-
-static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
-{
- struct mlx4_ib_xrcd *xrcd;
- struct mlx4_ib_dev *mdev = to_mdev(ibdev);
- struct ib_pd *pd;
- struct ib_cq *cq;
- int err;
-
- if (!(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return ERR_PTR(-ENOSYS);
-
- xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
- if (!xrcd)
- return ERR_PTR(-ENOMEM);
-
- err = mlx4_xrcd_alloc(mdev->dev, &xrcd->xrcdn);
- if (err)
- goto err_xrcd;
-
- pd = mlx4_ib_alloc_pd(ibdev, NULL, NULL);
- if (IS_ERR(pd)) {
- err = PTR_ERR(pd);
- goto err_pd;
- }
- pd->device = ibdev;
-
- cq = mlx4_ib_create_cq(ibdev, 1, 0, NULL, NULL);
- if (IS_ERR(cq)) {
- err = PTR_ERR(cq);
- goto err_cq;
- }
- cq->device = ibdev;
- cq->comp_handler = mlx4_dummy_comp_handler;
-
- if (context)
- if (ib_copy_to_udata(udata, &xrcd->xrcdn, sizeof(__u32))) {
- err = -EFAULT;
- goto err_copy;
- }
-
- xrcd->cq = cq;
- xrcd->pd = pd;
- return &xrcd->ibxrcd;
-
-err_copy:
- mlx4_ib_destroy_cq(cq);
-err_cq:
- mlx4_ib_dealloc_pd(pd);
-err_pd:
- mlx4_xrcd_free(mdev->dev, xrcd->xrcdn);
-err_xrcd:
- kfree(xrcd);
- return ERR_PTR(err);
-}
-
-static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
-{
- struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
-
- mlx4_ib_destroy_cq(mxrcd->cq);
- mlx4_ib_dealloc_pd(mxrcd->pd);
- mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
- kfree(xrcd);
-
- return 0;
-}
-
-
static int init_node_data(struct mlx4_ib_dev *dev)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -844,8 +1266,10 @@
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
+ if (mlx4_is_master(dev->dev))
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
- err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -853,7 +1277,7 @@
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
- err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -913,136 +1337,6 @@
&dev_attr_board_id
};
-/*
- * create show function and a device_attribute struct pointing to
- * the function for _name
- */
-#define DEVICE_DIAG_RPRT_ATTR(_name, _offset, _op_mod) \
-static ssize_t show_rprt_##_name(struct device *dev, \
- struct device_attribute *attr, \
- char *buf){ \
- return show_diag_rprt(dev, buf, _offset, _op_mod); \
-} \
-static DEVICE_ATTR(_name, S_IRUGO, show_rprt_##_name, NULL);
-
-#define MLX4_DIAG_RPRT_CLEAR_DIAGS 3
-
-static size_t show_diag_rprt(struct device *device, char *buf,
- u32 offset, u8 op_modifier)
-{
- size_t ret;
- u32 counter_offset = offset;
- u32 diag_counter = 0;
- struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
- ib_dev.dev);
-
- ret = mlx4_query_diag_counters(dev->dev, 1, op_modifier,
- &counter_offset, &diag_counter);
- if (ret)
- return ret;
-
- return sprintf(buf,"%d\n", diag_counter);
-}
-
-static ssize_t clear_diag_counters(struct device *device,
- struct device_attribute *attr,
- const char *buf, size_t length)
-{
- size_t ret;
- struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
- ib_dev.dev);
-
- ret = mlx4_query_diag_counters(dev->dev, 0, MLX4_DIAG_RPRT_CLEAR_DIAGS,
- NULL, NULL);
- if (ret)
- return ret;
-
- return length;
-}
-
-DEVICE_DIAG_RPRT_ATTR(rq_num_lle , 0x00, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_lle , 0x04, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_lqpoe , 0x08, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_lqpoe , 0x0C, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_leeoe , 0x10, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_leeoe , 0x14, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_lpe , 0x18, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_lpe , 0x1C, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_wrfe , 0x20, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_wrfe , 0x24, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_mwbe , 0x2C, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_bre , 0x34, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_lae , 0x38, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_rire , 0x44, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_rire , 0x48, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_rae , 0x4C, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_rae , 0x50, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_roe , 0x54, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_tree , 0x5C, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_rree , 0x64, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_rnr , 0x68, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_rnr , 0x6C, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_rabrte , 0x7C, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_ieecne , 0x84, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_ieecse , 0x8C, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_oos , 0x100, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_oos , 0x104, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_mce , 0x108, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_rsync , 0x110, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_rsync , 0x114, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_udsdprd , 0x118, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_ucsdprd , 0x120, 2);
-DEVICE_DIAG_RPRT_ATTR(num_cqovf , 0x1A0, 2);
-DEVICE_DIAG_RPRT_ATTR(num_eqovf , 0x1A4, 2);
-DEVICE_DIAG_RPRT_ATTR(num_baddb , 0x1A8, 2);
-
-static DEVICE_ATTR(clear_diag, S_IWUGO, NULL, clear_diag_counters);
-
-static struct attribute *diag_rprt_attrs[] = {
- &dev_attr_rq_num_lle.attr,
- &dev_attr_sq_num_lle.attr,
- &dev_attr_rq_num_lqpoe.attr,
- &dev_attr_sq_num_lqpoe.attr,
- &dev_attr_rq_num_leeoe.attr,
- &dev_attr_sq_num_leeoe.attr,
- &dev_attr_rq_num_lpe.attr,
- &dev_attr_sq_num_lpe.attr,
- &dev_attr_rq_num_wrfe.attr,
- &dev_attr_sq_num_wrfe.attr,
- &dev_attr_sq_num_mwbe.attr,
- &dev_attr_sq_num_bre.attr,
- &dev_attr_rq_num_lae.attr,
- &dev_attr_sq_num_rire.attr,
- &dev_attr_rq_num_rire.attr,
- &dev_attr_sq_num_rae.attr,
- &dev_attr_rq_num_rae.attr,
- &dev_attr_sq_num_roe.attr,
- &dev_attr_sq_num_tree.attr,
- &dev_attr_sq_num_rree.attr,
- &dev_attr_rq_num_rnr.attr,
- &dev_attr_sq_num_rnr.attr,
- &dev_attr_sq_num_rabrte.attr,
- &dev_attr_sq_num_ieecne.attr,
- &dev_attr_sq_num_ieecse.attr,
- &dev_attr_rq_num_oos.attr,
- &dev_attr_sq_num_oos.attr,
- &dev_attr_rq_num_mce.attr,
- &dev_attr_rq_num_rsync.attr,
- &dev_attr_sq_num_rsync.attr,
- &dev_attr_rq_num_udsdprd.attr,
- &dev_attr_rq_num_ucsdprd.attr,
- &dev_attr_num_cqovf.attr,
- &dev_attr_num_eqovf.attr,
- &dev_attr_num_baddb.attr,
- &dev_attr_clear_diag.attr,
- NULL
-};
-
-struct attribute_group diag_counters_group = {
- .name = "diag_counters",
- .attrs = diag_rprt_attrs
-};
-
static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
{
#ifdef __linux__
@@ -1049,8 +1343,8 @@
memcpy(eui, dev->dev_addr, 3);
memcpy(eui + 5, dev->dev_addr + 3, 3);
#else
- memcpy(eui, IF_LLADDR(dev), 3);
- memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
+ memcpy(eui, IF_LLADDR(dev), 3);
+ memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
#endif
if (vlan_id < 0x1000) {
eui[3] = vlan_id >> 8;
@@ -1069,11 +1363,10 @@
union ib_gid *gids;
int err;
struct mlx4_dev *dev = gw->dev->dev;
- struct ib_event event;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
- printk(KERN_WARNING "update gid table failed %ld\n", PTR_ERR(mailbox));
+ pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
return;
}
@@ -1081,15 +1374,13 @@
memcpy(gids, gw->gids, sizeof gw->gids);
err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
- 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B);
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
if (err)
- printk(KERN_WARNING "set port command failed\n");
+ pr_warn("set port command failed\n");
else {
memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
- event.device = &gw->dev->ib_dev;
- event.element.port_num = gw->port;
- event.event = IB_EVENT_GID_CHANGE;
- ib_dispatch_event(&event);
+ mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
}
mlx4_free_cmd_mailbox(dev, mailbox);
@@ -1096,10 +1387,6 @@
kfree(gw);
}
-enum {
- MLX4_MAX_EFF_VLANS = 128 - MLX4_VLAN_REGULAR,
-};
-
static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
{
struct net_device *ndev = dev->iboe.netdevs[port - 1];
@@ -1107,11 +1394,11 @@
struct net_device *tmp;
int i;
u8 *hits;
- int ret;
union ib_gid gid;
- int tofree;
+ int index_free;
int found;
int need_update = 0;
+ int max_gids;
u16 vid;
work = kzalloc(sizeof *work, GFP_ATOMIC);
@@ -1118,18 +1405,20 @@
if (!work)
return -ENOMEM;
- hits = kzalloc(MLX4_MAX_EFF_VLANS + 1, GFP_ATOMIC);
+ hits = kzalloc(128, GFP_ATOMIC);
if (!hits) {
- ret = -ENOMEM;
- goto out;
+ kfree(work);
+ return -ENOMEM;
}
+ max_gids = dev->dev->caps.gid_table_len[port];
+
#ifdef __linux__
- read_lock(&dev_base_lock);
- for_each_netdev(&init_net, tmp) {
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, tmp) {
#else
- IFNET_RLOCK();
- TAILQ_FOREACH(tmp, &V_ifnet, if_link) {
+ IFNET_RLOCK();
+ TAILQ_FOREACH(tmp, &V_ifnet, if_link) {
#endif
if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) {
gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
@@ -1136,11 +1425,11 @@
vid = rdma_vlan_dev_vlan_id(tmp);
mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev);
found = 0;
- tofree = -1;
- for (i = 0; i < MLX4_MAX_EFF_VLANS + 1; ++i) {
- if (tofree < 0 &&
+ index_free = -1;
+ for (i = 0; i < max_gids; ++i) {
+ if (index_free < 0 &&
!memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
- tofree = i;
+ index_free = i;
if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) {
hits[i] = 1;
found = 1;
@@ -1149,26 +1438,30 @@
}
if (!found) {
- if (tmp == ndev && (memcmp(&dev->iboe.gid_table[port - 1][0], &gid, sizeof gid) || !memcmp(&dev->iboe.gid_table[port - 1][0], &zgid, sizeof gid))) {
+ if (tmp == ndev &&
+ (memcmp(&dev->iboe.gid_table[port - 1][0],
+ &gid, sizeof gid) ||
+ !memcmp(&dev->iboe.gid_table[port - 1][0],
+ &zgid, sizeof gid))) {
dev->iboe.gid_table[port - 1][0] = gid;
++need_update;
hits[0] = 1;
- } else if (tofree >= 0) {
- dev->iboe.gid_table[port - 1][tofree] = gid;
- hits[tofree] = 1;
+ } else if (index_free >= 0) {
+ dev->iboe.gid_table[port - 1][index_free] = gid;
+ hits[index_free] = 1;
++need_update;
}
}
}
-#ifdef __linux__
- }
- read_unlock(&dev_base_lock);
+#ifdef __linux__
+ }
+ rcu_read_unlock();
#else
- }
- IFNET_RUNLOCK();
+ }
+ IFNET_RUNLOCK();
#endif
- for (i = 0; i < MLX4_MAX_EFF_VLANS + 1; ++i)
+ for (i = 0; i < max_gids; ++i)
if (!hits[i]) {
if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
++need_update;
@@ -1175,7 +1468,6 @@
dev->iboe.gid_table[port - 1][i] = zgid;
}
-
if (need_update) {
memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids);
INIT_WORK(&work->work, update_gids_task);
@@ -1187,10 +1479,6 @@
kfree(hits);
return 0;
-
-out:
- kfree(work);
- return ret;
}
static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
@@ -1239,7 +1527,8 @@
spin_lock(&iboe->lock);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
oldnd = iboe->netdevs[port - 1];
- iboe->netdevs[port - 1] = mlx4_get_prot_dev(ibdev->dev, MLX4_PROT_EN, port);
+ iboe->netdevs[port - 1] =
+ mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
if (oldnd != iboe->netdevs[port - 1]) {
if (iboe->netdevs[port - 1])
netdev_added(ibdev, port);
@@ -1260,20 +1549,328 @@
return NOTIFY_DONE;
}
+static void init_pkeys(struct mlx4_ib_dev *ibdev)
+{
+ int port;
+ int slave;
+ int i;
+
+ if (mlx4_is_master(ibdev->dev)) {
+ for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
+ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
+ for (i = 0;
+ i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
+ ++i) {
+ ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
+ /* master has the identity virt2phys pkey mapping */
+ (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
+ ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
+ mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
+ ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
+ }
+ }
+ }
+ /* initialize pkey cache */
+ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
+ for (i = 0;
+ i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
+ ++i)
+ ibdev->pkeys.phys_pkey_cache[port-1][i] =
+ (i) ? 0 : 0xFFFF;
+ }
+ }
+}
+
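init_pkeys() above gives the master an identity virt-to-phys pkey mapping, while every other slave keeps index 0 mapped to 0 and has all remaining virtual indexes pointed at the last physical entry. A tiny numeric sketch of the expression used there, with a hypothetical table length:

    #include <stdio.h>

    int main(void)
    {
        int table_len = 128;    /* hypothetical pkey_phys_table_len */
        int master = 0;
        int slave, i;

        for (slave = 0; slave < 2; slave++)
            for (i = 0; i < 3; i++)
                printf("slave %d: virt pkey[%d] -> phys %d\n",
                       slave, i,
                       (slave == master || !i) ? i : table_len - 1);
        return 0;   /* slave 0: 0,1,2; slave 1: 0,127,127 */
    }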
+static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
+{
+ char name[32];
+ int eq_per_port = 0;
+ int added_eqs = 0;
+ int total_eqs = 0;
+ int i, j, eq;
+
+ /* Legacy mode or comp_pool is not large enough */
+ if (dev->caps.comp_pool == 0 ||
+ dev->caps.num_ports > dev->caps.comp_pool)
+ return;
+
+ eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
+ dev->caps.num_ports);
+
+ /* Init eq table */
+ added_eqs = 0;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ added_eqs += eq_per_port;
+
+ total_eqs = dev->caps.num_comp_vectors + added_eqs;
+
+ ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL);
+ if (!ibdev->eq_table)
+ return;
+
+ ibdev->eq_added = added_eqs;
+
+ eq = 0;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
+ for (j = 0; j < eq_per_port; j++) {
+ snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%d:%d:%d:%d", i, j,
+ pci_get_domain(dev->pdev->dev.bsddev),
+ pci_get_bus(dev->pdev->dev.bsddev),
+ PCI_SLOT(dev->pdev->devfn),
+ PCI_FUNC(dev->pdev->devfn));
+
+ /* Set IRQ for specific name (per ring) */
+ if (mlx4_assign_eq(dev, name,
+ &ibdev->eq_table[eq])) {
+ /* Use legacy (same as mlx4_en driver) */
+ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
+ ibdev->eq_table[eq] =
+ (eq % dev->caps.num_comp_vectors);
+ }
+ eq++;
+ }
+ }
+
+ /* Fill the rest of the vector with legacy EQs */
+ for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++)
+ ibdev->eq_table[eq++] = i;
+
+ /* Advertise the new number of EQs to clients */
+ ibdev->ib_dev.num_comp_vectors = total_eqs;
+}
+
+static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
+{
+ int i;
+
+ /* no additional eqs were added */
+ if (!ibdev->eq_table)
+ return;
+
+ /* Reset the advertised EQ number */
+ ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
+
+ /* Free only the added eqs */
+ for (i = 0; i < ibdev->eq_added; i++) {
+ /* Don't free legacy eqs if used */
+ if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors)
+ continue;
+ mlx4_release_eq(dev, ibdev->eq_table[i]);
+ }
+
+ kfree(ibdev->eq_table);
+}
+
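mlx4_ib_alloc_eqs() above spreads the completion-vector pool across IB ports as a power of two: eq_per_port = rounddown_pow_of_two(comp_pool / num_ports). For example, with a hypothetical comp_pool of 28 and two IB ports, each port gets 8 dedicated EQs and 16 are added in total. A userspace check of that arithmetic:

    #include <stdio.h>

    /* rounddown_pow_of_two(): largest power of two <= n (n > 0) */
    static unsigned rounddown_pow_of_two(unsigned n)
    {
        unsigned p = 1;
        while (p * 2 <= n)
            p *= 2;
        return p;
    }

    int main(void)
    {
        unsigned comp_pool = 28, num_ports = 2;   /* hypothetical values */
        unsigned eq_per_port = rounddown_pow_of_two(comp_pool / num_ports);

        printf("eq_per_port=%u added=%u\n",
               eq_per_port, eq_per_port * num_ports);
        return 0;   /* prints eq_per_port=8 added=16 */
    }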
+/*
+ * create show function and a device_attribute struct pointing to
+ * the function for _name
+ */
+#define DEVICE_DIAG_RPRT_ATTR(_name, _offset, _op_mod) \
+static ssize_t show_rprt_##_name(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf){ \
+ return show_diag_rprt(dev, buf, _offset, _op_mod); \
+} \
+static DEVICE_ATTR(_name, S_IRUGO, show_rprt_##_name, NULL);
+
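For readability, the first invocation below, DEVICE_DIAG_RPRT_ATTR(rq_num_lle, 0x00, 2), expands after preprocessing to roughly the following (a sketch of the generated code, not an addition to the patch):

    static ssize_t show_rprt_rq_num_lle(struct device *dev,
                                        struct device_attribute *attr,
                                        char *buf)
    {
        return show_diag_rprt(dev, buf, 0x00, 2);
    }
    static DEVICE_ATTR(rq_num_lle, S_IRUGO, show_rprt_rq_num_lle, NULL);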
+#define MLX4_DIAG_RPRT_CLEAR_DIAGS 3
+
+static size_t show_diag_rprt(struct device *device, char *buf,
+ u32 offset, u8 op_modifier)
+{
+ size_t ret;
+ u32 counter_offset = offset;
+ u32 diag_counter = 0;
+ struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
+ ib_dev.dev);
+
+ ret = mlx4_query_diag_counters(dev->dev, 1, op_modifier,
+ &counter_offset, &diag_counter);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%d\n", diag_counter);
+}
+
+static ssize_t clear_diag_counters(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t length)
+{
+ size_t ret;
+ struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
+ ib_dev.dev);
+
+ ret = mlx4_query_diag_counters(dev->dev, 0, MLX4_DIAG_RPRT_CLEAR_DIAGS,
+ NULL, NULL);
+ if (ret)
+ return ret;
+
+ return length;
+}
+
+DEVICE_DIAG_RPRT_ATTR(rq_num_lle , 0x00, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_lle , 0x04, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lqpoe , 0x08, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_lqpoe , 0x0C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lpe , 0x18, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_lpe , 0x1C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_wrfe , 0x20, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_wrfe , 0x24, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_mwbe , 0x2C, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_bre , 0x34, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lae , 0x38, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rire , 0x44, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rire , 0x48, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rae , 0x4C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rae , 0x50, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_roe , 0x54, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_tree , 0x5C, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rree , 0x64, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rnr , 0x68, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rnr , 0x6C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_oos , 0x100, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_oos , 0x104, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_mce , 0x108, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_udsdprd , 0x118, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_ucsdprd , 0x120, 2);
+DEVICE_DIAG_RPRT_ATTR(num_cqovf , 0x1A0, 2);
+DEVICE_DIAG_RPRT_ATTR(num_eqovf , 0x1A4, 2);
+DEVICE_DIAG_RPRT_ATTR(num_baddb , 0x1A8, 2);
+
+static DEVICE_ATTR(clear_diag, S_IWUSR, NULL, clear_diag_counters);
+
+static struct attribute *diag_rprt_attrs[] = {
+ &dev_attr_rq_num_lle.attr,
+ &dev_attr_sq_num_lle.attr,
+ &dev_attr_rq_num_lqpoe.attr,
+ &dev_attr_sq_num_lqpoe.attr,
+ &dev_attr_rq_num_lpe.attr,
+ &dev_attr_sq_num_lpe.attr,
+ &dev_attr_rq_num_wrfe.attr,
+ &dev_attr_sq_num_wrfe.attr,
+ &dev_attr_sq_num_mwbe.attr,
+ &dev_attr_sq_num_bre.attr,
+ &dev_attr_rq_num_lae.attr,
+ &dev_attr_sq_num_rire.attr,
+ &dev_attr_rq_num_rire.attr,
+ &dev_attr_sq_num_rae.attr,
+ &dev_attr_rq_num_rae.attr,
+ &dev_attr_sq_num_roe.attr,
+ &dev_attr_sq_num_tree.attr,
+ &dev_attr_sq_num_rree.attr,
+ &dev_attr_rq_num_rnr.attr,
+ &dev_attr_sq_num_rnr.attr,
+ &dev_attr_rq_num_oos.attr,
+ &dev_attr_sq_num_oos.attr,
+ &dev_attr_rq_num_mce.attr,
+ &dev_attr_rq_num_udsdprd.attr,
+ &dev_attr_rq_num_ucsdprd.attr,
+ &dev_attr_num_cqovf.attr,
+ &dev_attr_num_eqovf.attr,
+ &dev_attr_num_baddb.attr,
+ &dev_attr_clear_diag.attr,
+ NULL
+};
+
+static struct attribute_group diag_counters_group = {
+ .name = "diag_counters",
+ .attrs = diag_rprt_attrs
+};
+
+#ifdef __linux__
+static int mlx4_ib_proc_init(void)
+{
+ /* Create the procfs directories /proc/driver/mlx4_ib and
+ /proc/driver/mlx4_ib/mrs for further use by the driver.
+ */
+ int err;
+
+ mlx4_ib_driver_dir_entry = proc_mkdir(MLX4_IB_DRIVER_PROC_DIR_NAME,
+ NULL);
+ if (!mlx4_ib_driver_dir_entry) {
+ pr_err("mlx4_ib_proc_init has failed for %s\n",
+ MLX4_IB_DRIVER_PROC_DIR_NAME);
+ err = -ENODEV;
+ goto error;
+ }
+
+ mlx4_mrs_dir_entry = proc_mkdir(MLX4_IB_MRS_PROC_DIR_NAME,
+ mlx4_ib_driver_dir_entry);
+ if (!mlx4_mrs_dir_entry) {
+ pr_err("mlx4_ib_proc_init has failed for %s\n",
+ MLX4_IB_MRS_PROC_DIR_NAME);
+ err = -ENODEV;
+ goto remove_entry;
+ }
+
+ return 0;
+
+remove_entry:
+ remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME,
+ NULL);
+error:
+ return err;
+}
+#endif
+
+static void init_dev_assign(void)
+{
+ int bus, slot, fn, ib_idx;
+ char *p = dev_assign_str, *t;
+ char curr_val[32] = {0};
+ int ret;
+ int j, i = 0;
+
+ memset(dr, 0, sizeof dr);
+
+ if (dev_assign_str[0] == 0)
+ return;
+
+ while (strlen(p)) {
+ ret = sscanf(p, "%02x:%02x.%x-%x", &bus, &slot, &fn, &ib_idx);
+ if (ret != 4 || ib_idx < 0)
+ goto err;
+
+ for (j = 0; j < i; j++)
+ if (dr[j].nr == ib_idx)
+ goto err;
+
+ dr[i].bus = bus;
+ dr[i].dev = slot;
+ dr[i].func = fn;
+ dr[i].nr = ib_idx;
+
+ t = strchr(p, ',');
+ sprintf(curr_val, "%02x:%02x.%x-%x", bus, slot, fn, ib_idx);
+ if ((!t) && strlen(p) == strlen(curr_val))
+ return;
+
+ if (!t || (t + 1) >= dev_assign_str + sizeof dev_assign_str)
+ goto err;
+
+ ++i;
+ if (i >= MAX_DR)
+ goto err;
+
+ p = t + 1;
+ }
+
+ return;
+err:
+ memset(dr, 0, sizeof dr);
+ printk(KERN_WARNING "mlx4_ib: The value of 'dev_assign_str' parameter "
+ "is incorrect. The parameter value is discarded!");
+}
+
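init_dev_assign() above parses the comma-separated bb:dd.f-N records described by the MODULE_PARM_DESC; for example, dev_assign_str="02:00.0-0,02:00.1-1" (hypothetical values) would pin PCI functions 02:00.0 and 02:00.1 to IB device numbers 0 and 1. A userspace check of the same sscanf() format:

    #include <stdio.h>

    int main(void)
    {
        const char *rec = "02:00.0-1";      /* one hypothetical record */
        int bus, slot, fn, ib_idx;

        if (sscanf(rec, "%02x:%02x.%x-%x", &bus, &slot, &fn, &ib_idx) == 4)
            printf("%02x:%02x.%x -> ib device %d\n", bus, slot, fn, ib_idx);
        return 0;
    }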
static void *mlx4_ib_add(struct mlx4_dev *dev)
{
- static int mlx4_ib_version_printed;
struct mlx4_ib_dev *ibdev;
int num_ports = 0;
- int i;
+ int i, j;
int err;
struct mlx4_ib_iboe *iboe;
- int k;
- if (!mlx4_ib_version_printed) {
- printk(KERN_INFO "%s", mlx4_ib_version);
- ++mlx4_ib_version_printed;
- }
+ printk(KERN_INFO "%s", mlx4_ib_version);
mlx4_foreach_ib_transport_port(i, dev)
num_ports++;
@@ -1296,9 +1893,12 @@
if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
goto err_pd;
- ibdev->priv_uar.map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ ibdev->priv_uar.map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT,
+ PAGE_SIZE);
+
if (!ibdev->priv_uar.map)
goto err_uar;
+
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
ibdev->dev = dev;
@@ -1312,7 +1912,11 @@
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dma_device = &dev->pdev->dev;
- ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+ if (dev->caps.userspace_caps)
+ ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+ else
+ ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
+
ibdev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -1334,6 +1938,11 @@
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
+ (1ull << IB_USER_VERBS_CMD_OPEN_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_FLOW) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_FLOW) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
ibdev->ib_dev.query_device = mlx4_ib_query_device;
@@ -1346,6 +1955,9 @@
ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
ibdev->ib_dev.mmap = mlx4_ib_mmap;
+#ifdef __linux__
+ ibdev->ib_dev.get_unmapped_area = mlx4_ib_get_unmapped_area;
+#endif
ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
@@ -1376,87 +1988,139 @@
ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
+ ibdev->ib_dev.attach_flow = mlx4_ib_flow_attach;
+ ibdev->ib_dev.detach_flow = mlx4_ib_flow_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
- ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
- ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
- ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
- ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
+ if (!mlx4_is_slave(ibdev->dev)) {
+ ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
+ ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
+ ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
+ ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
+ }
+
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
- ibdev->ib_dev.create_xrc_srq = mlx4_ib_create_xrc_srq;
ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
- ibdev->ib_dev.create_xrc_rcv_qp = mlx4_ib_create_xrc_rcv_qp;
- ibdev->ib_dev.modify_xrc_rcv_qp = mlx4_ib_modify_xrc_rcv_qp;
- ibdev->ib_dev.query_xrc_rcv_qp = mlx4_ib_query_xrc_rcv_qp;
- ibdev->ib_dev.reg_xrc_rcv_qp = mlx4_ib_reg_xrc_rcv_qp;
- ibdev->ib_dev.unreg_xrc_rcv_qp = mlx4_ib_unreg_xrc_rcv_qp;
ibdev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_CREATE_XRC_SRQ) |
- (1ull << IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN) |
- (1ull << IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN) |
- (1ull << IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP) |
- (1ull << IB_USER_VERBS_CMD_REG_XRC_RCV_QP) |
- (1ull << IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP);
+ (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
+ (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ mlx4_ib_alloc_eqs(dev, ibdev);
spin_lock_init(&iboe->lock);
+
if (init_node_data(ibdev))
goto err_map;
- for (k = 0; k < ibdev->num_ports; ++k) {
- err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[k]);
- if (err)
- ibdev->counters[k] = -1;
- else
- mlx4_set_iboe_counter(dev, ibdev->counters[k], k + 1);
+ for (i = 0; i < ibdev->num_ports; ++i) {
+ if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
+ IB_LINK_LAYER_ETHERNET) {
+ err = mlx4_counter_alloc(ibdev->dev, i + 1, &ibdev->counters[i]);
+ if (err)
+ ibdev->counters[i] = -1;
+ } else
+ ibdev->counters[i] = -1;
}
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
- mutex_init(&ibdev->xrc_reg_mutex);
- if (ib_register_device(&ibdev->ib_dev))
- goto err_counter;
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
+ !mlx4_is_slave(dev)) {
+ ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
+ err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
+ MLX4_IB_UC_STEER_QPN_ALIGN, &ibdev->steer_qpn_base, 0);
+ if (err)
+ goto err_counter;
+ ibdev->ib_uc_qpns_bitmap =
+ kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
+ sizeof(long),
+ GFP_KERNEL);
+ if (!ibdev->ib_uc_qpns_bitmap) {
+ dev_err(&dev->pdev->dev, "bit map alloc failed\n");
+ goto err_steer_qp_release;
+ }
+
+ bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
+
+ err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_base + ibdev->steer_qpn_count - 1);
+ if (err)
+ goto err_steer_free_bitmap;
+ }
+
+ if (ib_register_device(&ibdev->ib_dev, NULL))
+ goto err_steer_free_bitmap;
+
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
+
+ if (mlx4_ib_init_sriov(ibdev))
+ goto err_mad;
+
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(&iboe->nb);
if (err)
- goto err_reg;
+ goto err_sriov;
}
- for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) {
+
+ for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
if (device_create_file(&ibdev->ib_dev.dev,
- mlx4_class_attributes[i]))
+ mlx4_class_attributes[j]))
goto err_notif;
}
-
- if(sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group))
+ if (sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group))
goto err_notif;
- ibdev->ib_active = 1;
+ ibdev->ib_active = true;
+ if (mlx4_is_mfunc(ibdev->dev))
+ init_pkeys(ibdev);
+
+ /* create paravirt contexts for any VFs which are active */
+ if (mlx4_is_master(ibdev->dev)) {
+ for (j = 0; j < MLX4_MFUNC_MAX; j++) {
+ if (j == mlx4_master_func_num(ibdev->dev))
+ continue;
+ if (mlx4_is_slave_active(ibdev->dev, j))
+ do_slave_init(ibdev, j, 1);
+ }
+ }
return ibdev;
err_notif:
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
- printk(KERN_WARNING "failure unregistering notifier\n");
+ pr_warn("failure unregistering notifier\n");
flush_workqueue(wq);
+err_sriov:
+ mlx4_ib_close_sriov(ibdev);
+
+err_mad:
+ mlx4_ib_mad_cleanup(ibdev);
+
err_reg:
ib_unregister_device(&ibdev->ib_dev);
+err_steer_free_bitmap:
+ kfree(ibdev->ib_uc_qpns_bitmap);
+
+err_steer_qp_release:
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED)
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
err_counter:
- for (; k; --k)
- mlx4_counter_free(ibdev->dev, ibdev->counters[k - 1]);
+ for (; i; --i)
+ if (ibdev->counters[i - 1] != -1)
+ mlx4_counter_free(ibdev->dev, i, ibdev->counters[i - 1]);
err_map:
iounmap(ibdev->priv_uar.map);
+ mlx4_ib_free_eqs(dev, ibdev);
err_uar:
mlx4_uar_free(dev, &ibdev->priv_uar);
@@ -1470,73 +2134,215 @@
return NULL;
}
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
+{
+ int offset;
+
+ WARN_ON(!dev->ib_uc_qpns_bitmap);
+
+ offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
+ dev->steer_qpn_count,
+ get_count_order(count));
+ if (offset < 0)
+ return offset;
+
+ *qpn = dev->steer_qpn_base + offset;
+ return 0;
+}
+
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
+{
+ if (!qpn ||
+ dev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return;
+
+ BUG_ON(qpn < dev->steer_qpn_base);
+
+ bitmap_release_region(dev->ib_uc_qpns_bitmap,
+ qpn - dev->steer_qpn_base, get_count_order(count));
+}
+
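
Editorial aside on the two steering-QPN helpers above: they carve power-of-two blocks out of the reserved range via the kernel's bitmap-region API (bitmap_find_free_region()/bitmap_release_region()). A standalone sketch of the same allocation shape with a hand-rolled single-word bitmap; the base value, sizes, and names are illustrative only, not the driver's:

#include <stdio.h>

#define QPN_BASE  0x400   /* assumed base, for illustration only */
#define QPN_COUNT 64

static unsigned long qpn_map;          /* one bit per QPN */

static int order_of(int count)         /* like get_count_order() */
{
	int order = 0;
	while ((1 << order) < count)
		order++;
	return order;
}

static int steer_qpn_alloc(int count, int *qpn)
{
	int nbits = 1 << order_of(count);  /* round up to a power of two */
	unsigned long mask = (nbits == 64) ? ~0UL : ((1UL << nbits) - 1);
	int off;

	/* scan naturally aligned blocks, like bitmap_find_free_region() */
	for (off = 0; off + nbits <= QPN_COUNT; off += nbits) {
		if (!(qpn_map & (mask << off))) {
			qpn_map |= mask << off;
			*qpn = QPN_BASE + off;
			return 0;
		}
	}
	return -1;                         /* range exhausted */
}

static void steer_qpn_free(int qpn, int count)
{
	int nbits = 1 << order_of(count);
	unsigned long mask = (nbits == 64) ? ~0UL : ((1UL << nbits) - 1);

	qpn_map &= ~(mask << (qpn - QPN_BASE));
}

int main(void)
{
	int qpn;

	if (!steer_qpn_alloc(3, &qpn))     /* rounds up to a block of 4 */
		printf("got QPN 0x%x\n", qpn);
	steer_qpn_free(qpn, 3);
	return 0;
}
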
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+ int is_attach)
+{
+ struct ib_flow_spec spec = {
+ .type = IB_FLOW_IB_UC,
+ .l2_id.ib_uc.qpn = mqp->ibqp.qp_num,
+ };
+
+ return is_attach ?
+ __mlx4_ib_flow_attach(mdev, mqp, &spec, MLX4_DOMAIN_NIC, 0)
+ : __mlx4_ib_flow_detach(mdev, mqp, &spec, MLX4_DOMAIN_NIC, 0);
+}
+
static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
{
struct mlx4_ib_dev *ibdev = ibdev_ptr;
- int p;
- int k;
+ int p, j;
+ mlx4_ib_close_sriov(ibdev);
sysfs_remove_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group);
+ mlx4_ib_mad_cleanup(ibdev);
- mlx4_ib_mad_cleanup(ibdev);
+ for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
+ device_remove_file(&ibdev->ib_dev.dev, mlx4_class_attributes[j]);
+ }
+
ib_unregister_device(&ibdev->ib_dev);
- for (k = 0; k < ibdev->num_ports; ++k)
- mlx4_counter_free(ibdev->dev, ibdev->counters[k]);
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
+ kfree(ibdev->ib_uc_qpns_bitmap);
+ }
+
if (ibdev->iboe.nb.notifier_call) {
- unregister_netdevice_notifier(&ibdev->iboe.nb);
- flush_workqueue(wq);
+ if (unregister_netdevice_notifier(&ibdev->iboe.nb))
+ pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
iounmap(ibdev->priv_uar.map);
-
+ for (p = 0; p < ibdev->num_ports; ++p)
+ if (ibdev->counters[p] != -1)
+ mlx4_counter_free(ibdev->dev, p + 1, ibdev->counters[p]);
mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
mlx4_CLOSE_PORT(dev, p);
+ mlx4_ib_free_eqs(dev, ibdev);
+
mlx4_uar_free(dev, &ibdev->priv_uar);
mlx4_pd_free(dev, ibdev->priv_pdn);
ib_dealloc_device(&ibdev->ib_dev);
}
+static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
+{
+ struct mlx4_ib_demux_work **dm = NULL;
+ struct mlx4_dev *dev = ibdev->dev;
+ int i;
+ unsigned long flags;
+
+ if (!mlx4_is_master(dev))
+ return;
+
+ dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
+ if (!dm) {
+ pr_err("failed to allocate memory for tunneling qp update\n");
+ goto out;
+ }
+
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
+ if (!dm[i]) {
+ pr_err("failed to allocate memory for tunneling qp update work struct\n");
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ if (dm[i])
+ kfree(dm[i]);
+ }
+ goto out;
+ }
+ }
+ /* initialize or tear down tunnel QPs for the slave */
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
+ dm[i]->port = i + 1;
+ dm[i]->slave = slave;
+ dm[i]->do_init = do_init;
+ dm[i]->dev = ibdev;
+ spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
+ if (!ibdev->sriov.is_going_down)
+ queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
+ spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
+ }
+out:
+ if (dm)
+ kfree(dm);
+ return;
+}
+
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
- enum mlx4_dev_event event, int port)
+ enum mlx4_dev_event event, unsigned long param)
{
struct ib_event ibev;
struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
+ struct mlx4_eqe *eqe = NULL;
+ struct ib_event_work *ew;
+ int p = 0;
- if (port > ibdev->num_ports)
- return;
+ if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
+ eqe = (struct mlx4_eqe *)param;
+ else
+ p = (int) param;
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
+ if (p > ibdev->num_ports)
+ return;
+ if (mlx4_is_master(dev) &&
+ rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
+ IB_LINK_LAYER_INFINIBAND) {
+ mlx4_ib_invalidate_all_guid_record(ibdev, p);
+ }
+ mlx4_ib_info((struct ib_device *) ibdev_ptr,
+ "Port %d logical link is up\n", p);
ibev.event = IB_EVENT_PORT_ACTIVE;
break;
case MLX4_DEV_EVENT_PORT_DOWN:
+ if (p > ibdev->num_ports)
+ return;
+ mlx4_ib_info((struct ib_device *) ibdev_ptr,
+ "Port %d logical link is down\n", p);
ibev.event = IB_EVENT_PORT_ERR;
break;
case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
- ibdev->ib_active = 0;
+ ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
break;
+ case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
+ ew = kmalloc(sizeof *ew, GFP_ATOMIC);
+ if (!ew) {
+ pr_err("failed to allocate memory for events work\n");
+ break;
+ }
+
+ INIT_WORK(&ew->work, handle_port_mgmt_change_event);
+ memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
+ ew->ib_dev = ibdev;
+ /* need to queue only for port owner, which uses GEN_EQE */
+ if (mlx4_is_master(dev))
+ queue_work(wq, &ew->work);
+ else
+ handle_port_mgmt_change_event(&ew->work);
+ return;
+
+ case MLX4_DEV_EVENT_SLAVE_INIT:
+ /* here, p is the slave id */
+ do_slave_init(ibdev, p, 1);
+ return;
+
+ case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+ /* here, p is the slave id */
+ do_slave_init(ibdev, p, 0);
+ return;
+
default:
return;
}
ibev.device = ibdev_ptr;
- ibev.element.port_num = port;
+ ibev.element.port_num = (u8) p;
ib_dispatch_event(&ibev);
}
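
The reworked event hook above now receives an opaque unsigned long that is either a port number or, for PORT_MGMT_CHANGE, a pointer to the EQE. A minimal standalone model of that dispatch (editorial sketch; the event names are stand-ins, not the mlx4 enum):

#include <stdio.h>

enum ev { EV_PORT_UP, EV_MGMT_CHANGE };    /* stand-in event codes */

struct eqe { int subtype; };

static void handle_event(enum ev event, unsigned long param)
{
	if (event == EV_MGMT_CHANGE) {
		/* param carries a pointer for this event type */
		struct eqe *eqe = (struct eqe *)param;
		printf("mgmt change, subtype %d\n", eqe->subtype);
	} else {
		/* otherwise param is a plain port number */
		int port = (int)param;
		printf("port %d up\n", port);
	}
}

int main(void)
{
	struct eqe e = { .subtype = 3 };

	handle_event(EV_PORT_UP, 1);
	handle_event(EV_MGMT_CHANGE, (unsigned long)&e);
	return 0;
}
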
static struct mlx4_interface mlx4_ib_interface = {
- .add = mlx4_ib_add,
- .remove = mlx4_ib_remove,
- .event = mlx4_ib_event,
- .get_prot_dev = get_ibdev,
- .protocol = MLX4_PROT_IB,
+ .add = mlx4_ib_add,
+ .remove = mlx4_ib_remove,
+ .event = mlx4_ib_event,
+ .protocol = MLX4_PROT_IB_IPV6
};
static int __init mlx4_ib_init(void)
@@ -1547,19 +2353,52 @@
if (!wq)
return -ENOMEM;
+#ifdef __linux__
+ err = mlx4_ib_proc_init();
+ if (err)
+ goto clean_wq;
+#endif
+
+ err = mlx4_ib_mcg_init();
+ if (err)
+ goto clean_proc;
+
+ init_dev_assign();
+
err = mlx4_register_interface(&mlx4_ib_interface);
- if (err) {
- destroy_workqueue(wq);
- return err;
- }
+ if (err)
+ goto clean_mcg;
return 0;
+
+clean_mcg:
+ mlx4_ib_mcg_destroy();
+
+clean_proc:
+#ifdef __linux__
+ remove_proc_entry(MLX4_IB_MRS_PROC_DIR_NAME,
+ mlx4_ib_driver_dir_entry);
+ remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL);
+
+clean_wq:
+#endif
+ destroy_workqueue(wq);
+ return err;
}
static void __exit mlx4_ib_cleanup(void)
{
mlx4_unregister_interface(&mlx4_ib_interface);
+ mlx4_ib_mcg_destroy();
destroy_workqueue(wq);
+
+ /* Remove proc entries */
+#ifdef __linux__
+ remove_proc_entry(MLX4_IB_MRS_PROC_DIR_NAME,
+ mlx4_ib_driver_dir_entry);
+ remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL);
+#endif
+
}
module_init_order(mlx4_ib_init, SI_ORDER_MIDDLE);
@@ -1572,9 +2411,11 @@
{
return (0);
}
+
static moduledata_t mlx4ib_mod = {
.name = "mlx4ib",
.evhand = mlx4ib_evhand,
};
-DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_SMP, SI_ORDER_ANY);
+DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY);
MODULE_DEPEND(mlx4ib, mlx4, 1, 1, 1);
+MODULE_DEPEND(mlx4ib, ibcore, 1, 1, 1);
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/main.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,1254 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_sa.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/delay.h>
+
+#include "mlx4_ib.h"
+
+#define MAX_VFS 80
+#define MAX_PEND_REQS_PER_FUNC 4
+#define MAD_TIMEOUT_MS 2000
+
+#define mcg_warn(fmt, arg...) pr_warn("MCG WARNING: " fmt, ##arg)
+#define mcg_error(fmt, arg...) pr_err(fmt, ##arg)
+#define mcg_warn_group(group, format, arg...) \
+ pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
+ (group)->name, group->demux->port, ## arg)
+
+#define mcg_error_group(group, format, arg...) \
+ pr_err(" %16s: " format, (group)->name, ## arg)
+
+static union ib_gid mgid0;
+
+static struct workqueue_struct *clean_wq;
+
+enum mcast_state {
+ MCAST_NOT_MEMBER = 0,
+ MCAST_MEMBER,
+};
+
+enum mcast_group_state {
+ MCAST_IDLE,
+ MCAST_JOIN_SENT,
+ MCAST_LEAVE_SENT,
+ MCAST_RESP_READY
+};
+
+struct mcast_member {
+ enum mcast_state state;
+ uint8_t join_state;
+ int num_pend_reqs;
+ struct list_head pending;
+};
+
+struct ib_sa_mcmember_data {
+ union ib_gid mgid;
+ union ib_gid port_gid;
+ __be32 qkey;
+ __be16 mlid;
+ u8 mtusel_mtu;
+ u8 tclass;
+ __be16 pkey;
+ u8 ratesel_rate;
+ u8 lifetmsel_lifetm;
+ __be32 sl_flowlabel_hoplimit;
+ u8 scope_join_state;
+ u8 proxy_join;
+ u8 reserved[2];
+};
+
+struct mcast_group {
+ struct ib_sa_mcmember_data rec;
+ struct rb_node node;
+ struct list_head mgid0_list;
+ struct mlx4_ib_demux_ctx *demux;
+ struct mcast_member func[MAX_VFS];
+ struct mutex lock;
+ struct work_struct work;
+ struct list_head pending_list;
+ int members[3];
+ enum mcast_group_state state;
+ enum mcast_group_state prev_state;
+ struct ib_sa_mad response_sa_mad;
+ __be64 last_req_tid;
+
+ char name[33]; /* MGID string */
+ struct device_attribute dentry;
+
+ /* refcount is the reference count for the following:
+ 1. Each queued request
+ 2. Each invocation of the worker thread
+ 3. Membership of the port at the SA
+ */
+ atomic_t refcount;
+
+ /* delayed work to clean pending SM request */
+ struct delayed_work timeout_work;
+ struct list_head cleanup_list;
+};
+
+struct mcast_req {
+ int func;
+ struct ib_sa_mad sa_mad;
+ struct list_head group_list;
+ struct list_head func_list;
+ struct mcast_group *group;
+ int clean;
+};
+
+
+/* decrement a refcount that must stay positive; warns (via the local
+ * 'group' variable it expects in scope) if it unexpectedly hits zero */
+#define safe_atomic_dec(ref) \
+ do {\
+ if (atomic_dec_and_test(ref)) \
+ mcg_warn_group(group, "did not expect to reach zero\n"); \
+ } while (0)
+
+static const char *get_state_string(enum mcast_group_state state)
+{
+ switch (state) {
+ case MCAST_IDLE:
+ return "MCAST_IDLE";
+ case MCAST_JOIN_SENT:
+ return "MCAST_JOIN_SENT";
+ case MCAST_LEAVE_SENT:
+ return "MCAST_LEAVE_SENT";
+ case MCAST_RESP_READY:
+ return "MCAST_RESP_READY";
+ }
+ return "Invalid State";
+}
+
+static struct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx,
+ union ib_gid *mgid)
+{
+ struct rb_node *node = ctx->mcg_table.rb_node;
+ struct mcast_group *group;
+ int ret;
+
+ while (node) {
+ group = rb_entry(node, struct mcast_group, node);
+ ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
+ if (!ret)
+ return group;
+
+ if (ret < 0)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+ return NULL;
+}
+
+static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx,
+ struct mcast_group *group)
+{
+ struct rb_node **link = &ctx->mcg_table.rb_node;
+ struct rb_node *parent = NULL;
+ struct mcast_group *cur_group;
+ int ret;
+
+ while (*link) {
+ parent = *link;
+ cur_group = rb_entry(parent, struct mcast_group, node);
+
+ ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
+ sizeof group->rec.mgid);
+ if (ret < 0)
+ link = &(*link)->rb_left;
+ else if (ret > 0)
+ link = &(*link)->rb_right;
+ else
+ return cur_group;
+ }
+ rb_link_node(&group->node, parent, link);
+ rb_insert_color(&group->node, &ctx->mcg_table);
+ return NULL;
+}
+
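
mcast_find()/mcast_insert() above are the stock Linux rbtree search-and-insert idiom, keyed by memcmp() over the 128-bit MGID. The same shape on a plain unbalanced binary tree, as a standalone editorial sketch (no rebalancing; not the driver's code):

#include <stdio.h>
#include <string.h>

struct node {
	unsigned char key[16];             /* stands in for the MGID */
	struct node *left, *right;
};

/* walk left/right on memcmp(); on a miss, remember the link slot
 * and attach the new node there, as mcast_insert() does */
static struct node *tree_insert(struct node **root, struct node *n)
{
	struct node **link = root;

	while (*link) {
		int ret = memcmp(n->key, (*link)->key, sizeof(n->key));

		if (ret < 0)
			link = &(*link)->left;
		else if (ret > 0)
			link = &(*link)->right;
		else
			return *link;      /* duplicate: keep existing */
	}
	*link = n;
	return NULL;
}

int main(void)
{
	struct node a = { .key = { 1 } }, b = { .key = { 2 } };
	struct node *root = NULL;

	tree_insert(&root, &a);
	printf("dup? %s\n", tree_insert(&root, &b) ? "yes" : "no");
	return 0;
}
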
+static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
+{
+ struct mlx4_ib_dev *dev = ctx->dev;
+ struct ib_ah_attr ah_attr;
+
+ spin_lock(&dev->sm_lock);
+ if (!dev->sm_ah[ctx->port - 1]) {
+ /* port is not yet Active, sm_ah not ready */
+ spin_unlock(&dev->sm_lock);
+ return -EAGAIN;
+ }
+ mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
+ spin_unlock(&dev->sm_lock);
+ return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port,
+ IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad);
+}
+
+static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
+ struct ib_mad *mad)
+{
+ struct mlx4_ib_dev *dev = ctx->dev;
+ struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1];
+ struct ib_wc wc;
+ struct ib_ah_attr ah_attr;
+
+ /* Our agent might not yet be registered when MADs start to arrive */
+ if (!agent)
+ return -EAGAIN;
+
+ ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
+
+ if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index))
+ return -EINVAL;
+ wc.sl = 0;
+ wc.dlid_path_bits = 0;
+ wc.port_num = ctx->port;
+ wc.slid = ah_attr.dlid; /* opensm lid */
+ wc.src_qp = 1;
+ return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad);
+}
+
+static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad)
+{
+ struct ib_sa_mad mad;
+ struct ib_sa_mcmember_data *sa_mad_data = (struct ib_sa_mcmember_data *)&mad.data;
+ int ret;
+
+ /* we rely on the MAD request as it arrived from the VF */
+ memcpy(&mad, sa_mad, sizeof mad);
+
+ /* fix port GID to be the real one (slave 0) */
+ sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0];
+
+ /* assign our own TID */
+ mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
+ group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
+
+ ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
+ /* set timeout handler */
+ if (!ret) {
+ /* calls mlx4_ib_mcg_timeout_handler */
+ queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
+ msecs_to_jiffies(MAD_TIMEOUT_MS));
+ }
+
+ return ret;
+}
+
+static int send_leave_to_wire(struct mcast_group *group, u8 join_state)
+{
+ struct ib_sa_mad mad;
+ struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
+ int ret;
+
+ memset(&mad, 0, sizeof mad);
+ mad.mad_hdr.base_version = 1;
+ mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ mad.mad_hdr.class_version = 2;
+ mad.mad_hdr.method = IB_SA_METHOD_DELETE;
+ mad.mad_hdr.status = cpu_to_be16(0);
+ mad.mad_hdr.class_specific = cpu_to_be16(0);
+ mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
+ group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
+ mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
+ mad.mad_hdr.attr_mod = cpu_to_be32(0);
+ mad.sa_hdr.sm_key = 0x0;
+ mad.sa_hdr.attr_offset = cpu_to_be16(7);
+ mad.sa_hdr.comp_mask = IB_SA_MCMEMBER_REC_MGID |
+ IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE;
+
+ *sa_data = group->rec;
+ sa_data->scope_join_state = join_state;
+
+ ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
+ if (ret)
+ group->state = MCAST_IDLE;
+
+ /* set timeout handler */
+ if (!ret) {
+ /* calls mlx4_ib_mcg_timeout_handler */
+ queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
+ msecs_to_jiffies(MAD_TIMEOUT_MS));
+ }
+
+ return ret;
+}
+
+static int send_reply_to_slave(int slave, struct mcast_group *group,
+ struct ib_sa_mad *req_sa_mad, u16 status)
+{
+ struct ib_sa_mad mad;
+ struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
+ struct ib_sa_mcmember_data *req_sa_data = (struct ib_sa_mcmember_data *)&req_sa_mad->data;
+ int ret;
+
+ memset(&mad, 0, sizeof mad);
+ mad.mad_hdr.base_version = 1;
+ mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ mad.mad_hdr.class_version = 2;
+ mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+ mad.mad_hdr.status = cpu_to_be16(status);
+ mad.mad_hdr.class_specific = cpu_to_be16(0);
+ mad.mad_hdr.tid = req_sa_mad->mad_hdr.tid;
+ *(u8 *)&mad.mad_hdr.tid = 0; /* clear the slave-id byte stashed in the TID */
+ mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
+ mad.mad_hdr.attr_mod = cpu_to_be32(0);
+ mad.sa_hdr.sm_key = req_sa_mad->sa_hdr.sm_key;
+ mad.sa_hdr.attr_offset = cpu_to_be16(7);
+ mad.sa_hdr.comp_mask = 0; /* ignored on responses, see IBTA spec */
+
+ *sa_data = group->rec;
+
+ /* reconstruct VF's requested join_state and port_gid */
+ sa_data->scope_join_state &= 0xf0;
+ sa_data->scope_join_state |= (group->func[slave].join_state & 0x0f);
+ memcpy(&sa_data->port_gid, &req_sa_data->port_gid, sizeof req_sa_data->port_gid);
+
+ ret = send_mad_to_slave(slave, group->demux, (struct ib_mad *)&mad);
+ return ret;
+}
+
+static int check_selector(ib_sa_comp_mask comp_mask,
+ ib_sa_comp_mask selector_mask,
+ ib_sa_comp_mask value_mask,
+ u8 src_value, u8 dst_value)
+{
+ int err;
+ u8 selector = dst_value >> 6;
+ dst_value &= 0x3f;
+ src_value &= 0x3f;
+
+ if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
+ return 0;
+
+ switch (selector) {
+ case IB_SA_GT:
+ err = (src_value <= dst_value);
+ break;
+ case IB_SA_LT:
+ err = (src_value >= dst_value);
+ break;
+ case IB_SA_EQ:
+ err = (src_value != dst_value);
+ break;
+ default:
+ err = 0;
+ break;
+ }
+
+ return err;
+}
+
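
check_selector() above hides the IBTA encoding in which the two high bits of a requested field pick the comparison and the low six carry the value; a nonzero return means the group's value (src) does not satisfy the request (dst). A standalone illustration (editorial sketch; the numeric selector encoding below is assumed, not copied from ib_sa.h):

#include <stdio.h>

enum sel { SEL_GT, SEL_LT, SEL_EQ };       /* assumed encoding */

static int selector_mismatch(unsigned char src, unsigned char dst_field)
{
	unsigned char sel = dst_field >> 6;    /* top two bits: selector */
	unsigned char dst = dst_field & 0x3f;  /* low six bits: value */

	src &= 0x3f;
	switch (sel) {
	case SEL_GT: return src <= dst;        /* group must exceed request */
	case SEL_LT: return src >= dst;
	case SEL_EQ: return src != dst;
	default:     return 0;                 /* "best": always matches */
	}
}

int main(void)
{
	/* group MTU code 4 against a "greater than 3" request: a match */
	printf("mismatch=%d\n", selector_mismatch(4, (SEL_GT << 6) | 3));
	return 0;
}
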
+static u16 cmp_rec(struct ib_sa_mcmember_data *src,
+ struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask)
+{
+ /* src is group record, dst is request record */
+ /* MGID must already match */
+ /* Port_GID is always replaced with our own Port_GID, so it is a match */
+
+#define MAD_STATUS_REQ_INVALID 0x0200
+ if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
+ return MAD_STATUS_REQ_INVALID;
+ if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
+ IB_SA_MCMEMBER_REC_MTU,
+ src->mtusel_mtu, dst->mtusel_mtu))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
+ src->tclass != dst->tclass)
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
+ return MAD_STATUS_REQ_INVALID;
+ if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
+ IB_SA_MCMEMBER_REC_RATE,
+ src->ratesel_rate, dst->ratesel_rate))
+ return MAD_STATUS_REQ_INVALID;
+ if (check_selector(comp_mask,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
+ src->lifetmsel_lifetm, dst->lifetmsel_lifetm))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_SL &&
+ (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0xf0000000) !=
+ (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0xf0000000))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
+ (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x0fffff00) !=
+ (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x0fffff00))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
+ (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x000000ff) !=
+ (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x000000ff))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE &&
+ (src->scope_join_state & 0xf0) !=
+ (dst->scope_join_state & 0xf0))
+ return MAD_STATUS_REQ_INVALID;
+
+ /* join_state checked separately, proxy_join ignored */
+
+ return 0;
+}
+
+/* release group, return 1 if this was the last release and the group is destroyed;
+ * timeout work is canceled synchronously */
+static int release_group(struct mcast_group *group, int from_timeout_handler)
+{
+ struct mlx4_ib_demux_ctx *ctx = group->demux;
+ int nzgroup;
+
+ mutex_lock(&ctx->mcg_table_lock);
+ mutex_lock(&group->lock);
+ if (atomic_dec_and_test(&group->refcount)) {
+ if (!from_timeout_handler) {
+ if (group->state != MCAST_IDLE &&
+ !cancel_delayed_work(&group->timeout_work)) {
+ atomic_inc(&group->refcount);
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ return 0;
+ }
+ }
+
+ nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0);
+ if (nzgroup)
+ del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
+ if (!list_empty(&group->pending_list))
+ mcg_warn_group(group, "releasing a group with non-empty pending list\n");
+ if (nzgroup)
+ rb_erase(&group->node, &ctx->mcg_table);
+ list_del_init(&group->mgid0_list);
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ kfree(group);
+ return 1;
+ } else {
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ }
+ return 0;
+}
+
+static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
+{
+ int i;
+
+ for (i = 0; i < 3; i++, join_state >>= 1)
+ if (join_state & 0x1)
+ group->members[i] += inc;
+}
+
+static u8 get_leave_state(struct mcast_group *group)
+{
+ u8 leave_state = 0;
+ int i;
+
+ for (i = 0; i < 3; i++)
+ if (!group->members[i])
+ leave_state |= (1 << i);
+
+ return leave_state & (group->rec.scope_join_state & 7);
+}
+
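
The bookkeeping above counts members per join-state bit (the low three bits of scope_join_state: full / non / send-only member); a leave goes to the wire only for bits whose count has dropped to zero. A worked standalone example (editorial sketch of the same arithmetic):

#include <stdio.h>

static int members[3];

static void adjust(unsigned char join_state, int inc)
{
	for (int i = 0; i < 3; i++, join_state >>= 1)
		if (join_state & 1)
			members[i] += inc;
}

/* which of the group's active bits can now be left on the wire */
static unsigned char leave_state(unsigned char group_state)
{
	unsigned char leave = 0;

	for (int i = 0; i < 3; i++)
		if (!members[i])
			leave |= 1 << i;
	return leave & (group_state & 7);
}

int main(void)
{
	adjust(0x1, 1);                      /* VF0 joins as full member */
	adjust(0x3, 1);                      /* VF1 joins full + non     */
	adjust(0x1, -1);                     /* VF0 leaves               */
	printf("wire leave mask: 0x%x\n", leave_state(0x3));  /* 0x0 */
	adjust(0x3, -1);                     /* VF1 leaves               */
	printf("wire leave mask: 0x%x\n", leave_state(0x3));  /* 0x3 */
	return 0;
}
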
+static int join_group(struct mcast_group *group, int slave, u8 join_mask)
+{
+ int ret = 0;
+ u8 join_state;
+
+ /* remove bits that slave is already member of, and adjust */
+ join_state = join_mask & (~group->func[slave].join_state);
+ adjust_membership(group, join_state, 1);
+ group->func[slave].join_state |= join_state;
+ if (group->func[slave].state != MCAST_MEMBER && join_state) {
+ group->func[slave].state = MCAST_MEMBER;
+ ret = 1;
+ }
+ return ret;
+}
+
+static int leave_group(struct mcast_group *group, int slave, u8 leave_state)
+{
+ int ret = 0;
+
+ adjust_membership(group, leave_state, -1);
+ group->func[slave].join_state &= ~leave_state;
+ if (!group->func[slave].join_state) {
+ group->func[slave].state = MCAST_NOT_MEMBER;
+ ret = 1;
+ }
+ return ret;
+}
+
+static int check_leave(struct mcast_group *group, int slave, u8 leave_mask)
+{
+ if (group->func[slave].state != MCAST_MEMBER)
+ return MAD_STATUS_REQ_INVALID;
+
+ /* make sure we're not deleting unset bits */
+ if (~group->func[slave].join_state & leave_mask)
+ return MAD_STATUS_REQ_INVALID;
+
+ if (!leave_mask)
+ return MAD_STATUS_REQ_INVALID;
+
+ return 0;
+}
+
+static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct mcast_group *group;
+ struct mcast_req *req = NULL;
+
+ group = container_of(delay, typeof(*group), timeout_work);
+
+ mutex_lock(&group->lock);
+ if (group->state == MCAST_JOIN_SENT) {
+ if (!list_empty(&group->pending_list)) {
+ req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ --group->func[req->func].num_pend_reqs;
+ mutex_unlock(&group->lock);
+ kfree(req);
+ if (memcmp(&group->rec.mgid, &mgid0, sizeof mgid0)) {
+ if (release_group(group, 1))
+ return;
+ } else {
+ kfree(group);
+ return;
+ }
+ mutex_lock(&group->lock);
+ } else
+ mcg_warn_group(group, "DRIVER BUG\n");
+ } else if (group->state == MCAST_LEAVE_SENT) {
+ if (group->rec.scope_join_state & 7)
+ group->rec.scope_join_state &= 0xf8;
+ group->state = MCAST_IDLE;
+ mutex_unlock(&group->lock);
+ if (release_group(group, 1))
+ return;
+ mutex_lock(&group->lock);
+ } else
+ mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state));
+ group->state = MCAST_IDLE;
+ atomic_inc(&group->refcount);
+ queue_work(group->demux->mcg_wq, &group->work);
+ safe_atomic_dec(&group->refcount);
+
+ mutex_unlock(&group->lock);
+}
+
+static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
+ struct mcast_req *req)
+{
+ u16 status;
+
+ if (req->clean)
+ leave_mask = group->func[req->func].join_state;
+
+ status = check_leave(group, req->func, leave_mask);
+ if (!status)
+ leave_group(group, req->func, leave_mask);
+
+ if (!req->clean)
+ send_reply_to_slave(req->func, group, &req->sa_mad, status);
+ --group->func[req->func].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ return 1;
+}
+
+static int handle_join_req(struct mcast_group *group, u8 join_mask,
+ struct mcast_req *req)
+{
+ u8 group_join_state = group->rec.scope_join_state & 7;
+ int ref = 0;
+ u16 status;
+ struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
+
+ if (join_mask == (group_join_state & join_mask)) {
+ /* port's membership need not change */
+ status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask);
+ if (!status)
+ join_group(group, req->func, join_mask);
+
+ --group->func[req->func].num_pend_reqs;
+ send_reply_to_slave(req->func, group, &req->sa_mad, status);
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ ++ref;
+ } else {
+ /* port's membership needs to be updated */
+ group->prev_state = group->state;
+ if (send_join_to_wire(group, &req->sa_mad)) {
+ --group->func[req->func].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ ref = 1;
+ group->state = group->prev_state;
+ } else
+ group->state = MCAST_JOIN_SENT;
+ }
+
+ return ref;
+}
+
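
handle_join_req() above answers a VF locally when its requested join bits are already covered by the group's state on the wire, and escalates to the SM only for new bits. The subset test it uses, standalone (editorial sketch):

#include <stdio.h>

static int covered(unsigned char group_state, unsigned char join_mask)
{
	/* true when every requested bit is already set in the group */
	return join_mask == (group_state & join_mask);
}

int main(void)
{
	printf("%d\n", covered(0x3, 0x1));   /* 1: answered locally   */
	printf("%d\n", covered(0x1, 0x2));   /* 0: must go to the SM  */
	return 0;
}
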
+static void mlx4_ib_mcg_work_handler(struct work_struct *work)
+{
+ struct mcast_group *group;
+ struct mcast_req *req = NULL;
+ struct ib_sa_mcmember_data *sa_data;
+ u8 req_join_state;
+ int rc = 1; /* release_count - this is for the scheduled work */
+ u16 status;
+ u8 method;
+
+ group = container_of(work, typeof(*group), work);
+
+ mutex_lock(&group->lock);
+
+ /* First, let's see if a response from the SM is waiting for this group.
+ * If so, we need to update the group's REC. If this is a bad response, we
+ * may need to send a bad response to a VF waiting for it. If a VF is waiting
+ * and this is a good response, the VF will be answered later in this function. */
+ if (group->state == MCAST_RESP_READY) {
+ /* cancels mlx4_ib_mcg_timeout_handler */
+ cancel_delayed_work(&group->timeout_work);
+ status = be16_to_cpu(group->response_sa_mad.mad_hdr.status);
+ method = group->response_sa_mad.mad_hdr.method;
+ if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
+ mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
+ (long long unsigned int)be64_to_cpu(group->response_sa_mad.mad_hdr.tid),
+ (long long unsigned int)be64_to_cpu(group->last_req_tid));
+ group->state = group->prev_state;
+ goto process_requests;
+ }
+ if (status) {
+ if (!list_empty(&group->pending_list))
+ req = list_first_entry(&group->pending_list,
+ struct mcast_req, group_list);
+ if (method == IB_MGMT_METHOD_GET_RESP) {
+ if (req) {
+ send_reply_to_slave(req->func, group, &req->sa_mad, status);
+ --group->func[req->func].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ ++rc;
+ } else
+ mcg_warn_group(group, "no request for failed join\n");
+ } else if (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing)
+ ++rc;
+ } else {
+ u8 resp_join_state;
+ u8 cur_join_state;
+
+ resp_join_state = ((struct ib_sa_mcmember_data *)
+ group->response_sa_mad.data)->scope_join_state & 7;
+ cur_join_state = group->rec.scope_join_state & 7;
+
+ if (method == IB_MGMT_METHOD_GET_RESP) {
+ /* successful join */
+ if (!cur_join_state && resp_join_state)
+ --rc;
+ } else if (!resp_join_state)
+ ++rc;
+ memcpy(&group->rec, group->response_sa_mad.data, sizeof group->rec);
+ }
+ group->state = MCAST_IDLE;
+ }
+
+process_requests:
+ /* We should now go over pending join/leave requests, as long as we are idle. */
+ while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) {
+ req = list_first_entry(&group->pending_list, struct mcast_req,
+ group_list);
+ sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
+ req_join_state = sa_data->scope_join_state & 0x7;
+
+ /* For a leave request, we will immediately answer the VF, and
+ * update our internal counters. The actual leave will be sent
+ * to SM later, if at all needed. We dequeue the request now. */
+ if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE)
+ rc += handle_leave_req(group, req_join_state, req);
+ else
+ rc += handle_join_req(group, req_join_state, req);
+ }
+
+ /* Handle leaves */
+ if (group->state == MCAST_IDLE) {
+ req_join_state = get_leave_state(group);
+ if (req_join_state) {
+ group->rec.scope_join_state &= ~req_join_state;
+ group->prev_state = group->state;
+ if (send_leave_to_wire(group, req_join_state)) {
+ group->state = group->prev_state;
+ ++rc;
+ } else
+ group->state = MCAST_LEAVE_SENT;
+ }
+ }
+
+ if (!list_empty(&group->pending_list) && group->state == MCAST_IDLE)
+ goto process_requests;
+ mutex_unlock(&group->lock);
+
+ while (rc--)
+ release_group(group, 0);
+}
+
+static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx *ctx,
+ __be64 tid,
+ union ib_gid *new_mgid)
+{
+ struct mcast_group *group = NULL, *cur_group;
+ struct mcast_req *req;
+ struct list_head *pos;
+ struct list_head *n;
+
+ mutex_lock(&ctx->mcg_table_lock);
+ list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) {
+ group = list_entry(pos, struct mcast_group, mgid0_list);
+ mutex_lock(&group->lock);
+ if (group->last_req_tid == tid) {
+ if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
+ group->rec.mgid = *new_mgid;
+ sprintf(group->name, "%016llx%016llx",
+ (long long unsigned int)be64_to_cpu(group->rec.mgid.global.subnet_prefix),
+ (long long unsigned int)be64_to_cpu(group->rec.mgid.global.interface_id));
+ list_del_init(&group->mgid0_list);
+ cur_group = mcast_insert(ctx, group);
+ if (cur_group) {
+ /* A race between our code and SM. Silently cleaning the new one */
+ req = list_first_entry(&group->pending_list,
+ struct mcast_req, group_list);
+ --group->func[req->func].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ release_group(group, 0);
+ return NULL;
+ }
+
+ atomic_inc(&group->refcount);
+ add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ return group;
+ } else {
+ struct mcast_req *tmp1, *tmp2;
+
+ list_del(&group->mgid0_list);
+ if (!list_empty(&group->pending_list) && group->state != MCAST_IDLE)
+ cancel_delayed_work_sync(&group->timeout_work);
+
+ list_for_each_entry_safe(tmp1, tmp2, &group->pending_list, group_list) {
+ list_del(&tmp1->group_list);
+ kfree(tmp1);
+ }
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ kfree(group);
+ return NULL;
+ }
+ }
+ mutex_unlock(&group->lock);
+ }
+ mutex_unlock(&ctx->mcg_table_lock);
+
+ return NULL;
+}
+
+static ssize_t sysfs_show_group(struct device *dev,
+ struct device_attribute *attr, char *buf);
+
+static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
+ union ib_gid *mgid, int create,
+ gfp_t gfp_mask)
+{
+ struct mcast_group *group, *cur_group;
+ int is_mgid0;
+ int i;
+
+ is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
+ if (!is_mgid0) {
+ group = mcast_find(ctx, mgid);
+ if (group)
+ goto found;
+ }
+
+ if (!create)
+ return ERR_PTR(-ENOENT);
+
+ group = kzalloc(sizeof *group, gfp_mask);
+ if (!group)
+ return ERR_PTR(-ENOMEM);
+
+ group->demux = ctx;
+ group->rec.mgid = *mgid;
+ INIT_LIST_HEAD(&group->pending_list);
+ INIT_LIST_HEAD(&group->mgid0_list);
+ for (i = 0; i < MAX_VFS; ++i)
+ INIT_LIST_HEAD(&group->func[i].pending);
+ INIT_WORK(&group->work, mlx4_ib_mcg_work_handler);
+ INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler);
+ mutex_init(&group->lock);
+ sprintf(group->name, "%016llx%016llx",
+ (long long unsigned int)be64_to_cpu(group->rec.mgid.global.subnet_prefix),
+ (long long unsigned int)be64_to_cpu(group->rec.mgid.global.interface_id));
+ sysfs_attr_init(&group->dentry.attr);
+ group->dentry.show = sysfs_show_group;
+ group->dentry.store = NULL;
+ group->dentry.attr.name = group->name;
+ group->dentry.attr.mode = 0400;
+ group->state = MCAST_IDLE;
+
+ if (is_mgid0) {
+ list_add(&group->mgid0_list, &ctx->mcg_mgid0_list);
+ goto found;
+ }
+
+ cur_group = mcast_insert(ctx, group);
+ if (cur_group) {
+ mcg_warn("group just showed up %s - confused\n", cur_group->name);
+ kfree(group);
+ return ERR_PTR(-EINVAL);
+ }
+
+ add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
+
+found:
+ atomic_inc(&group->refcount);
+ return group;
+}
+
+static void queue_req(struct mcast_req *req)
+{
+ struct mcast_group *group = req->group;
+
+ atomic_inc(&group->refcount); /* for the request */
+ atomic_inc(&group->refcount); /* for scheduling the work */
+ list_add_tail(&req->group_list, &group->pending_list);
+ list_add_tail(&req->func_list, &group->func[req->func].pending);
+ /* calls mlx4_ib_mcg_work_handler */
+ queue_work(group->demux->mcg_wq, &group->work);
+ safe_atomic_dec(&group->refcount);
+}
+
+int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
+ struct ib_sa_mad *mad)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)mad->data;
+ struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
+ struct mcast_group *group;
+
+ switch (mad->mad_hdr.method) {
+ case IB_MGMT_METHOD_GET_RESP:
+ case IB_SA_METHOD_DELETE_RESP:
+ mutex_lock(&ctx->mcg_table_lock);
+ group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL);
+ mutex_unlock(&ctx->mcg_table_lock);
+ if (IS_ERR(group)) {
+ if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) {
+ __be64 tid = mad->mad_hdr.tid;
+ *(u8 *)(&tid) = (u8)slave; /* in group we kept the modified TID */
+ group = search_relocate_mgid0_group(ctx, tid, &rec->mgid);
+ } else
+ group = NULL;
+ }
+
+ if (!group)
+ return 1;
+
+ mutex_lock(&group->lock);
+ group->response_sa_mad = *mad;
+ group->prev_state = group->state;
+ group->state = MCAST_RESP_READY;
+ /* calls mlx4_ib_mcg_work_handler */
+ atomic_inc(&group->refcount);
+ queue_work(ctx->mcg_wq, &group->work);
+ safe_atomic_dec(&group->refcount);
+ mutex_unlock(&group->lock);
+ release_group(group, 0);
+ return 1; /* consumed */
+ case IB_MGMT_METHOD_SET:
+ case IB_SA_METHOD_GET_TABLE:
+ case IB_SA_METHOD_GET_TABLE_RESP:
+ case IB_SA_METHOD_DELETE:
+ return 0; /* not consumed, pass-through to guest over tunnel */
+ default:
+ mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n",
+ port, mad->mad_hdr.method);
+ return 1; /* consumed */
+ }
+}
+
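
The demux path above recovers the slave id from the first byte of the MAD TID, which the proxy overwrote when forwarding the request (see the TID fixups in send_join_to_wire() and send_reply_to_slave()). A standalone illustration of the byte-stash trick (editorial sketch; the real TID generator's layout is assumed, and would keep its counter out of that byte):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint64_t make_demux_tid(uint8_t slave, uint32_t counter)
{
	uint64_t tid = counter;

	/* overwrite the first byte in memory with the slave id,
	 * mirroring "*(u8 *)&tid = slave" in the driver */
	memcpy(&tid, &slave, 1);
	return tid;
}

int main(void)
{
	uint64_t tid = make_demux_tid(7, 0x1234);
	uint8_t slave;

	memcpy(&slave, &tid, 1);       /* demux side: recover the slave */
	printf("slave=%u\n", slave);   /* prints 7 */
	return 0;
}
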
+int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
+ int slave, struct ib_sa_mad *sa_mad)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)sa_mad->data;
+ struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
+ struct mcast_group *group;
+ struct mcast_req *req;
+ int may_create = 0;
+
+ if (ctx->flushing)
+ return -EAGAIN;
+
+ switch (sa_mad->mad_hdr.method) {
+ case IB_MGMT_METHOD_SET:
+ may_create = 1;
+ /* fall through - SET and DELETE share the queueing path below */
+ case IB_SA_METHOD_DELETE:
+ req = kzalloc(sizeof *req, GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ req->func = slave;
+ req->sa_mad = *sa_mad;
+
+ mutex_lock(&ctx->mcg_table_lock);
+ group = acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL);
+ mutex_unlock(&ctx->mcg_table_lock);
+ if (IS_ERR(group)) {
+ kfree(req);
+ return PTR_ERR(group);
+ }
+ mutex_lock(&group->lock);
+ if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) {
+ mutex_unlock(&group->lock);
+ mcg_warn_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
+ port, slave, MAX_PEND_REQS_PER_FUNC);
+ release_group(group, 0);
+ kfree(req);
+ return -ENOMEM;
+ }
+ ++group->func[slave].num_pend_reqs;
+ req->group = group;
+ queue_req(req);
+ mutex_unlock(&group->lock);
+ release_group(group, 0);
+ return 1; /* consumed */
+ case IB_SA_METHOD_GET_TABLE:
+ case IB_MGMT_METHOD_GET_RESP:
+ case IB_SA_METHOD_GET_TABLE_RESP:
+ case IB_SA_METHOD_DELETE_RESP:
+ return 0; /* not consumed, pass-through */
+ default:
+ mcg_warn("In multiplex, port %d, func %d: unexpected MCMember method: 0x%x, dropping\n",
+ port, slave, sa_mad->mad_hdr.method);
+ return 1; /* consumed */
+ }
+}
+
+static ssize_t sysfs_show_group(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct mcast_group *group =
+ container_of(attr, struct mcast_group, dentry);
+ struct mcast_req *req = NULL;
+ char pending_str[40];
+ char state_str[40];
+ ssize_t len = 0;
+ int f;
+
+ if (group->state == MCAST_IDLE)
+ sprintf(state_str, "%s", get_state_string(group->state));
+ else
+ sprintf(state_str, "%s(TID=0x%llx)",
+ get_state_string(group->state),
+ (long long unsigned int)be64_to_cpu(group->last_req_tid));
+ if (list_empty(&group->pending_list)) {
+ sprintf(pending_str, "No");
+ } else {
+ req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
+ sprintf(pending_str, "Yes(TID=0x%llx)",
+ (long long unsigned int)be64_to_cpu(req->sa_mad.mad_hdr.tid));
+ }
+ len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
+ group->rec.scope_join_state & 0xf,
+ group->members[2], group->members[1], group->members[0],
+ atomic_read(&group->refcount),
+ pending_str,
+ state_str);
+ for (f = 0; f < MAX_VFS; ++f)
+ if (group->func[f].state == MCAST_MEMBER)
+ len += sprintf(buf + len, "%d[%1x] ",
+ f, group->func[f].join_state);
+
+ len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x "
+ "%4x %4x %2x %2x)\n",
+ be16_to_cpu(group->rec.pkey),
+ be32_to_cpu(group->rec.qkey),
+ (group->rec.mtusel_mtu & 0xc0) >> 6,
+ group->rec.mtusel_mtu & 0x3f,
+ group->rec.tclass,
+ (group->rec.ratesel_rate & 0xc0) >> 6,
+ group->rec.ratesel_rate & 0x3f,
+ (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28,
+ (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8,
+ be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff,
+ group->rec.proxy_join);
+
+ return len;
+}
+
+int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx)
+{
+ char name[20];
+
+ atomic_set(&ctx->tid, 0);
+ sprintf(name, "mlx4_ib_mcg%d", ctx->port);
+ ctx->mcg_wq = create_singlethread_workqueue(name);
+ if (!ctx->mcg_wq)
+ return -ENOMEM;
+
+ mutex_init(&ctx->mcg_table_lock);
+ ctx->mcg_table = RB_ROOT;
+ INIT_LIST_HEAD(&ctx->mcg_mgid0_list);
+ ctx->flushing = 0;
+
+ return 0;
+}
+
+static void force_clean_group(struct mcast_group *group)
+{
+ struct mcast_req *req, *tmp;
+
+ list_for_each_entry_safe(req, tmp, &group->pending_list, group_list) {
+ list_del(&req->group_list);
+ kfree(req);
+ }
+ del_sysfs_port_mcg_attr(group->demux->dev, group->demux->port, &group->dentry.attr);
+ rb_erase(&group->node, &group->demux->mcg_table);
+ kfree(group);
+}
+
+static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
+{
+ int i;
+ struct rb_node *p;
+ struct mcast_group *group;
+ unsigned long end;
+ int count;
+
+ for (i = 0; i < MAX_VFS; ++i)
+ clean_vf_mcast(ctx, i);
+
+ end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
+ do {
+ count = 0;
+ mutex_lock(&ctx->mcg_table_lock);
+ for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p))
+ ++count;
+ mutex_unlock(&ctx->mcg_table_lock);
+ if (!count)
+ break;
+
+ msleep(1);
+ } while (time_after(end, jiffies));
+
+ flush_workqueue(ctx->mcg_wq);
+ if (destroy_wq)
+ destroy_workqueue(ctx->mcg_wq);
+
+ mutex_lock(&ctx->mcg_table_lock);
+ while ((p = rb_first(&ctx->mcg_table)) != NULL) {
+ group = rb_entry(p, struct mcast_group, node);
+ if (atomic_read(&group->refcount))
+ mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);
+
+ force_clean_group(group);
+ }
+ mutex_unlock(&ctx->mcg_table_lock);
+}
+
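
_mlx4_ib_mcg_port_cleanup() above drains the group table by polling with a deadline and then force-frees whatever is left. The same drain-with-deadline shape, standalone (editorial sketch using wall-clock polling in place of jiffies and msleep()):

#include <stdio.h>
#include <time.h>

static int pending = 3;            /* stands in for the rb-tree count */

static int drain(int timeout_ms)
{
	clock_t end = clock() + (clock_t)timeout_ms * CLOCKS_PER_SEC / 1000;

	while (clock() < end) {
		if (!pending)
			return 0;      /* drained cleanly */
		pending--;             /* pretend queued work completes;
					* the driver sleeps between polls */
	}
	return pending;                /* leftovers get force-cleaned */
}

int main(void)
{
	printf("left over: %d\n", drain(100));
	return 0;
}
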
+struct clean_work {
+ struct work_struct work;
+ struct mlx4_ib_demux_ctx *ctx;
+ int destroy_wq;
+};
+
+static void mcg_clean_task(struct work_struct *work)
+{
+ struct clean_work *cw = container_of(work, struct clean_work, work);
+
+ _mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq);
+ cw->ctx->flushing = 0;
+ kfree(cw);
+}
+
+void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
+{
+ struct clean_work *work;
+
+ if (ctx->flushing)
+ return;
+
+ ctx->flushing = 1;
+
+ if (destroy_wq) {
+ _mlx4_ib_mcg_port_cleanup(ctx, destroy_wq);
+ ctx->flushing = 0;
+ return;
+ }
+
+ work = kmalloc(sizeof *work, GFP_KERNEL);
+ if (!work) {
+ ctx->flushing = 0;
+ mcg_warn("failed allocating work for cleanup\n");
+ return;
+ }
+
+ work->ctx = ctx;
+ work->destroy_wq = destroy_wq;
+ INIT_WORK(&work->work, mcg_clean_task);
+ queue_work(clean_wq, &work->work);
+}
+
+static void build_leave_mad(struct mcast_req *req)
+{
+ struct ib_sa_mad *mad = &req->sa_mad;
+
+ mad->mad_hdr.method = IB_SA_METHOD_DELETE;
+}
+
+
+static void clear_pending_reqs(struct mcast_group *group, int vf)
+{
+ struct mcast_req *req, *tmp, *group_first = NULL;
+ int clear;
+ int pend = 0;
+
+ if (!list_empty(&group->pending_list))
+ group_first = list_first_entry(&group->pending_list, struct mcast_req, group_list);
+
+ list_for_each_entry_safe(req, tmp, &group->func[vf].pending, func_list) {
+ clear = 1;
+ if (group_first == req &&
+ (group->state == MCAST_JOIN_SENT ||
+ group->state == MCAST_LEAVE_SENT)) {
+ clear = cancel_delayed_work(&group->timeout_work);
+ pend = !clear;
+ group->state = MCAST_IDLE;
+ }
+ if (clear) {
+ --group->func[vf].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ atomic_dec(&group->refcount);
+ }
+ }
+
+ if (!pend && (!list_empty(&group->func[vf].pending) || group->func[vf].num_pend_reqs)) {
+ mcg_warn_group(group, "DRIVER BUG: list_empty %d, num_pend_reqs %d\n",
+ list_empty(&group->func[vf].pending), group->func[vf].num_pend_reqs);
+ }
+}
+
+static int push_deleting_req(struct mcast_group *group, int slave)
+{
+ struct mcast_req *req;
+ struct mcast_req *pend_req;
+
+ if (!group->func[slave].join_state)
+ return 0;
+
+ req = kzalloc(sizeof *req, GFP_KERNEL);
+ if (!req) {
+ mcg_warn_group(group, "failed allocation - may leave stale groups\n");
+ return -ENOMEM;
+ }
+
+ if (!list_empty(&group->func[slave].pending)) {
+ pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list);
+ if (pend_req->clean) {
+ kfree(req);
+ return 0;
+ }
+ }
+
+ req->clean = 1;
+ req->func = slave;
+ req->group = group;
+ ++group->func[slave].num_pend_reqs;
+ build_leave_mad(req);
+ queue_req(req);
+ return 0;
+}
+
+void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave)
+{
+ struct mcast_group *group;
+ struct rb_node *p;
+
+ mutex_lock(&ctx->mcg_table_lock);
+ for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) {
+ group = rb_entry(p, struct mcast_group, node);
+ mutex_lock(&group->lock);
+ if (atomic_read(&group->refcount)) {
+ /* clear pending requests of this VF */
+ clear_pending_reqs(group, slave);
+ push_deleting_req(group, slave);
+ }
+ mutex_unlock(&group->lock);
+ }
+ mutex_unlock(&ctx->mcg_table_lock);
+}
+
+
+int mlx4_ib_mcg_init(void)
+{
+ clean_wq = create_singlethread_workqueue("mlx4_ib_mcg");
+ if (!clean_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_ib_mcg_destroy(void)
+{
+ destroy_workqueue(clean_wq);
+}
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -37,39 +37,52 @@
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/mutex.h>
+#include <linux/idr.h>
+#include <linux/notifier.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_sa.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
+#include <linux/rbtree.h>
-
#define MLX4_IB_DRV_NAME "mlx4_ib"
-#ifdef CONFIG_MLX4_DEBUG
-extern int mlx4_ib_debug_level;
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__
-#define mlx4_ib_dbg(format, arg...) \
- do { \
- if (mlx4_ib_debug_level) \
- printk(KERN_DEBUG "<" MLX4_IB_DRV_NAME "> %s: " format "\n",\
- __func__, ## arg); \
- } while (0)
+#define mlx4_ib_warn(ibdev, format, arg...) \
+ dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
-#else /* CONFIG_MLX4_DEBUG */
+#define mlx4_ib_info(ibdev, format, arg...) \
+ dev_info((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
-#define mlx4_ib_dbg(format, arg...) do {} while (0)
-
-#endif /* CONFIG_MLX4_DEBUG */
-
enum {
- MLX4_IB_SQ_MIN_WQE_SHIFT = 6
+ MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
+ MLX4_IB_MAX_HEADROOM = 2048
};
-#define MLX4_IB_SQ_HEADROOM(shift) ((2048 >> (shift)) + 1)
-#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
+#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1)
+#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
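
For concreteness: with the minimum WQE shift of 6 (64-byte WQEs), the headroom macro works out to (2048 >> 6) + 1 = 33 spare WQEs, which is what MLX4_IB_SQ_MAX_SPARE evaluates to. A two-line editorial check:

#include <stdio.h>

#define MLX4_IB_MAX_HEADROOM 2048
#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1)

int main(void)
{
	printf("%d\n", MLX4_IB_SQ_HEADROOM(6));   /* prints 33 */
	return 0;
}
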
+/* module param to indicate whether the SM assigns the alias_GUID */
+extern int mlx4_ib_sm_guid_assign;
+#ifdef __linux__
+extern struct proc_dir_entry *mlx4_mrs_dir_entry;
+#endif
+
+#define MLX4_IB_UC_STEER_QPN_ALIGN 1
+#define MLX4_IB_UC_MAX_NUM_QPS (256 * 1024)
+
+
+#define MLX4_IB_MMAP_CMD_MASK 0xFF
+#define MLX4_IB_MMAP_CMD_BITS 8
+
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
@@ -83,15 +96,16 @@
};
struct mlx4_ib_xrcd {
- struct ib_xrcd ibxrcd;
- u32 xrcdn;
- struct ib_pd *pd;
- struct ib_cq *cq;
+ struct ib_xrcd ibxrcd;
+ u32 xrcdn;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
};
struct mlx4_ib_cq_buf {
struct mlx4_buf buf;
struct mlx4_mtt mtt;
+ int entry_size;
};
struct mlx4_ib_cq_resize {
@@ -99,6 +113,11 @@
int cqe;
};
+struct mlx4_shared_mr_info {
+ int mr_id;
+ struct ib_umem *umem;
+};
+
struct mlx4_ib_cq {
struct ib_cq ibcq;
struct mlx4_cq mcq;
@@ -115,6 +134,7 @@
struct ib_mr ibmr;
struct mlx4_mr mmr;
struct ib_umem *umem;
+ struct mlx4_shared_mr_info *smr_info;
};
struct mlx4_ib_fast_reg_page_list {
@@ -141,12 +161,14 @@
};
enum mlx4_ib_qp_flags {
- MLX4_IB_QP_LSO = 1 << 0,
- MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
- MLX4_IB_XRC_RCV = 1 << 2,
+ MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+ MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+ MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
+ MLX4_IB_SRIOV_SQP = 1 << 31,
};
-struct gid_entry {
+struct mlx4_ib_gid_entry {
struct list_head list;
union ib_gid gid;
int added;
@@ -153,6 +175,113 @@
u8 port;
};
+enum mlx4_ib_mmap_cmd {
+ MLX4_IB_MMAP_UAR_PAGE = 0,
+ MLX4_IB_MMAP_BLUE_FLAME_PAGE = 1,
+ MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES = 2,
+};
+
+enum mlx4_ib_qp_type {
+ /*
+ * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
+ * here (and in that order) since the MAD layer uses them as
+ * indices into a 2-entry table.
+ */
+ MLX4_IB_QPT_SMI = IB_QPT_SMI,
+ MLX4_IB_QPT_GSI = IB_QPT_GSI,
+
+ MLX4_IB_QPT_RC = IB_QPT_RC,
+ MLX4_IB_QPT_UC = IB_QPT_UC,
+ MLX4_IB_QPT_UD = IB_QPT_UD,
+ MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6,
+ MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE,
+ MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET,
+ MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI,
+ MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT,
+
+ MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16,
+ MLX4_IB_QPT_PROXY_SMI = 1 << 17,
+ MLX4_IB_QPT_PROXY_GSI = 1 << 18,
+ MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19,
+ MLX4_IB_QPT_TUN_SMI = 1 << 20,
+ MLX4_IB_QPT_TUN_GSI = 1 << 21,
+};
+
+#define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
+ MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)
+
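
The proxy/tunnel QP types above occupy bits 16..21, so they can be OR-combined and tested without colliding with the core ib_qp_type values in the low bits. A typical test (editorial note; the field name follows the mlx4_ib_qp member added further down, and the handler is hypothetical):

	/* editorial example: detect any paravirtualized special QP */
	if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
		handle_sriov_special_qp(qp);   /* hypothetical handler */
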
+enum mlx4_ib_mad_ifc_flags {
+ MLX4_MAD_IFC_IGNORE_MKEY = 1,
+ MLX4_MAD_IFC_IGNORE_BKEY = 2,
+ MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY |
+ MLX4_MAD_IFC_IGNORE_BKEY),
+ MLX4_MAD_IFC_NET_VIEW = 4,
+};
+
+enum {
+ MLX4_NUM_TUNNEL_BUFS = 256,
+};
+
+struct mlx4_ib_tunnel_header {
+ struct mlx4_av av;
+ __be32 remote_qpn;
+ __be32 qkey;
+ __be16 vlan;
+ u8 mac[6];
+ __be16 pkey_index;
+ u8 reserved[6];
+};
+
+struct mlx4_ib_buf {
+ void *addr;
+ dma_addr_t map;
+};
+
+struct mlx4_rcv_tunnel_hdr {
+ __be32 flags_src_qp; /* flags[6:5] is defined for VLANs:
+ * 0x0 - no vlan was in the packet
+ * 0x01 - C-VLAN was in the packet */
+ u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */
+ u8 reserved;
+ __be16 pkey_index;
+ __be16 sl_vid;
+ __be16 slid_mac_47_32;
+ __be32 mac_31_0;
+};
+
+struct mlx4_ib_proxy_sqp_hdr {
+ struct ib_grh grh;
+ struct mlx4_rcv_tunnel_hdr tun;
+} __packed;
+
+struct mlx4_roce_smac_vlan_info {
+ u64 smac;
+ int smac_index;
+ int smac_port;
+ u64 candidate_smac;
+ int candidate_smac_index;
+ int candidate_smac_port;
+ u16 vid;
+ int vlan_index;
+ int vlan_port;
+ u16 candidate_vid;
+ int candidate_vlan_index;
+ int candidate_vlan_port;
+ int update_vid;
+};
+
+struct mlx4_ib_qpg_data {
+ unsigned long *tss_bitmap;
+ unsigned long *rss_bitmap;
+ struct mlx4_ib_qp *qpg_parent;
+ int tss_qpn_base;
+ int rss_qpn_base;
+ u32 tss_child_count;
+ u32 rss_child_count;
+ u32 qpg_tss_mask_sz;
+};
+
struct mlx4_ib_qp {
struct ib_qp ibqp;
struct mlx4_qp mqp;
@@ -168,14 +297,13 @@
int sq_spare_wqes;
struct mlx4_ib_wq sq;
+ enum mlx4_ib_qp_type mlx4_ib_qp_type;
struct ib_umem *umem;
struct mlx4_mtt mtt;
int buf_size;
struct mutex mutex;
+ u16 xrcdn;
u32 flags;
- struct list_head xrc_reg_list;
- spinlock_t xrc_reg_list_lock;
- u16 xrcdn;
u8 port;
u8 alt_port;
u8 atomic_rd_en;
@@ -183,9 +311,16 @@
u8 sq_no_prefetch;
u8 state;
int mlx_type;
+ enum ib_qpg_type qpg_type;
+ struct mlx4_ib_qpg_data *qpg_data;
struct list_head gid_list;
- int max_inline_data;
- struct mlx4_bf bf;
+ struct list_head steering_rules;
+ struct mlx4_ib_buf *sqp_proxy_rcv;
+ struct mlx4_roce_smac_vlan_info pri;
+ struct mlx4_roce_smac_vlan_info alt;
+ struct list_head rules_list;
+ int max_inline_data;
+ struct mlx4_bf bf;
};
struct mlx4_ib_srq {
@@ -208,6 +343,138 @@
union mlx4_ext_av av;
};
+/****************************************/
+/* alias guid support */
+/****************************************/
+#define NUM_PORT_ALIAS_GUID 2
+#define NUM_ALIAS_GUID_IN_REC 8
+#define NUM_ALIAS_GUID_REC_IN_PORT 16
+#define GUID_REC_SIZE 8
+#define NUM_ALIAS_GUID_PER_PORT 128
+#define MLX4_NOT_SET_GUID (0x00LL)
+#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL))
+
+enum mlx4_guid_alias_rec_status {
+ MLX4_GUID_INFO_STATUS_IDLE,
+ MLX4_GUID_INFO_STATUS_SET,
+ MLX4_GUID_INFO_STATUS_PENDING,
+};
+
+enum mlx4_guid_alias_rec_ownership {
+ MLX4_GUID_DRIVER_ASSIGN,
+ MLX4_GUID_SYSADMIN_ASSIGN,
+ MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
+};
+
+enum mlx4_guid_alias_rec_method {
+ MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
+ MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE,
+};
+
+struct mlx4_sriov_alias_guid_info_rec_det {
+ u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
+ ib_sa_comp_mask guid_indexes; /* indicates which of the 8 records are valid */
+ enum mlx4_guid_alias_rec_status status; /* the administrative status of the record */
+ u8 method; /*set or delete*/
+ enum mlx4_guid_alias_rec_ownership ownership; /* indicates who assigned this alias_guid record */
+};
+
+struct mlx4_sriov_alias_guid_port_rec_det {
+ struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT];
+ struct workqueue_struct *wq;
+ struct delayed_work alias_guid_work;
+ u8 port;
+ struct mlx4_sriov_alias_guid *parent;
+ struct list_head cb_list;
+};
+
+struct mlx4_sriov_alias_guid {
+ struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS];
+ spinlock_t ag_work_lock;
+ struct ib_sa_client *sa_client;
+};
+
+struct mlx4_ib_demux_work {
+ struct work_struct work;
+ struct mlx4_ib_dev *dev;
+ int slave;
+ int do_init;
+ u8 port;
+
+};
+
+struct mlx4_ib_tun_tx_buf {
+ struct mlx4_ib_buf buf;
+ struct ib_ah *ah;
+};
+
+struct mlx4_ib_demux_pv_qp {
+ struct ib_qp *qp;
+ enum ib_qp_type proxy_qpt;
+ struct mlx4_ib_buf *ring;
+ struct mlx4_ib_tun_tx_buf *tx_ring;
+ spinlock_t tx_lock;
+ unsigned tx_ix_head;
+ unsigned tx_ix_tail;
+};
+
+enum mlx4_ib_demux_pv_state {
+ DEMUX_PV_STATE_DOWN,
+ DEMUX_PV_STATE_STARTING,
+ DEMUX_PV_STATE_ACTIVE,
+ DEMUX_PV_STATE_DOWNING,
+};
+
+struct mlx4_ib_demux_pv_ctx {
+ int port;
+ int slave;
+ enum mlx4_ib_demux_pv_state state;
+ int has_smi;
+ struct ib_device *ib_dev;
+ struct ib_cq *cq;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ struct work_struct work;
+ struct workqueue_struct *wq;
+ struct mlx4_ib_demux_pv_qp qp[2];
+};
+
+struct mlx4_ib_demux_ctx {
+ struct ib_device *ib_dev;
+ int port;
+ struct workqueue_struct *wq;
+ struct workqueue_struct *ud_wq;
+ spinlock_t ud_lock;
+ __be64 subnet_prefix;
+ __be64 guid_cache[128];
+ struct mlx4_ib_dev *dev;
+ /* the following lock protects both mcg_table and mcg_mgid0_list */
+ struct mutex mcg_table_lock;
+ struct rb_root mcg_table;
+ struct list_head mcg_mgid0_list;
+ struct workqueue_struct *mcg_wq;
+ struct mlx4_ib_demux_pv_ctx **tun;
+ atomic_t tid;
+ int flushing; /* flushing the work queue */
+};
+
+struct mlx4_ib_sriov {
+ struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS];
+ struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS];
+ /* when taking this spinlock use the "irq" variants, because
+ * it may be taken from interrupt context. */
+ spinlock_t going_down_lock;
+ int is_going_down;
+
+ struct mlx4_sriov_alias_guid alias_guid;
+
+ /* CM paravirtualization fields */
+ struct list_head cm_list;
+ spinlock_t id_map_lock;
+ struct rb_root sl_id_map;
+ struct idr pv_id_table;
+};
+
struct mlx4_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[MLX4_MAX_PORTS];
@@ -215,6 +482,42 @@
union ib_gid gid_table[MLX4_MAX_PORTS][128];
};
+struct pkey_mgt {
+ u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+ u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+ struct list_head pkey_port_list[MLX4_MFUNC_MAX];
+ struct kobject *device_parent[MLX4_MFUNC_MAX];
+};
+
+struct mlx4_ib_iov_sysfs_attr {
+ void *ctx;
+ struct kobject *kobj;
+ unsigned long data;
+ u32 entry_num;
+ char name[15];
+ struct device_attribute dentry;
+ struct device *dev;
+};
+
+struct mlx4_ib_iov_sysfs_attr_ar {
+ struct mlx4_ib_iov_sysfs_attr dentries[3 * NUM_ALIAS_GUID_PER_PORT + 1];
+};
+
+struct mlx4_ib_iov_port {
+ char name[100];
+ u8 num;
+ struct mlx4_ib_dev *dev;
+ struct list_head list;
+ struct mlx4_ib_iov_sysfs_attr_ar *dentr_ar;
+ struct ib_port_attr attr;
+ struct kobject *cur_port;
+ struct kobject *admin_alias_parent;
+ struct kobject *gids_parent;
+ struct kobject *pkeys_parent;
+ struct kobject *mcgs_parent;
+ struct mlx4_ib_iov_sysfs_attr mcg_dentry;
+};
+
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
@@ -226,14 +529,37 @@
struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
struct ib_ah *sm_ah[MLX4_MAX_PORTS];
spinlock_t sm_lock;
+ struct mlx4_ib_sriov sriov;
struct mutex cap_mask_mutex;
- struct mutex xrc_reg_mutex;
- int ib_active;
+ bool ib_active;
struct mlx4_ib_iboe iboe;
int counters[MLX4_MAX_PORTS];
+ int *eq_table;
+ int eq_added;
+ struct kobject *iov_parent;
+ struct kobject *ports_parent;
+ struct kobject *dev_ports_parent[MLX4_MFUNC_MAX];
+ struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS];
+ struct pkey_mgt pkeys;
+ unsigned long *ib_uc_qpns_bitmap;
+ int steer_qpn_count;
+ int steer_qpn_base;
};
+struct ib_event_work {
+ struct work_struct work;
+ struct mlx4_ib_dev *ib_dev;
+ struct mlx4_eqe ib_eqe;
+};
+
+struct mlx4_ib_qp_tunnel_init_attr {
+ struct ib_qp_init_attr init_attr;
+ int slave;
+ enum ib_qp_type proxy_qp_type;
+ u8 port;
+};
+
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
@@ -303,6 +629,9 @@
return container_of(ibah, struct mlx4_ib_ah, ibah);
}
+int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
+void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
+
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_db *db);
void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
@@ -310,9 +639,12 @@
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem);
+int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
+ u64 start_va,
+ int *num_of_mtts);
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
- struct ib_udata *udata);
+ struct ib_udata *udata, int mr_id);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len);
@@ -322,6 +654,7 @@
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
+int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq);
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
struct ib_ucontext *context,
struct ib_udata *udata);
@@ -338,11 +671,6 @@
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
-struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd,
- struct ib_cq *xrc_cq,
- struct ib_xrcd *xrcd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata);
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
@@ -364,7 +692,7 @@
int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad);
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -379,20 +707,20 @@
u64 iova);
int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
-int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr,
- u32 *qp_num);
-int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num,
- struct ib_qp_attr *attr, int attr_mask);
-int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num,
- struct ib_qp_attr *attr, int attr_mask,
- struct ib_qp_init_attr *init_attr);
-int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
-int mlx4_ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
+int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props, int netw_view);
+int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey, int netw_view);
+int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid, int netw_view);
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port);
+int mlx4_ib_query_if_stat(struct mlx4_ib_dev *dev, u32 counter_index,
+ union mlx4_counter *counter, u8 clear);
+
static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
@@ -403,7 +731,73 @@
return !!(ah->av.ib.g_slid & 0x80);
}
+int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
+void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
+void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave);
+int mlx4_ib_mcg_init(void);
+void mlx4_ib_mcg_destroy(void);
+
+int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid);
+
+int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave,
+ struct ib_sa_mad *sa_mad);
+int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
+ struct ib_sa_mad *mad);
+
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid);
+void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
+ enum ib_event_type type);
+
+void mlx4_ib_tunnels_update_work(struct work_struct *work);
+
+int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
+ enum ib_qp_type qpt, struct ib_wc *wc,
+ struct ib_grh *grh, struct ib_mad *mad);
+int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
+ enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
+ u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
+__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
+
+int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
+ struct ib_mad *mad, int is_eth);
+
+int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
+ struct ib_mad *mad);
+
+void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev);
+void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id);
+
+/* alias guid support */
+void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port);
+int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev);
+void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev);
+void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port);
+
+void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
+ int block_num,
+ u8 port_num, u8 *p_data);
+
+void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev,
+ int block_num, u8 port_num,
+ u8 *p_data);
+
+int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
+ struct attribute *attr);
+void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
+ struct attribute *attr);
+ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
+
+int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device);
+
+void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device);
+
+__be64 mlx4_ib_gen_node_guid(void);
+
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+ int is_attach);
+
#endif /* MLX4_IB_H */
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
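
For reference, the alias GUID constants introduced in mlx4_ib.h above are mutually consistent: an SA GUIDInfo record carries NUM_ALIAS_GUID_IN_REC (8) GUIDs of GUID_REC_SIZE (8) bytes each, and NUM_ALIAS_GUID_REC_IN_PORT (16) such records cover the NUM_ALIAS_GUID_PER_PORT (128) alias GUIDs of a port. A minimal stand-alone sketch of that arithmetic (the check program is ours, not part of the commit):

    #include <assert.h>

    /* mirrors of the constants defined in mlx4_ib.h above */
    #define GUID_REC_SIZE              8   /* bytes per GUID */
    #define NUM_ALIAS_GUID_IN_REC      8   /* GUIDs per GUIDInfo record */
    #define NUM_ALIAS_GUID_REC_IN_PORT 16  /* records per port */
    #define NUM_ALIAS_GUID_PER_PORT    128 /* total alias GUIDs per port */

    int main(void)
    {
            /* 16 records x 8 GUIDs = 128 alias GUIDs per port */
            assert(NUM_ALIAS_GUID_REC_IN_PORT * NUM_ALIAS_GUID_IN_REC ==
                   NUM_ALIAS_GUID_PER_PORT);
            /* all_recs[] in mlx4_sriov_alias_guid_info_rec_det is 64 bytes */
            assert(GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC == 64);
            return 0;
    }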
Modified: trunk/sys/ofed/drivers/infiniband/hw/mlx4/mr.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/mr.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/mr.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -31,6 +31,15 @@
* SOFTWARE.
*/
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#ifdef __linux__
+#include <linux/proc_fs.h>
+#include <linux/cred.h>
+#endif
+
#include "mlx4_ib.h"
static u32 convert_access(int acc)
@@ -41,13 +50,67 @@
(acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
MLX4_PERM_LOCAL_READ;
}
+#ifdef __linux__
+static ssize_t shared_mr_proc_read(struct file *file,
+ char __user *buffer,
+ size_t len,
+ loff_t *offset)
+{
+ return -ENOSYS;
+
+}
+
+static ssize_t shared_mr_proc_write(struct file *file,
+ const char __user *buffer,
+ size_t len,
+ loff_t *offset)
+{
+
+ return -ENOSYS;
+}
+
+static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+
+ struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode);
+ struct mlx4_shared_mr_info *smr_info =
+ (struct mlx4_shared_mr_info *)pde->data;
+
+ /* Prevent any mapping not on start of area */
+ if (vma->vm_pgoff != 0)
+ return -EINVAL;
+
+ return ib_umem_map_to_vma(smr_info->umem,
+ vma);
+
+}
+
+static const struct file_operations shared_mr_proc_ops = {
+ .owner = THIS_MODULE,
+ .read = shared_mr_proc_read,
+ .write = shared_mr_proc_write,
+ .mmap = shared_mr_mmap
+};
+
+static mode_t convert_shared_access(int acc)
+{
+
+ return (acc & IB_ACCESS_SHARED_MR_USER_READ ? S_IRUSR : 0) |
+ (acc & IB_ACCESS_SHARED_MR_USER_WRITE ? S_IWUSR : 0) |
+ (acc & IB_ACCESS_SHARED_MR_GROUP_READ ? S_IRGRP : 0) |
+ (acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) |
+ (acc & IB_ACCESS_SHARED_MR_OTHER_READ ? S_IROTH : 0) |
+ (acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0);
+
+}
+#endif
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx4_ib_mr *mr;
int err;
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -74,118 +137,350 @@
return ERR_PTR(err);
}
+static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
+ struct mlx4_mtt *mtt,
+ u64 mtt_size,
+ u64 mtt_shift,
+ u64 len,
+ u64 cur_start_addr,
+ u64 *pages,
+ int *start_index,
+ int *npages)
+{
+ int k;
+ int err = 0;
+ u64 mtt_entries;
+ u64 cur_end_addr = cur_start_addr + len;
+ u64 cur_end_addr_aligned = 0;
+
+ len += (cur_start_addr & (mtt_size-1ULL));
+ cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
+ len += (cur_end_addr_aligned - cur_end_addr);
+ if (len & (mtt_size-1ULL)) {
+ WARN(1,
+ "write_block: len %llx is not aligned to mtt_size %llx\n",
+ (long long)len, (long long)mtt_size);
+ return -EINVAL;
+ }
+
+
+ mtt_entries = (len >> mtt_shift);
+
+ /* Align the MTT start address to the mtt_size.
+ * Required to handle cases when the MR starts in the
+ * middle of an MTT record. This was not required in the
+ * old code since the physical addresses provided by the
+ * DMA subsystem were page aligned, which was also the
+ * MTT size.
+ */
+ cur_start_addr = round_down(cur_start_addr, mtt_size);
+ /* A new block is started ...*/
+ for (k = 0; k < mtt_entries; ++k) {
+ pages[*npages] = cur_start_addr + (mtt_size * k);
+ (*npages)++;
+ /*
+ * Be friendly to mlx4_write_mtt() and
+ * pass it chunks of appropriate size.
+ */
+ if (*npages == PAGE_SIZE / sizeof(u64)) {
+ err = mlx4_write_mtt(dev->dev,
+ mtt, *start_index,
+ *npages, pages);
+ if (err)
+ return err;
+
+ (*start_index) += *npages;
+ *npages = 0;
+ }
+ }
+
+ return 0;
+}
+
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
struct ib_umem_chunk *chunk;
- int i, j, k;
- int n;
- int len;
+ int j;
+ u64 len = 0;
int err = 0;
+ u64 mtt_size;
+ u64 cur_start_addr = 0;
+ u64 mtt_shift;
+ int start_index = 0;
+ int npages = 0;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
return -ENOMEM;
- i = n = 0;
+ mtt_shift = mtt->page_shift;
+ mtt_size = 1ULL << mtt_shift;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(&chunk->page_list[j]) +
- umem->page_size * k;
- /*
- * Be friendly to mlx4_write_mtt() and
- * pass it chunks of appropriate size.
- */
- if (i == PAGE_SIZE / sizeof (u64)) {
- err = mlx4_write_mtt(dev->dev, mtt, n,
- i, pages);
- if (err)
- goto out;
- n += i;
- i = 0;
- }
+ if (cur_start_addr + len ==
+ sg_dma_address(&chunk->page_list[j])) {
+ /* still the same block */
+ len += sg_dma_len(&chunk->page_list[j]);
+ continue;
}
+ /* A new block is started ...*/
+ /* If len is misaligned, write an extra MTT entry to
+ * cover the misaligned area (round up the division).
+ */
+ err = mlx4_ib_umem_write_mtt_block(dev,
+ mtt, mtt_size, mtt_shift,
+ len, cur_start_addr,
+ pages,
+ &start_index,
+ &npages);
+ if (err)
+ goto out;
+
+ cur_start_addr =
+ sg_dma_address(&chunk->page_list[j]);
+ len = sg_dma_len(&chunk->page_list[j]);
}
- if (i)
- err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
+ /* Handle the last block */
+ if (len > 0) {
+ /* If len is misaligned, write an extra MTT entry to cover
+ * the misaligned area (round up the division).
+ */
+ err = mlx4_ib_umem_write_mtt_block(dev,
+ mtt, mtt_size, mtt_shift,
+ len, cur_start_addr,
+ pages,
+ &start_index,
+ &npages);
+ if (err)
+ goto out;
+ }
+
+ if (npages)
+ err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
+
out:
free_page((unsigned long) pages);
return err;
}
-static int handle_hugetlb_user_mr(struct ib_pd *pd, struct mlx4_ib_mr *mr,
- u64 start, u64 virt_addr, int access_flags)
+static inline u64 alignment_of(u64 ptr)
{
-#if defined(CONFIG_HUGETLB_PAGE) && !defined(__powerpc__) && !defined(__ia64__)
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ return ilog2(ptr & (~(ptr-1)));
+}
+
+static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
+ u64 current_block_end,
+ u64 block_shift)
+{
+ /* Check whether the new block is aligned as well as the
+ * previous block was. A block address must have zeros in
+ * its low bits up to the current entity size.
+ */
+ if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
+ /* It is not as well aligned as the previous
+ * block - reduce the MTT size accordingly.
+ * Here we take the lowest set bit of the
+ * address.
+ */
+ block_shift = alignment_of(next_block_start);
+
+ /* Check whether the end of the previous block is
+ * aligned as well as its start.
+ */
+ if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
+ /* It is not as well aligned as the start of the
+ * block - reduce the MTT size accordingly.
+ */
+ block_shift = alignment_of(current_block_end);
+
+ return block_shift;
+}
+
+/* Calculate the optimal MTT size based on contiguous pages.
+ * The function also accounts for the pages that are not aligned to the
+ * calculated mtt_size, so they can be added to the total number of pages.
+ * For that we check the first and last chunk lengths and, if they are not
+ * aligned to mtt_size, increment the number of non-aligned pages.
+ * All chunks in the middle are already handled as part of the MTT shift
+ * calculation, for both their start and end addresses.
+ */
+int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
+ u64 start_va,
+ int *num_of_mtts)
+{
struct ib_umem_chunk *chunk;
- unsigned dsize;
- dma_addr_t daddr;
- unsigned cur_size = 0;
- dma_addr_t uninitialized_var(cur_addr);
- int n;
- struct ib_umem *umem = mr->umem;
- u64 *arr;
- int err = 0;
- int i;
- int j = 0;
- int off = start & (HPAGE_SIZE - 1);
+ int j;
+ u64 block_shift = MLX4_MAX_MTT_SHIFT;
+ u64 current_block_len = 0;
+ u64 current_block_start = 0;
+ u64 misalignment_bits;
+ u64 first_block_start = 0;
+ u64 last_block_end = 0;
+ u64 total_len = 0;
+ u64 last_block_aligned_end = 0;
+ u64 min_shift = ilog2(umem->page_size);
- n = DIV_ROUND_UP(off + umem->length, HPAGE_SIZE);
- arr = kmalloc(n * sizeof *arr, GFP_KERNEL);
- if (!arr)
- return -ENOMEM;
+ list_for_each_entry(chunk, &umem->chunk_list, list) {
+ /* Initialization - save the first chunk start as
+ the current_block_start - block means contiguous pages.
+ */
+ if (current_block_len == 0 && current_block_start == 0) {
+ first_block_start = current_block_start =
+ sg_dma_address(&chunk->page_list[0]);
+ /* Find the bits that are different between
+ the physical address and the virtual
+ address for the start of the MR.
+ */
+ /* umem_get aligned the start_va to a page
+ * boundary. Therefore, we need to align the
+ * start_va to the same boundary. */
+ /* misalignment_bits is needed to handle the
+ case of a single memory region. In this
+ case, the rest of the logic will not reduce
+ the block size. If we use a block size
+ which is bigger than the alignment of the
+ misalignment bits, we might use the virtual
+ page number instead of the physical page
+ number, resulting in access to the wrong
+ data. */
+ misalignment_bits =
+ (start_va & (~(((u64)(umem->page_size))-1ULL)))
+ ^ current_block_start;
+ block_shift = min(alignment_of(misalignment_bits),
+ block_shift);
+ }
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (i = 0; i < chunk->nmap; ++i) {
- daddr = sg_dma_address(&chunk->page_list[i]);
- dsize = sg_dma_len(&chunk->page_list[i]);
- if (!cur_size) {
- cur_addr = daddr;
- cur_size = dsize;
- } else if (cur_addr + cur_size != daddr) {
- err = -EINVAL;
- goto out;
- } else
- cur_size += dsize;
+ /* Go over the scatter entries in the current chunk, check
+ if they continue the previous scatter entry.
+ */
+ for (j = 0; j < chunk->nmap; ++j) {
+ u64 next_block_start =
+ sg_dma_address(&chunk->page_list[j]);
+ u64 current_block_end = current_block_start
+ + current_block_len;
+ /* If we have a split (non-contiguous area) between two blocks */
+ if (current_block_end != next_block_start) {
+ block_shift = mlx4_ib_umem_calc_block_mtt(
+ next_block_start,
+ current_block_end,
+ block_shift);
- if (cur_size > HPAGE_SIZE) {
- err = -EINVAL;
- goto out;
- } else if (cur_size == HPAGE_SIZE) {
- cur_size = 0;
- arr[j++] = cur_addr;
+ /* If we reached the minimum shift for 4k
+ page we stop the loop.
+ */
+ if (block_shift <= min_shift)
+ goto end;
+
+ /* Add the current block length to the total.
+ * The length of the first block is also needed
+ * at the end, to calculate the number of
+ * non-aligned pages.
+ */
+ total_len += current_block_len;
+
+ /* Start a new block */
+ current_block_start = next_block_start;
+ current_block_len =
+ sg_dma_len(&chunk->page_list[j]);
+ continue;
}
+ /* The scatter entry is another part of the
+ * current block; increase the block size.
+ * An entry in the scatter list can be larger
+ * than 4K (one page) because the DMA mapping
+ * may merge several blocks together.
+ */
+ current_block_len +=
+ sg_dma_len(&chunk->page_list[j]);
}
+ }
- if (cur_size) {
- arr[j++] = cur_addr;
+ /* Account for the last block in the total len */
+ total_len += current_block_len;
+ /* Add to the first block the misalignment that it suffers from.*/
+ total_len += (first_block_start & ((1ULL<<block_shift)-1ULL));
+ last_block_end = current_block_start+current_block_len;
+ last_block_aligned_end = round_up(last_block_end, 1<<block_shift);
+ total_len += (last_block_aligned_end - last_block_end);
+
+ WARN((total_len & ((1ULL<<block_shift)-1ULL)),
+ " misaligned total length detected (%llu, %llu)!",
+ (long long)total_len, (long long)block_shift);
+
+ *num_of_mtts = total_len >> block_shift;
+end:
+ if (block_shift < min_shift) {
+ /* If shift is less than the min we set a WARN and
+ return the min shift.
+ */
+ WARN(1,
+ "mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n",
+ (long long)block_shift);
+
+ block_shift = min_shift;
}
+ return block_shift;
+}
- err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, umem->length,
- convert_access(access_flags), n, HPAGE_SHIFT, &mr->mmr);
- if (err)
- goto out;
+#ifdef __linux__
+static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
+{
+ struct proc_dir_entry *mr_proc_entry;
+ mode_t mode = S_IFREG;
+ char name_buff[16];
- err = mlx4_write_mtt(dev->dev, &mr->mmr.mtt, 0, n, arr);
+ mode |= convert_shared_access(access_flags);
+ sprintf(name_buff, "%X", mr_id);
+ mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL);
+ if (!mr->smr_info)
+ return -ENOMEM;
+ mr->smr_info->mr_id = mr_id;
+ mr->smr_info->umem = mr->umem;
-out:
- kfree(arr);
- return err;
-#else
- return -ENOSYS;
-#endif
+ mr_proc_entry = proc_create_data(name_buff, mode,
+ mlx4_mrs_dir_entry,
+ &shared_mr_proc_ops,
+ mr->smr_info);
+
+ if (!mr_proc_entry) {
+ pr_err("prepare_shared_mr failed via proc\n");
+ kfree(mr->smr_info);
+ return -ENODEV;
+ }
+
+ current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid));
+ mr_proc_entry->size = mr->umem->length;
+ return 0;
+
}
+static int is_shared_mr(int access_flags)
+{
+ /* We should check whether IB_ACCESS_SHARED_MR_USER_READ or
+ other shared bits were turned on.
+ */
+ return !!(access_flags & (IB_ACCESS_SHARED_MR_USER_READ |
+ IB_ACCESS_SHARED_MR_USER_WRITE |
+ IB_ACCESS_SHARED_MR_GROUP_READ |
+ IB_ACCESS_SHARED_MR_GROUP_WRITE |
+ IB_ACCESS_SHARED_MR_OTHER_READ |
+ IB_ACCESS_SHARED_MR_OTHER_WRITE));
+}
+#endif
+
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
- struct ib_udata *udata)
+ struct ib_udata *udata,
+ int mr_id)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mr *mr;
@@ -193,38 +488,49 @@
int err;
int n;
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags, 0);
+ access_flags, 0);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
}
- if (!mr->umem->hugetlb ||
- handle_hugetlb_user_mr(pd, mr, start, virt_addr, access_flags)) {
- n = ib_umem_page_count(mr->umem);
- shift = ilog2(mr->umem->page_size);
+ n = ib_umem_page_count(mr->umem);
+ shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start,
+ &n);
+ err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
+ convert_access(access_flags), n, shift, &mr->mmr);
+ if (err)
+ goto err_umem;
- err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
- convert_access(access_flags), n, shift, &mr->mmr);
- if (err)
- goto err_umem;
+ err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
+ if (err)
+ goto err_mr;
- err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
- if (err)
- goto err_mr;
- }
-
err = mlx4_mr_enable(dev->dev, &mr->mmr);
if (err)
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+#ifdef __linux__
+ /* Check whether MR should be shared */
+ if (is_shared_mr(access_flags)) {
+ /* start address and length must be aligned to page size in order
+ * to map a full page and to prevent leakage of data */
+ if (mr->umem->offset || (length & ~PAGE_MASK)) {
+ err = -EINVAL;
+ goto err_mr;
+ }
+ err = prepare_shared_mr(mr, access_flags, mr_id);
+ if (err)
+ goto err_mr;
+ }
+#endif
return &mr->ibmr;
err_mr:
@@ -239,13 +545,36 @@
return ERR_PTR(err);
}
+
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
+ if (mr->smr_info) {
+ /* When the master/parent shared MR is deregistered, it can
+ * no longer be shared - its mr_id will be returned to the
+ * kernel as part of ib_uverbs_dereg_mr and may be allocated
+ * again as part of another reg_mr.
+ */
+ char name_buff[16];
+
+ sprintf(name_buff, "%X", mr->smr_info->mr_id);
+ /* remove_proc_entry() checks internally that no operation
+ * was started on that procfs file; if one is in progress,
+ * the current process waits until it completes.
+ * That is why no extra synchronization is needed when we
+ * release the shared umem below.
+ */
+#ifdef __linux__
+ remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
+ kfree(mr->smr_info);
+#endif
+ }
+
if (mr->umem)
ib_umem_release(mr->umem);
+
kfree(mr);
return 0;
@@ -258,7 +587,7 @@
struct mlx4_ib_mr *mr;
int err;
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -291,7 +620,7 @@
struct mlx4_ib_fast_reg_page_list *mfrpl;
int size = page_list_len * sizeof (u64);
- if (page_list_len > MAX_FAST_REG_PAGES)
+ if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
return ERR_PTR(-EINVAL);
mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
@@ -403,7 +732,7 @@
err = mlx4_SYNC_TPT(mdev);
if (err)
- printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when "
+ pr_warn("SYNC_TPT error %d when "
"unmapping FMRs\n", err);
return 0;
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/mr.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
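
The heart of the new MTT sizing code in mr.c is alignment_of(): ilog2(ptr & ~(ptr - 1)) isolates the lowest set bit of an address, i.e. the log2 of the largest power of two dividing it. mlx4_ib_umem_calc_optimal_mtt_size() starts from MLX4_MAX_MTT_SHIFT and shrinks the shift to the worst alignment observed at any block boundary (the real code also checks block end addresses and stops at the page shift). A minimal user-space sketch of that reduction, with made-up DMA addresses:

    #include <stdio.h>
    #include <stdint.h>

    /* log2 of the largest power of two dividing ptr; equivalent to the
     * driver's alignment_of() == ilog2(ptr & ~(ptr - 1)) for ptr != 0 */
    static unsigned alignment_of(uint64_t ptr)
    {
            return (unsigned)__builtin_ctzll(ptr);
    }

    int main(void)
    {
            /* start addresses of three hypothetical contiguous blocks */
            uint64_t starts[] = { 0x200000, 0x240000, 0x241000 };
            unsigned shift = 31; /* stand-in for MLX4_MAX_MTT_SHIFT */
            int i;

            for (i = 1; i < 3; i++)
                    if (alignment_of(starts[i]) < shift)
                            shift = alignment_of(starts[i]);

            /* 0x241000 is only 4 KiB aligned, so shift collapses to 12 */
            printf("optimal MTT page shift: %u\n", shift);
            return 0;
    }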
Modified: trunk/sys/ofed/drivers/infiniband/hw/mlx4/qp.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/qp.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/qp.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -32,15 +32,23 @@
*/
#include <linux/log2.h>
+#include <linux/slab.h>
#include <linux/netdevice.h>
+#include <linux/bitops.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib_mad.h>
#include <linux/mlx4/qp.h>
+#include <linux/mlx4/driver.h>
#include <linux/io.h>
+#ifndef __linux__
+#define asm __asm
+#endif
+
#include "mlx4_ib.h"
#include "user.h"
@@ -52,27 +60,24 @@
MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
MLX4_IB_LINK_TYPE_IB = 0,
- MLX4_IB_LINK_TYPE_ETH = 1,
+ MLX4_IB_LINK_TYPE_ETH = 1
};
enum {
/*
- * Largest possible UD header: send with GRH and immediate data.
- * 4 bytes added to accommodate for eth header instead of lrh
+ * Largest possible UD header: send with GRH and immediate
+ * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
+ * tag. (LRH would only use 8 bytes, so Ethernet is the
+ * biggest case)
*/
- MLX4_IB_UD_HEADER_SIZE = 76,
- MLX4_IB_MAX_RAW_ETY_HDR_SIZE = 12
+ MLX4_IB_UD_HEADER_SIZE = 82,
+ MLX4_IB_LSO_HEADER_SPARE = 128,
};
enum {
- MLX4_IBOE_ETHERTYPE = 0x8915
+ MLX4_IB_IBOE_ETHERTYPE = 0x8915
};
-struct mlx4_ib_xrc_reg_entry {
- struct list_head list;
- void *context;
-};
-
struct mlx4_ib_sqp {
struct mlx4_ib_qp qp;
int pkey_index;
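
The new value of MLX4_IB_UD_HEADER_SIZE follows from the header layout named in the comment above: a GRH-based UD send with immediate data takes GRH (40) + BTH (12) + DETH (8) + immediate (4) = 64 bytes, and adding 18 bytes for an Ethernet header with a VLAN/802.1Q tag gives 82. A one-line check (the per-header breakdown is our reading of standard IBA/Ethernet sizes, not spelled out in the commit):

    #include <assert.h>

    int main(void)
    {
            /* eth + vlan + grh + bth + deth + immediate */
            assert(14 + 4 + 40 + 12 + 8 + 4 == 82); /* MLX4_IB_UD_HEADER_SIZE */
            return 0;
    }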
@@ -83,9 +88,15 @@
};
enum {
- MLX4_IB_MIN_SQ_STRIDE = 6
+ MLX4_IB_MIN_SQ_STRIDE = 6,
+ MLX4_IB_CACHE_LINE_SIZE = 64,
};
+enum {
+ MLX4_RAW_QP_MTU = 7,
+ MLX4_RAW_QP_MSGMAX = 31,
+};
+
static const __be32 mlx4_ib_opcode[] = {
[IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
[IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
@@ -104,32 +115,77 @@
#ifndef wc_wmb
#if defined(__i386__)
- #define wc_wmb() __asm volatile("lock; addl $0,0(%%esp) " ::: "memory")
+ #define wc_wmb() asm volatile("lock; addl $0,0(%%esp) " ::: "memory")
#elif defined(__x86_64__)
- #define wc_wmb() __asm volatile("sfence" ::: "memory")
+ #define wc_wmb() asm volatile("sfence" ::: "memory")
#elif defined(__ia64__)
- #define wc_wmb() __asm volatile("fwb" ::: "memory")
+ #define wc_wmb() asm volatile("fwb" ::: "memory")
#else
#define wc_wmb() wmb()
#endif
#endif
-
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
return container_of(mqp, struct mlx4_ib_sqp, qp);
}
+static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ if (!mlx4_is_master(dev->dev))
+ return 0;
+
+ return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
+ qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
+ 8 * MLX4_MFUNC_MAX;
+}
+
static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
- return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
- qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
+ int proxy_sqp = 0;
+ int real_sqp = 0;
+ int i;
+ /* PPF or Native -- real SQP */
+ real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
+ qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
+ qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
+ if (real_sqp)
+ return 1;
+ /* VF or PF -- proxy SQP */
+ if (mlx4_is_mfunc(dev->dev)) {
+ for (i = 0; i < dev->dev->caps.num_ports; i++) {
+ if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
+ qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
+ proxy_sqp = 1;
+ break;
+ }
+ }
+ }
+ return proxy_sqp;
}
+/* used for INIT/CLOSE port logic */
static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
- return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
- qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
+ int proxy_qp0 = 0;
+ int real_qp0 = 0;
+ int i;
+ /* PPF or Native -- real QP0 */
+ real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
+ qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
+ qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
+ if (real_qp0)
+ return 1;
+ /* VF or PF -- proxy QP0 */
+ if (mlx4_is_mfunc(dev->dev)) {
+ for (i = 0; i < dev->dev->caps.num_ports; i++) {
+ if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
+ proxy_qp0 = 1;
+ break;
+ }
+ }
+ }
+ return proxy_qp0;
}
static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
@@ -207,7 +263,7 @@
/* Pad the remainder of the WQE with an inline data segment. */
if (size > s) {
inl = wqe + s;
- inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
+ inl->byte_count = cpu_to_be32(1U << 31 | (size - s - sizeof *inl));
}
ctrl->srcrb_flags = 0;
ctrl->fence_size = size / 16;
@@ -237,10 +293,7 @@
static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
struct ib_event event;
- struct mlx4_ib_qp *mqp = to_mibqp(qp);
- struct ib_qp *ibqp = &mqp->ibqp;
- struct mlx4_ib_xrc_reg_entry *ctx_entry;
- unsigned long flags;
+ struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
if (type == MLX4_EVENT_TYPE_PATH_MIG)
to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
@@ -247,6 +300,7 @@
if (ibqp->event_handler) {
event.device = ibqp->device;
+ event.element.qp = ibqp;
switch (type) {
case MLX4_EVENT_TYPE_PATH_MIG:
event.event = IB_EVENT_PATH_MIG;
@@ -273,27 +327,16 @@
event.event = IB_EVENT_QP_ACCESS_ERR;
break;
default:
- printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ pr_warn("Unexpected event type %d "
"on QP %06x\n", type, qp->qpn);
return;
}
- if (unlikely(ibqp->qp_type == IB_QPT_XRC &&
- mqp->flags & MLX4_IB_XRC_RCV)) {
- event.event |= IB_XRC_QP_EVENT_FLAG;
- event.element.xrc_qp_num = ibqp->qp_num;
- spin_lock_irqsave(&mqp->xrc_reg_list_lock, flags);
- list_for_each_entry(ctx_entry, &mqp->xrc_reg_list, list)
- ibqp->event_handler(&event, ctx_entry->context);
- spin_unlock_irqrestore(&mqp->xrc_reg_list_lock, flags);
- return;
- }
- event.element.qp = ibqp;
ibqp->event_handler(&event, ibqp->qp_context);
}
}
-static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
+static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
{
/*
* UD WQEs must have a datagram segment.
@@ -302,20 +345,29 @@
* header and space for the ICRC).
*/
switch (type) {
- case IB_QPT_UD:
+ case MLX4_IB_QPT_UD:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_datagram_seg) +
- ((flags & MLX4_IB_QP_LSO) ? 128 : 0);
- case IB_QPT_UC:
+ ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
+ case MLX4_IB_QPT_PROXY_SMI_OWNER:
+ case MLX4_IB_QPT_PROXY_SMI:
+ case MLX4_IB_QPT_PROXY_GSI:
return sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_datagram_seg) + 64;
+ case MLX4_IB_QPT_TUN_SMI_OWNER:
+ case MLX4_IB_QPT_TUN_GSI:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_datagram_seg);
+
+ case MLX4_IB_QPT_UC:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
- case IB_QPT_XRC:
- case IB_QPT_RC:
+ case MLX4_IB_QPT_RC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
- sizeof (struct mlx4_wqe_atomic_seg) +
+ sizeof (struct mlx4_wqe_masked_atomic_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
- case IB_QPT_SMI:
- case IB_QPT_GSI:
+ case MLX4_IB_QPT_SMI:
+ case MLX4_IB_QPT_GSI:
return sizeof (struct mlx4_wqe_ctrl_seg) +
ALIGN(MLX4_IB_UD_HEADER_SIZE +
DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
@@ -325,12 +377,6 @@
ALIGN(4 +
sizeof (struct mlx4_wqe_inline_seg),
sizeof (struct mlx4_wqe_data_seg));
- case IB_QPT_RAW_ETY:
- return sizeof(struct mlx4_wqe_ctrl_seg) +
- ALIGN(MLX4_IB_MAX_RAW_ETY_HDR_SIZE +
- sizeof(struct mlx4_wqe_inline_seg),
- sizeof(struct mlx4_wqe_data_seg));
-
default:
return sizeof (struct mlx4_wqe_ctrl_seg);
}
@@ -337,32 +383,22 @@
}
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- int is_user, int has_srq_or_is_xrc, struct mlx4_ib_qp *qp)
+ int is_user, int has_rq, struct mlx4_ib_qp *qp)
{
/* Sanity check RQ size before proceeding */
if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
- cap->max_recv_sge >
- min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)) {
- mlx4_ib_dbg("Requested RQ size (sge or wr) too large");
+ cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
return -EINVAL;
- }
- if (has_srq_or_is_xrc) {
- /* QPs attached to an SRQ should have no RQ */
- if (cap->max_recv_wr) {
- mlx4_ib_dbg("non-zero RQ size for QP using SRQ");
+ if (!has_rq) {
+ if (cap->max_recv_wr)
return -EINVAL;
- }
qp->rq.wqe_cnt = qp->rq.max_gs = 0;
} else {
/* HW requires >= 1 RQ entry with >= 1 gather entry */
- if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
- mlx4_ib_dbg("user QP RQ has 0 wr's or 0 sge's "
- "(wr: 0x%x, sge: 0x%x)", cap->max_recv_wr,
- cap->max_recv_sge);
+ if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
return -EINVAL;
- }
qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr));
qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
@@ -378,45 +414,33 @@
min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
cap->max_recv_sge = min(qp->rq.max_gs,
min(dev->dev->caps.max_sq_sg,
- dev->dev->caps.max_rq_sg));
+ dev->dev->caps.max_rq_sg));
}
- /* We don't support inline sends for kernel QPs (yet) */
-
return 0;
}
static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- enum ib_qp_type type, struct mlx4_ib_qp *qp)
+ enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
{
int s;
/* Sanity check SQ size before proceeding */
- if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
- cap->max_send_sge >
- min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
+ if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
+ cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
- sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) {
- mlx4_ib_dbg("Requested SQ resources exceed device maxima");
+ sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
return -EINVAL;
- }
/*
* For MLX transport we need 2 extra S/G entries:
* one for the header and one for the checksum at the end
*/
- if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
- cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) {
- mlx4_ib_dbg("No space for SQP hdr/csum sge's");
+ if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI ||
+ type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
+ cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
return -EINVAL;
- }
- if (type == IB_QPT_RAW_ETY &&
- cap->max_send_sge + 1 > dev->dev->caps.max_sq_sg) {
- mlx4_ib_dbg("No space for RAW ETY hdr");
- return -EINVAL;
- }
-
s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
send_wqe_overhead(type, qp->flags);
@@ -434,7 +458,7 @@
* anymore, so we do this only if selective signaling is off.
*
* Further, on 32-bit platforms, we can't use vmap() to make
- * the QP buffer virtually contigious. Thus we have to use
+ * the QP buffer virtually contiguous. Thus we have to use
* constant-sized WRs to make sure a WR is always fully within
* a single page-sized chunk.
*
@@ -457,7 +481,9 @@
*/
if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
qp->sq_signal_bits && BITS_PER_LONG == 64 &&
- type != IB_QPT_SMI && type != IB_QPT_GSI && type != IB_QPT_RAW_ETY)
+ type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
+ !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
+ MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
qp->sq.wqe_shift = ilog2(64);
else
qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
@@ -516,10 +542,8 @@
if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
ucmd->log_sq_stride >
ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
- ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE) {
- mlx4_ib_dbg("Requested max wqes or wqe stride exceeds max");
+ ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
return -EINVAL;
- }
qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
qp->sq.wqe_shift = ucmd->log_sq_stride;
@@ -530,30 +554,412 @@
return 0;
}
+static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
+{
+ int i;
+
+ qp->sqp_proxy_rcv =
+ kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
+ GFP_KERNEL);
+ if (!qp->sqp_proxy_rcv)
+ return -ENOMEM;
+ for (i = 0; i < qp->rq.wqe_cnt; i++) {
+ qp->sqp_proxy_rcv[i].addr =
+ kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ GFP_KERNEL);
+ if (!qp->sqp_proxy_rcv[i].addr)
+ goto err;
+ qp->sqp_proxy_rcv[i].map =
+ ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ }
+ return 0;
+
+err:
+ while (i > 0) {
+ --i;
+ ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ kfree(qp->sqp_proxy_rcv[i].addr);
+ }
+ kfree(qp->sqp_proxy_rcv);
+ qp->sqp_proxy_rcv = NULL;
+ return -ENOMEM;
+}
+
+static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
+{
+ int i;
+
+ for (i = 0; i < qp->rq.wqe_cnt; i++) {
+ ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ kfree(qp->sqp_proxy_rcv[i].addr);
+ }
+ kfree(qp->sqp_proxy_rcv);
+}
+
+static int qp_has_rq(struct ib_qp_init_attr *attr)
+{
+ if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
+ return 0;
+
+ return !attr->srq;
+}
+
+#ifdef __linux__
+static int init_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp,
+ struct ib_qp_init_attr *attr, int *qpn)
+{
+ struct mlx4_ib_qpg_data *qpg_data;
+ int tss_num, rss_num;
+ int tss_align_num, rss_align_num;
+ int tss_base, rss_base = 0;
+ int err;
+
+ /* Parent is part of the TSS range (in SW TSS ARP is sent via parent) */
+ tss_num = 1 + attr->parent_attrib.tss_child_count;
+ tss_align_num = roundup_pow_of_two(tss_num);
+ rss_num = attr->parent_attrib.rss_child_count;
+ rss_align_num = roundup_pow_of_two(rss_num);
+
+ if (rss_num > 1) {
+ /* RSS is requested */
+ if (!(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS))
+ return -ENOSYS;
+ if (rss_align_num > dev->dev->caps.max_rss_tbl_sz)
+ return -EINVAL;
+ /* We must work with power of two */
+ attr->parent_attrib.rss_child_count = rss_align_num;
+ }
+
+ qpg_data = kzalloc(sizeof *qpg_data, GFP_KERNEL);
+ if (!qpg_data)
+ return -ENOMEM;
+
+ if (pqp->flags & MLX4_IB_QP_NETIF)
+ err = mlx4_ib_steer_qp_alloc(dev, tss_align_num, &tss_base);
+ else
+ err = mlx4_qp_reserve_range(dev->dev, tss_align_num,
+ tss_align_num, &tss_base, 1);
+ if (err)
+ goto err1;
+
+ if (tss_num > 1) {
+ u32 alloc = BITS_TO_LONGS(tss_align_num) * sizeof(long);
+ qpg_data->tss_bitmap = kzalloc(alloc, GFP_KERNEL);
+ if (qpg_data->tss_bitmap == NULL) {
+ err = -ENOMEM;
+ goto err2;
+ }
+ bitmap_fill(qpg_data->tss_bitmap, tss_num);
+ /* Note parent takes first index */
+ clear_bit(0, qpg_data->tss_bitmap);
+ }
+
+ if (rss_num > 1) {
+ u32 alloc = BITS_TO_LONGS(rss_align_num) * sizeof(long);
+ err = mlx4_qp_reserve_range(dev->dev, rss_align_num,
+ 1, &rss_base, 0);
+ if (err)
+ goto err3;
+ qpg_data->rss_bitmap = kzalloc(alloc, GFP_KERNEL);
+ if (qpg_data->rss_bitmap == NULL) {
+ err = -ENOMEM;
+ goto err4;
+ }
+ bitmap_fill(qpg_data->rss_bitmap, rss_align_num);
+ }
+
+ qpg_data->tss_child_count = attr->parent_attrib.tss_child_count;
+ qpg_data->rss_child_count = attr->parent_attrib.rss_child_count;
+ qpg_data->qpg_parent = pqp;
+ qpg_data->qpg_tss_mask_sz = ilog2(tss_align_num);
+ qpg_data->tss_qpn_base = tss_base;
+ qpg_data->rss_qpn_base = rss_base;
+
+ pqp->qpg_data = qpg_data;
+ *qpn = tss_base;
+
+ return 0;
+
+err4:
+ mlx4_qp_release_range(dev->dev, rss_base, rss_align_num);
+
+err3:
+ if (tss_num > 1)
+ kfree(qpg_data->tss_bitmap);
+
+err2:
+ if (pqp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_free(dev, tss_base, tss_align_num);
+ else
+ mlx4_qp_release_range(dev->dev, tss_base, tss_align_num);
+
+err1:
+ kfree(qpg_data);
+ return err;
+}
+
+static void free_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp)
+{
+ struct mlx4_ib_qpg_data *qpg_data = pqp->qpg_data;
+ int align_num;
+
+ if (qpg_data->tss_child_count > 1)
+ kfree(qpg_data->tss_bitmap);
+
+ align_num = roundup_pow_of_two(1 + qpg_data->tss_child_count);
+ if (pqp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_free(dev, qpg_data->tss_qpn_base, align_num);
+ else
+ mlx4_qp_release_range(dev->dev, qpg_data->tss_qpn_base, align_num);
+
+ if (qpg_data->rss_child_count > 1) {
+ kfree(qpg_data->rss_bitmap);
+ align_num = roundup_pow_of_two(qpg_data->rss_child_count);
+ mlx4_qp_release_range(dev->dev, qpg_data->rss_qpn_base,
+ align_num);
+ }
+
+ kfree(qpg_data);
+}
+
+static int alloc_qpg_qpn(struct ib_qp_init_attr *init_attr,
+ struct mlx4_ib_qp *pqp, int *qpn)
+{
+ struct mlx4_ib_qp *mqp = to_mqp(init_attr->qpg_parent);
+ struct mlx4_ib_qpg_data *qpg_data = mqp->qpg_data;
+ u32 idx, old;
+
+ switch (init_attr->qpg_type) {
+ case IB_QPG_CHILD_TX:
+ if (qpg_data->tss_child_count == 0)
+ return -EINVAL;
+ do {
+ /* Parent took index 0 */
+ idx = find_first_bit(qpg_data->tss_bitmap,
+ qpg_data->tss_child_count + 1);
+ if (idx >= qpg_data->tss_child_count + 1)
+ return -ENOMEM;
+ old = test_and_clear_bit(idx, qpg_data->tss_bitmap);
+ } while (old == 0);
+ idx += qpg_data->tss_qpn_base;
+ break;
+ case IB_QPG_CHILD_RX:
+ if (qpg_data->rss_child_count == 0)
+ return -EINVAL;
+ do {
+ idx = find_first_bit(qpg_data->rss_bitmap,
+ qpg_data->rss_child_count);
+ if (idx >= qpg_data->rss_child_count)
+ return -ENOMEM;
+ old = test_and_clear_bit(idx, qpg_data->rss_bitmap);
+ } while (old == 0);
+ idx += qpg_data->rss_qpn_base;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pqp->qpg_data = qpg_data;
+ *qpn = idx;
+
+ return 0;
+}
+
+static void free_qpg_qpn(struct mlx4_ib_qp *mqp, int qpn)
+{
+ struct mlx4_ib_qpg_data *qpg_data = mqp->qpg_data;
+
+ switch (mqp->qpg_type) {
+ case IB_QPG_CHILD_TX:
+ /* Do range check */
+ qpn -= qpg_data->tss_qpn_base;
+ set_bit(qpn, qpg_data->tss_bitmap);
+ break;
+ case IB_QPG_CHILD_RX:
+ qpn -= qpg_data->rss_qpn_base;
+ set_bit(qpn, qpg_data->rss_bitmap);
+ break;
+ default:
+ /* error */
+ pr_warn("wrong qpg type (%d)\n", mqp->qpg_type);
+ break;
+ }
+}
+#endif
+
+static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
+ struct ib_qp_init_attr *attr, int *qpn)
+{
+ int err = 0;
+
+ switch (attr->qpg_type) {
+ case IB_QPG_NONE:
+ /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE
+ * BlueFlame setup flow wrongly causes VLAN insertion. */
+ if (attr->qp_type == IB_QPT_RAW_PACKET) {
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn, 1);
+ } else {
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ err = mlx4_ib_steer_qp_alloc(dev, 1, qpn);
+ else
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn, 0);
+ }
+ break;
+ case IB_QPG_PARENT:
+#ifdef __linux__
+ err = init_qpg_parent(dev, qp, attr, qpn);
+#endif
+ break;
+ case IB_QPG_CHILD_TX:
+ case IB_QPG_CHILD_RX:
+#ifdef __linux__
+ err = alloc_qpg_qpn(attr, qp, qpn);
+#endif
+ break;
+ default:
+ qp->qpg_type = IB_QPG_NONE;
+ err = -EINVAL;
+ break;
+ }
+ if (err)
+ return err;
+ qp->qpg_type = attr->qpg_type;
+ return 0;
+}
+
+static void free_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
+ enum ib_qpg_type qpg_type, int qpn)
+{
+ switch (qpg_type) {
+ case IB_QPG_NONE:
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_free(dev, qpn, 1);
+ else
+ mlx4_qp_release_range(dev->dev, qpn, 1);
+ break;
+ case IB_QPG_PARENT:
+#ifdef __linux__
+ free_qpg_parent(dev, qp);
+#endif
+ break;
+ case IB_QPG_CHILD_TX:
+ case IB_QPG_CHILD_RX:
+#ifdef __linux__
+ free_qpg_qpn(qp, qpn);
+#endif
+ break;
+ default:
+ break;
+ }
+}
+
+/* Revert allocation on create_qp_common */
+static void unalloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
+ struct ib_qp_init_attr *attr, int qpn)
+{
+ free_qpn_common(dev, qp, attr->qpg_type, qpn);
+}
+
+static void release_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ free_qpn_common(dev, qp, qp->qpg_type, qp->mqp.qpn);
+}
+
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
+ struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
{
int qpn;
int err;
+ struct mlx4_ib_sqp *sqp;
+ struct mlx4_ib_qp *qp;
+ enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
+#ifndef __linux__
+ init_attr->qpg_type = IB_QPG_NONE;
+#endif
+
+ /* When tunneling special qps, we use a plain UD qp */
+ if (sqpn) {
+ if (mlx4_is_mfunc(dev->dev) &&
+ (!mlx4_is_master(dev->dev) ||
+ !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
+ if (init_attr->qp_type == IB_QPT_GSI)
+ qp_type = MLX4_IB_QPT_PROXY_GSI;
+ else if (mlx4_is_master(dev->dev))
+ qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
+ else
+ qp_type = MLX4_IB_QPT_PROXY_SMI;
+ }
+ qpn = sqpn;
+ /* add extra sg entry for tunneling */
+ init_attr->cap.max_recv_sge++;
+ } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
+ struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
+ container_of(init_attr,
+ struct mlx4_ib_qp_tunnel_init_attr, init_attr);
+ if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
+ tnl_init->proxy_qp_type != IB_QPT_GSI) ||
+ !mlx4_is_master(dev->dev))
+ return -EINVAL;
+ if (tnl_init->proxy_qp_type == IB_QPT_GSI)
+ qp_type = MLX4_IB_QPT_TUN_GSI;
+ else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
+ qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
+ else
+ qp_type = MLX4_IB_QPT_TUN_SMI;
+ /* we are definitely in the PPF here, since we are creating
+ * tunnel QPs. base_tunnel_sqpn is therefore valid. */
+ qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
+ + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
+ sqpn = qpn;
+ }
+
+ if (!*caller_qp) {
+ if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
+ (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
+ sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
+ if (!sqp)
+ return -ENOMEM;
+ qp = &sqp->qp;
+ qp->pri.vid = qp->alt.vid = 0xFFFF;
+ } else {
+ qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
+ if (!qp)
+ return -ENOMEM;
+ qp->pri.vid = qp->alt.vid = 0xFFFF;
+ }
+ } else
+ qp = *caller_qp;
+
+ qp->mlx4_ib_qp_type = qp_type;
+
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
- spin_lock_init(&qp->xrc_reg_list_lock);
INIT_LIST_HEAD(&qp->gid_list);
+ INIT_LIST_HEAD(&qp->steering_rules);
+ INIT_LIST_HEAD(&qp->rules_list);
qp->state = IB_QPS_RESET;
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
- !!init_attr->srq || !!init_attr->xrc_domain , qp);
+ err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
if (err)
goto err;
if (pd->uobject) {
struct mlx4_ib_create_qp ucmd;
+ int shift;
+ int n;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
err = -EFAULT;
@@ -570,30 +976,25 @@
qp->buf_size, 0, 0);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
- mlx4_ib_dbg("ib_umem_get error (%d)", err);
goto err;
}
- err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
- ilog2(qp->umem->page_size), &qp->mtt);
- if (err) {
- mlx4_ib_dbg("mlx4_mtt_init error (%d)", err);
+ n = ib_umem_page_count(qp->umem);
+ shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
+ err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
+
+ if (err)
goto err_buf;
- }
err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
- if (err) {
- mlx4_ib_dbg("mlx4_ib_umem_write_mtt error (%d)", err);
+ if (err)
goto err_mtt;
- }
- if (!init_attr->srq && init_attr->qp_type != IB_QPT_XRC) {
+ if (qp_has_rq(init_attr)) {
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &qp->db);
- if (err) {
- mlx4_ib_dbg("mlx4_ib_db_map_user error (%d)", err);
+ if (err)
goto err_mtt;
- }
}
} else {
qp->sq_no_prefetch = 0;
@@ -604,11 +1005,17 @@
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
- err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
+ if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP &&
+ dev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED &&
+ !mlx4_is_mfunc(dev->dev))
+ qp->flags |= MLX4_IB_QP_NETIF;
+
+ err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
if (err)
goto err;
- if (!init_attr->srq && init_attr->qp_type != IB_QPT_XRC) {
+ if (qp_has_rq(init_attr)) {
err = mlx4_db_alloc(dev->dev, &qp->db, 0);
if (err)
goto err;
@@ -617,9 +1024,10 @@
}
if (qp->max_inline_data) {
- err = mlx4_bf_alloc(dev->dev, &qp->bf);
+ err = mlx4_bf_alloc(dev->dev, &qp->bf, 0);
if (err) {
- mlx4_ib_dbg("failed to allocate blue flame register (%d)", err);
+ pr_debug("failed to allocate blue flame"
+ " register (%d)", err);
qp->bf.uar = &dev->priv_uar;
}
} else
@@ -632,16 +1040,12 @@
err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
&qp->mtt);
- if (err) {
- mlx4_ib_dbg("kernel qp mlx4_mtt_init error (%d)", err);
+ if (err)
goto err_buf;
- }
err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
- if (err) {
- mlx4_ib_dbg("mlx4_buf_write_mtt error (%d)", err);
+ if (err)
goto err_mtt;
- }
qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
@@ -653,11 +1057,17 @@
}
if (sqpn) {
- qpn = sqpn;
+ if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
+ if (alloc_proxy_bufs(pd->device, qp)) {
+ err = -ENOMEM;
+ goto err_wrid;
+ }
+ }
} else {
- err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
+ err = alloc_qpn_common(dev, qp, init_attr, &qpn);
if (err)
- goto err_wrid;
+ goto err_proxy;
}
err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
@@ -664,7 +1074,7 @@
if (err)
goto err_qpn;
- if (init_attr->qp_type == IB_QPT_XRC)
+ if (init_attr->qp_type == IB_QPT_XRC_TGT)
qp->mqp.qpn |= (1 << 23);
/*
@@ -675,18 +1085,20 @@
qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
qp->mqp.event = mlx4_ib_qp_event;
-
+ if (!*caller_qp)
+ *caller_qp = qp;
return 0;
err_qpn:
- if (!sqpn)
- mlx4_qp_release_range(dev->dev, qpn, 1);
+ unalloc_qpn_common(dev, qp, init_attr, qpn);
+err_proxy:
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
+ free_proxy_bufs(pd->device, qp);
err_wrid:
if (pd->uobject) {
- if (!init_attr->srq && init_attr->qp_type != IB_QPT_XRC)
- mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context),
- &qp->db);
+ if (qp_has_rq(init_attr))
+ mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
} else {
kfree(qp->sq.wrid);
kfree(qp->rq.wrid);
@@ -702,7 +1114,7 @@
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
err_db:
- if (!pd->uobject && !init_attr->srq && init_attr->qp_type != IB_QPT_XRC)
+ if (!pd->uobject && qp_has_rq(init_attr))
mlx4_db_free(dev->dev, &qp->db);
if (qp->max_inline_data)
@@ -709,6 +1121,8 @@
mlx4_bf_free(dev->dev, &qp->bf);
err:
+ if (!*caller_qp)
+ kfree(qp);
return err;
}
@@ -727,10 +1141,12 @@
}
static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
spin_lock_irq(&send_cq->lock);
- else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ (void) __acquire(&recv_cq->lock);
+ } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_lock_irq(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
@@ -740,10 +1156,12 @@
}
static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+ __releases(&send_cq->lock) __releases(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
+ (void) __release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
- else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
} else {
@@ -754,7 +1172,7 @@
static void del_gid_entries(struct mlx4_ib_qp *qp)
{
- struct gid_entry *ge, *tmp;
+ struct mlx4_ib_gid_entry *ge, *tmp;
list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
list_del(&ge->list);
@@ -762,19 +1180,66 @@
}
}
+static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
+{
+ if (qp->ibqp.qp_type == IB_QPT_XRC_TGT)
+ return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
+ else
+ return to_mpd(qp->ibqp.pd);
+}
+
+static void get_cqs(struct mlx4_ib_qp *qp,
+ struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
+{
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_XRC_TGT:
+ *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
+ *recv_cq = *send_cq;
+ break;
+ case IB_QPT_XRC_INI:
+ *send_cq = to_mcq(qp->ibqp.send_cq);
+ *recv_cq = *send_cq;
+ break;
+ default:
+ *send_cq = to_mcq(qp->ibqp.send_cq);
+ *recv_cq = to_mcq(qp->ibqp.recv_cq);
+ break;
+ }
+}
+
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
int is_user)
{
struct mlx4_ib_cq *send_cq, *recv_cq;
- if (qp->state != IB_QPS_RESET)
+ if (qp->state != IB_QPS_RESET) {
if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
- printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
+ pr_warn("modify QP %06x to RESET failed.\n",
qp->mqp.qpn);
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = 0;
+ }
+ if (qp->alt.smac) {
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = 0;
+ }
+ if (qp->pri.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
+ qp->pri.vid = 0xFFFF;
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
+ if (qp->alt.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
+ qp->alt.vid = 0xFFFF;
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
+ }
- send_cq = to_mcq(qp->ibqp.send_cq);
- recv_cq = to_mcq(qp->ibqp.recv_cq);
+ get_cqs(qp, &send_cq, &recv_cq);
mlx4_ib_lock_cqs(send_cq, recv_cq);
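
mlx4_ib_lock_cqs()/mlx4_ib_unlock_cqs() above avoid an ABBA deadlock by always taking the CQ with the lower cqn first, and by taking only one lock when send and receive share a CQ (the __acquire/__release annotations just keep sparse happy). The same discipline in a stand-alone sketch, with pthread mutexes standing in for the CQ spinlocks:

    #include <pthread.h>

    struct cq {
            int cqn;
            pthread_mutex_t lock;
    };

    /* always lock in ascending cqn order so two threads locking the
     * same pair of CQs can never deadlock */
    static void lock_cqs(struct cq *send_cq, struct cq *recv_cq)
    {
            if (send_cq == recv_cq) {
                    pthread_mutex_lock(&send_cq->lock);
            } else if (send_cq->cqn < recv_cq->cqn) {
                    pthread_mutex_lock(&send_cq->lock);
                    pthread_mutex_lock(&recv_cq->lock);
            } else {
                    pthread_mutex_lock(&recv_cq->lock);
                    pthread_mutex_lock(&send_cq->lock);
            }
    }

    static void unlock_cqs(struct cq *send_cq, struct cq *recv_cq)
    {
            if (send_cq == recv_cq) {
                    pthread_mutex_unlock(&send_cq->lock);
            } else {
                    /* unlock order does not affect deadlock avoidance */
                    pthread_mutex_unlock(&recv_cq->lock);
                    pthread_mutex_unlock(&send_cq->lock);
            }
    }

    int main(void)
    {
            struct cq a = { 1, PTHREAD_MUTEX_INITIALIZER };
            struct cq b = { 2, PTHREAD_MUTEX_INITIALIZER };

            lock_cqs(&b, &a); /* still takes a (cqn 1) first */
            unlock_cqs(&b, &a);
            return 0;
    }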
@@ -791,13 +1256,13 @@
mlx4_qp_free(dev->dev, &qp->mqp);
- if (!is_sqp(dev, qp))
- mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+ if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
+ release_qpn_common(dev, qp);
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
if (is_user) {
- if (!qp->ibqp.srq && qp->ibqp.qp_type != IB_QPT_XRC)
+ if (qp->rq.wqe_cnt)
mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
&qp->db);
ib_umem_release(qp->umem);
@@ -804,10 +1269,14 @@
} else {
kfree(qp->sq.wrid);
kfree(qp->rq.wrid);
+ if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
+ free_proxy_bufs(&dev->ib_dev, qp);
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
if (qp->max_inline_data)
mlx4_bf_free(dev->dev, &qp->bf);
- if (!qp->ibqp.srq && qp->ibqp.qp_type != IB_QPT_XRC)
+
+ if (qp->rq.wqe_cnt)
mlx4_db_free(dev->dev, &qp->db);
}
@@ -814,83 +1283,178 @@
del_gid_entries(qp);
}
+static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
+{
+ /* Native or PPF */
+ if (!mlx4_is_mfunc(dev->dev) ||
+ (mlx4_is_master(dev->dev) &&
+ attr->create_flags & MLX4_IB_SRIOV_SQP)) {
+ return dev->dev->phys_caps.base_sqpn +
+ (attr->qp_type == IB_QPT_SMI ? 0 : 2) +
+ attr->port_num - 1;
+ }
+ /* PF or VF -- creating proxies */
+ if (attr->qp_type == IB_QPT_SMI)
+ return dev->dev->caps.qp0_proxy[attr->port_num - 1];
+ else
+ return dev->dev->caps.qp1_proxy[attr->port_num - 1];
+}
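
On a native device (or the PPF owning the real special QPs), the QP number is pure arithmetic on the firmware base: QP0s occupy base..base+nports-1 and QP1s take the next nports slots. A sketch of that arithmetic; the 0x40 base is a made-up value standing in for phys_caps.base_sqpn:

    #include <stdio.h>

    #define BASE_SQPN 0x40      /* hypothetical; read from firmware caps */

    static unsigned sqp_num(int is_smi, int port_num)
    {
        /* QP0s first, then QP1s, one per port */
        return BASE_SQPN + (is_smi ? 0 : 2) + port_num - 1;
    }

    int main(void)
    {
        printf("port1 qp0 %#x, port2 qp1 %#x\n",
               sqp_num(1, 1), sqp_num(0, 2));   /* 0x40, 0x43 */
        return 0;
    }
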
+
+#ifdef __linux__
+static int check_qpg_attr(struct mlx4_ib_dev *dev,
+ struct ib_qp_init_attr *attr)
+{
+ if (attr->qpg_type == IB_QPG_NONE)
+ return 0;
+
+ if (attr->qp_type != IB_QPT_UD)
+ return -EINVAL;
+
+ if (attr->qpg_type == IB_QPG_PARENT) {
+ if (attr->parent_attrib.tss_child_count == 1)
+ return -EINVAL; /* Doesn't make sense */
+ if (attr->parent_attrib.rss_child_count == 1)
+ return -EINVAL; /* Doesn't make sense */
+ if ((attr->parent_attrib.tss_child_count == 0) &&
+ (attr->parent_attrib.rss_child_count == 0))
+ /* Should be called with IB_QPG_NONE */
+ return -EINVAL;
+ if (attr->parent_attrib.rss_child_count > 1) {
+ int rss_align_num;
+ if (!(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS))
+ return -ENOSYS;
+ rss_align_num = roundup_pow_of_two(
+ attr->parent_attrib.rss_child_count);
+ if (rss_align_num > dev->dev->caps.max_rss_tbl_sz)
+ return -EINVAL;
+ }
+ } else {
+ struct mlx4_ib_qpg_data *qpg_data;
+ if (attr->qpg_parent == NULL)
+ return -EINVAL;
+ if (IS_ERR(attr->qpg_parent))
+ return -EINVAL;
+ qpg_data = to_mqp(attr->qpg_parent)->qpg_data;
+ if (qpg_data == NULL)
+ return -EINVAL;
+ if (attr->qpg_type == IB_QPG_CHILD_TX &&
+ !qpg_data->tss_child_count)
+ return -EINVAL;
+ if (attr->qpg_type == IB_QPG_CHILD_RX &&
+ !qpg_data->rss_child_count)
+ return -EINVAL;
+ }
+ return 0;
+}
+#endif
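
The RSS branch rounds the requested child count up to a power of two before comparing against the device's maximum indirection-table size, since the hardware table is sized in powers of two. A standalone sketch of that check; the max_tbl_sz of 64 is a made-up cap:

    #include <stdio.h>

    static unsigned roundup_pow_of_two(unsigned n)
    {
        unsigned p = 1;

        while (p < n)
            p <<= 1;
        return p;
    }

    static int check_rss_child_count(unsigned count, unsigned max_tbl_sz)
    {
        if (count == 1)
            return -1;      /* doesn't make sense, as above */
        if (roundup_pow_of_two(count) > max_tbl_sz)
            return -1;      /* table would not fit */
        return 0;
    }

    int main(void)
    {
        printf("%d %d\n", check_rss_child_count(6, 64),
               check_rss_child_count(100, 64)); /* 0 -1 */
        return 0;
    }
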
+
+#define RESERVED_FLAGS_MASK ((((unsigned int)IB_QP_CREATE_RESERVED_END - 1) | IB_QP_CREATE_RESERVED_END) \
+ & ~(IB_QP_CREATE_RESERVED_START - 1))
+
+static enum mlx4_ib_qp_flags to_mlx4_ib_qp_flags(enum ib_qp_create_flags ib_qp_flags)
+{
+ enum mlx4_ib_qp_flags mlx4_ib_qp_flags = 0;
+
+ if (ib_qp_flags & IB_QP_CREATE_IPOIB_UD_LSO)
+ mlx4_ib_qp_flags |= MLX4_IB_QP_LSO;
+
+ if (ib_qp_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ mlx4_ib_qp_flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+
+ if (ib_qp_flags & IB_QP_CREATE_NETIF_QP)
+ mlx4_ib_qp_flags |= MLX4_IB_QP_NETIF;
+
+ /* reserved flags */
+ mlx4_ib_qp_flags |= (ib_qp_flags & RESERVED_FLAGS_MASK);
+
+ return mlx4_ib_qp_flags;
+}
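
RESERVED_FLAGS_MASK selects every bit from IB_QP_CREATE_RESERVED_START up through IB_QP_CREATE_RESERVED_END, so vendor-reserved create flags pass through the translation unchanged while the known flags are remapped one by one. A sketch of the mask computation with hypothetical bit positions (the real values come from ib_verbs.h):

    #include <stdio.h>

    /* hypothetical positions for illustration only */
    #define RESERVED_START  (1u << 26)
    #define RESERVED_END    (1u << 31)

    /* all bits from RESERVED_START..RESERVED_END inclusive */
    #define RESERVED_MASK   (((RESERVED_END - 1) | RESERVED_END) & \
                             ~(RESERVED_START - 1))

    int main(void)
    {
        printf("mask %#x\n", RESERVED_MASK);    /* 0xfc000000 */
        return 0;
    }
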
+
struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
- struct mlx4_ib_sqp *sqp;
- struct mlx4_ib_qp *qp;
+ struct mlx4_ib_qp *qp = NULL;
int err;
+ u16 xrcdn = 0;
+ enum mlx4_ib_qp_flags mlx4_qp_flags = to_mlx4_ib_qp_flags(init_attr->create_flags);
+ struct ib_device *device;
+ /* see ib_core's ib_create_qp for the same handling */
+ device = pd ? pd->device : init_attr->xrcd->device;
/*
- * We only support LSO and multicast loopback blocking, and
- * only for kernel UD QPs.
+ * We only support LSO, vendor flag1, and multicast loopback blocking,
+ * and only for kernel UD QPs.
*/
- if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
+ if (mlx4_qp_flags & ~(MLX4_IB_QP_LSO |
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
+ MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP |
+ MLX4_IB_QP_NETIF))
return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
+ if (init_attr->qp_type != IB_QPT_UD)
+ return ERR_PTR(-EINVAL);
+ }
+
if (init_attr->create_flags &&
- (pd->uobject || init_attr->qp_type != IB_QPT_UD))
+ (udata ||
+ ((mlx4_qp_flags & ~MLX4_IB_SRIOV_SQP) &&
+ init_attr->qp_type != IB_QPT_UD) ||
+ ((mlx4_qp_flags & MLX4_IB_SRIOV_SQP) &&
+ init_attr->qp_type > IB_QPT_GSI)))
return ERR_PTR(-EINVAL);
+#ifdef __linux__
+ err = check_qpg_attr(to_mdev(device), init_attr);
+ if (err)
+ return ERR_PTR(err);
+#endif
+
switch (init_attr->qp_type) {
- case IB_QPT_XRC:
- if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ case IB_QPT_XRC_TGT:
+ pd = to_mxrcd(init_attr->xrcd)->pd;
+ xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
+ init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
+ /* fall through */
+ case IB_QPT_XRC_INI:
+ if (!(to_mdev(device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
return ERR_PTR(-ENOSYS);
+ init_attr->recv_cq = init_attr->send_cq;
+ /* fall through */
case IB_QPT_RC:
case IB_QPT_UC:
- case IB_QPT_UD:
- case IB_QPT_RAW_ETH:
- {
+ case IB_QPT_RAW_PACKET:
qp = kzalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
-
- err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
+ qp->pri.vid = qp->alt.vid = 0xFFFF;
+ /* fall through */
+ case IB_QPT_UD:
+ {
+ err = create_qp_common(to_mdev(device), pd, init_attr, udata, 0, &qp);
if (err) {
kfree(qp);
return ERR_PTR(err);
}
- if (init_attr->qp_type == IB_QPT_XRC)
- qp->xrcdn = to_mxrcd(init_attr->xrc_domain)->xrcdn;
- else
- qp->xrcdn = 0;
-
qp->ibqp.qp_num = qp->mqp.qpn;
+ qp->xrcdn = xrcdn;
break;
}
- case IB_QPT_RAW_ETY:
- if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_RAW_ETY))
- return ERR_PTR(-ENOSYS);
case IB_QPT_SMI:
case IB_QPT_GSI:
{
/* Userspace is not allowed to create special QPs: */
- if (pd->uobject) {
- mlx4_ib_dbg("Userspace is not allowed to create special QPs");
+ if (udata)
return ERR_PTR(-EINVAL);
- }
- sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
- if (!sqp)
- return ERR_PTR(-ENOMEM);
-
- qp = &sqp->qp;
-
- err = create_qp_common(dev, pd, init_attr, udata,
- dev->dev->caps.sqp_start +
- (init_attr->qp_type == IB_QPT_RAW_ETY ? 4 :
- (init_attr->qp_type == IB_QPT_SMI ? 0 : 2)) +
- init_attr->port_num - 1,
- qp);
- if (err) {
- kfree(sqp);
+ err = create_qp_common(to_mdev(device), pd, init_attr, udata,
+ get_sqp_num(to_mdev(device), init_attr),
+ &qp);
+ if (err)
return ERR_PTR(err);
- }
qp->port = init_attr->port_num;
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
@@ -898,8 +1462,7 @@
break;
}
default:
- mlx4_ib_dbg("Invalid QP type requested for create_qp (%d)",
- init_attr->qp_type);
+ /* Don't support raw QPs */
return ERR_PTR(-EINVAL);
}
@@ -910,11 +1473,13 @@
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
+ struct mlx4_ib_pd *pd;
if (is_qp0(dev, mqp))
mlx4_CLOSE_PORT(dev->dev, mqp->port);
- destroy_qp_common(dev, mqp, !!qp->pd->uobject);
+ pd = get_pd(mqp);
+ destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
if (is_sqp(dev, mqp))
kfree(to_msqp(mqp));
@@ -924,18 +1489,27 @@
return 0;
}
-static int to_mlx4_st(enum ib_qp_type type)
+static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
{
switch (type) {
- case IB_QPT_RC: return MLX4_QP_ST_RC;
- case IB_QPT_UC: return MLX4_QP_ST_UC;
- case IB_QPT_UD: return MLX4_QP_ST_UD;
- case IB_QPT_XRC: return MLX4_QP_ST_XRC;
- case IB_QPT_RAW_ETY:
- case IB_QPT_SMI:
- case IB_QPT_GSI:
- case IB_QPT_RAW_ETH: return MLX4_QP_ST_MLX;
- default: return -1;
+ case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC;
+ case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC;
+ case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD;
+ case MLX4_IB_QPT_XRC_INI:
+ case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC;
+ case MLX4_IB_QPT_SMI:
+ case MLX4_IB_QPT_GSI:
+ case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX;
+
+ case MLX4_IB_QPT_PROXY_SMI_OWNER:
+ case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ?
+ MLX4_QP_ST_MLX : -1);
+ case MLX4_IB_QPT_PROXY_SMI:
+ case MLX4_IB_QPT_TUN_SMI:
+ case MLX4_IB_QPT_PROXY_GSI:
+ case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ?
+ MLX4_QP_ST_UD : -1);
+ default: return -1;
}
}
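
The proxy and tunnel QP types only exist under SR-IOV, which is why their cases return a valid service type only when mlx4_is_mfunc() holds. A condensed sketch of that guard, using stand-in type and service codes:

    #include <stdio.h>

    enum qpt { QPT_UD, QPT_PROXY_GSI };
    enum st { ST_UD = 3, ST_ERR = -1 };     /* stand-in codes */

    static int to_st(int is_mfunc, enum qpt t)
    {
        switch (t) {
        case QPT_UD:
            return ST_UD;
        case QPT_PROXY_GSI:
            /* proxies only make sense under SR-IOV */
            return is_mfunc ? ST_UD : ST_ERR;
        default:
            return ST_ERR;
        }
    }

    int main(void)
    {
        printf("%d %d\n", to_st(0, QPT_PROXY_GSI), to_st(1, QPT_PROXY_GSI));
        return 0;
    }
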
@@ -986,8 +1560,10 @@
}
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
- struct mlx4_qp_path *path, u8 port)
+ struct mlx4_ib_qp *qp, struct mlx4_qp_path *path,
+ u8 port, int is_primary)
{
+ struct net_device *ndev;
int err;
int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
IB_LINK_LAYER_ETHERNET;
@@ -995,6 +1571,10 @@
int is_mcast;
u16 vlan_tag;
int vidx;
+ int smac_index;
+ u64 u64_mac;
+ u8 *smac;
+ struct mlx4_roce_smac_vlan_info *smac_info;
path->grh_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
@@ -1008,7 +1588,7 @@
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
- printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
+ pr_err("sgid_index (%u) too large. max is %d\n",
ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
return -1;
}
@@ -1023,29 +1603,96 @@
}
if (is_eth) {
- path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
- ((port - 1) << 6) | ((ah->sl & 0x7) << 3) | ((ah->sl & 8) >> 1);
-
if (!(ah->ah_flags & IB_AH_GRH))
return -1;
+ path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
+ ((port - 1) << 6) | ((ah->sl & 7) << 3);
+
+ if (is_primary)
+ smac_info = &qp->pri;
+ else
+ smac_info = &qp->alt;
+
+ vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
+ if (vlan_tag < 0x1000) {
+ if (smac_info->vid < 0x1000) {
+ /* both valid vlan ids */
+ if (smac_info->vid != vlan_tag) {
+ /* different VIDs. unreg old and reg new */
+ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
+ if (err)
+ return err;
+ smac_info->candidate_vid = vlan_tag;
+ smac_info->candidate_vlan_index = vidx;
+ smac_info->candidate_vlan_port = port;
+ smac_info->update_vid = 1;
+ path->vlan_index = vidx;
+ path->fl = 1 << 6;
+ } else {
+ path->vlan_index = smac_info->vlan_index;
+ path->fl = 1 << 6;
+ }
+ } else {
+ /* no current vlan tag in qp */
+ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
+ if (err)
+ return err;
+ smac_info->candidate_vid = vlan_tag;
+ smac_info->candidate_vlan_index = vidx;
+ smac_info->candidate_vlan_port = port;
+ smac_info->update_vid = 1;
+ path->vlan_index = vidx;
+ path->fl = 1 << 6;
+ }
+ } else {
+ /* have current vlan tag. unregister it at modify-qp success */
+ if (smac_info->vid < 0x1000) {
+ smac_info->candidate_vid = 0xFFFF;
+ smac_info->update_vid = 1;
+ }
+ }
+
err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
if (err)
return err;
+ /* get smac_index for RoCE use.
+ * If no smac was yet assigned, register one.
+ * If one was already assigned, but the new mac differs,
+ * unregister the old one and register the new one.
+ */
+ spin_lock(&dev->iboe.lock);
+ ndev = dev->iboe.netdevs[port - 1];
+ if (ndev) {
+#ifdef __linux__
+ smac = ndev->dev_addr; /* fixme: cache this value */
+#else
+ smac = IF_LLADDR(ndev); /* fixme: cache this value */
+#endif
+
+ u64_mac = mlx4_mac_to_u64(smac);
+ } else
+ u64_mac = dev->dev->caps.def_mac[port];
+ spin_unlock(&dev->iboe.lock);
+
+ if (!smac_info->smac || smac_info->smac != u64_mac) {
+ /* register candidate now, unreg if needed, after success */
+ smac_index = mlx4_register_mac(dev->dev, port, u64_mac);
+ if (smac_index >= 0) {
+ smac_info->candidate_smac_index = smac_index;
+ smac_info->candidate_smac = u64_mac;
+ smac_info->candidate_smac_port = port;
+ } else
+ return -EINVAL;
+ } else
+ smac_index = smac_info->smac_index;
+
memcpy(path->dmac, mac, 6);
path->ackto = MLX4_IB_LINK_TYPE_ETH;
- /* use index 0 into MAC table for IBoE */
- path->grh_mylmc &= 0x80;
+ /* use the MAC table smac index for IBoE */
+ path->grh_mylmc = (u8) smac_index | 0x80;
- vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
- if (vlan_tag < 0x1000) {
- if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx))
- return -ENOENT;
-
- path->vlan_index = vidx;
- path->fl = 1 << 6;
- }
} else
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 0xf) << 2);
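
For Ethernet ports, sched_queue packs the port number and service level into one byte: bit 6 selects the port, bits 3-5 carry the SL, and the remaining bits come from the default. A sketch of the packing; 0x83 is a placeholder for the MLX4_IB_DEFAULT_SCHED_QUEUE value in mlx4_ib.h:

    #include <stdio.h>

    #define DEFAULT_SCHED_QUEUE 0x83    /* placeholder for mlx4_ib.h value */

    static unsigned char eth_sched_queue(int port, int sl)
    {
        /* bit 6: port (1-based), bits 3..5: service level */
        return DEFAULT_SCHED_QUEUE | ((port - 1) << 6) | ((sl & 7) << 3);
    }

    int main(void)
    {
        printf("port 2, sl 5 -> %#x\n", eth_sched_queue(2, 5));
        return 0;
    }
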
@@ -1055,7 +1702,7 @@
static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
- struct gid_entry *ge, *tmp;
+ struct mlx4_ib_gid_entry *ge, *tmp;
list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) {
@@ -1065,6 +1712,38 @@
}
}
+static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
+ struct mlx4_qp_context *context)
+{
+ struct net_device *ndev;
+ u64 u64_mac;
+ u8 *smac;
+ int smac_index;
+
+ ndev = dev->iboe.netdevs[qp->port - 1];
+ if (ndev) {
+#ifdef __linux__
+ smac = ndev->dev_addr; /* fixme: cache this value */
+#else
+ smac = IF_LLADDR(ndev); /* fixme: cache this value */
+#endif
+ u64_mac = mlx4_mac_to_u64(smac);
+ } else
+ u64_mac = dev->dev->caps.def_mac[qp->port];
+
+ context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
+ if (!qp->pri.smac) {
+ smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac);
+ if (smac_index >= 0) {
+ qp->pri.candidate_smac_index = smac_index;
+ qp->pri.candidate_smac = u64_mac;
+ qp->pri.candidate_smac_port = qp->port;
+ context->pri_path.grh_mylmc = 0x80 | (u8) smac_index;
+ } else
+ return -ENOENT;
+ }
+ return 0;
+}
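
As in mlx4_set_path(), the MAC-table slot is published through the path's grh_mylmc byte: bit 7 marks the smac index as valid and the low bits carry the index itself. A one-liner sketch of the encoding:

    #include <stdio.h>

    /* bit 7 set means "smac index valid"; low bits carry the index */
    static unsigned char encode_smac_index(int smac_index)
    {
        return 0x80 | (unsigned char)smac_index;
    }

    int main(void)
    {
        printf("%#x\n", encode_smac_index(5));  /* 0x85 */
        return 0;
    }
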
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1071,10 +1750,14 @@
{
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct mlx4_ib_pd *pd;
+ struct mlx4_ib_cq *send_cq, *recv_cq;
struct mlx4_qp_context *context;
enum mlx4_qp_optpar optpar = 0;
int sqd_event;
+ int steer_qp = 0;
int err = -EINVAL;
+ int is_eth = -1;
context = kzalloc(sizeof *context, GFP_KERNEL);
if (!context)
@@ -1081,7 +1764,7 @@
return -ENOMEM;
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
- (to_mlx4_st(ibqp->qp_type) << 16));
+ (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -1099,11 +1782,11 @@
break;
}
}
- if (ibqp->qp_type == IB_QPT_RAW_ETH)
- context->mtu_msgmax = 0xff;
- else if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
- ibqp->qp_type == IB_QPT_RAW_ETY)
+
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
+ else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
+ context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
else if (ibqp->qp_type == IB_QPT_UD) {
if (qp->flags & MLX4_IB_QP_LSO)
context->mtu_msgmax = (IB_MTU_4096 << 5) |
@@ -1112,7 +1795,7 @@
context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
} else if (attr_mask & IB_QP_PATH_MTU) {
if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
- printk(KERN_ERR "path MTU (%u) is invalid\n",
+ pr_err("path MTU (%u) is invalid\n",
attr->path_mtu);
goto out;
}
@@ -1130,8 +1813,8 @@
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
- if (ibqp->qp_type == IB_QPT_XRC)
- context->xrcd = cpu_to_be32((u32) qp->xrcdn);
+ context->xrcd = cpu_to_be32((u32) qp->xrcdn);
+ context->param3 |= cpu_to_be32(1 << 30);
}
if (qp->ibqp.uobject)
@@ -1150,24 +1833,33 @@
}
}
- if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR &&
- dev->counters[qp->port - 1] != -1) {
- context->pri_path.counter_index = dev->counters[qp->port - 1];
- optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
+ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ if (dev->counters[qp->port - 1] != -1) {
+ context->pri_path.counter_index =
+ dev->counters[qp->port - 1];
+ optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
+ } else
+ context->pri_path.counter_index = 0xff;
+
+ if (qp->flags & MLX4_IB_QP_NETIF &&
+ (qp->qpg_type == IB_QPG_NONE || qp->qpg_type == IB_QPG_PARENT)) {
+ mlx4_ib_steer_qp_reg(dev, qp, 1);
+ steer_qp = 1;
+ }
}
if (attr_mask & IB_QP_PKEY_INDEX) {
+ if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
+ context->pri_path.disable_pkey_check = 0x40;
context->pri_path.pkey_index = attr->pkey_index;
optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
}
if (attr_mask & IB_QP_AV) {
- if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
- attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) {
- mlx4_ib_dbg("qpn 0x%x: could not set pri path params",
- ibqp->qp_num);
+ if (mlx4_set_path(dev, &attr->ah_attr, qp, &context->pri_path,
+ attr_mask & IB_QP_PORT ?
+ attr->port_num : qp->port, 1))
goto out;
- }
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
MLX4_QP_OPTPAR_SCHED_QUEUE);
@@ -1174,31 +1866,22 @@
}
if (attr_mask & IB_QP_TIMEOUT) {
- context->pri_path.ackto |= (attr->timeout << 3);
+ context->pri_path.ackto |= attr->timeout << 3;
optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
}
if (attr_mask & IB_QP_ALT_PATH) {
if (attr->alt_port_num == 0 ||
- attr->alt_port_num > dev->num_ports) {
- mlx4_ib_dbg("qpn 0x%x: invalid alternate port num (%d)",
- ibqp->qp_num, attr->alt_port_num);
+ attr->alt_port_num > dev->dev->caps.num_ports)
goto out;
- }
if (attr->alt_pkey_index >=
- dev->dev->caps.pkey_table_len[attr->alt_port_num]) {
- mlx4_ib_dbg("qpn 0x%x: invalid alt pkey index (0x%x)",
- ibqp->qp_num, attr->alt_pkey_index);
+ dev->dev->caps.pkey_table_len[attr->alt_port_num])
goto out;
- }
- if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
- attr->alt_port_num)) {
- mlx4_ib_dbg("qpn 0x%x: could not set alt path params",
- ibqp->qp_num);
+ if (mlx4_set_path(dev, &attr->alt_ah_attr, qp, &context->alt_path,
+ attr->alt_port_num, 0))
goto out;
- }
context->alt_path.pkey_index = attr->alt_pkey_index;
context->alt_path.ackto = attr->alt_timeout << 3;
@@ -1205,8 +1888,12 @@
optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
}
- context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
- context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
+ pd = get_pd(qp);
+ get_cqs(qp, &send_cq, &recv_cq);
+ context->pd = cpu_to_be32(pd->pdn);
+ context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
+ context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
+ context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
/* Set "fast registration enabled" for all kernel QPs */
if (!qp->ibqp.uobject)
@@ -1232,8 +1919,6 @@
if (attr_mask & IB_QP_SQ_PSN)
context->next_send_psn = cpu_to_be32(attr->sq_psn);
- context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
-
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
if (attr->max_dest_rd_atomic)
context->params2 |=
@@ -1246,6 +1931,18 @@
optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
}
+ if (attr_mask & IB_M_EXT_CLASS_1)
+ context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_MASTER);
+
+ /* for now we also enable SQE on send */
+ if (attr_mask & IB_M_EXT_CLASS_2) {
+ context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_SYNC_SQ);
+ context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_MASTER);
+ }
+
+ if (attr_mask & IB_M_EXT_CLASS_3)
+ context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_SYNC_RQ);
+
if (ibqp->srq)
context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
@@ -1256,10 +1953,24 @@
if (attr_mask & IB_QP_RQ_PSN)
context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
- context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
-
+ /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */
if (attr_mask & IB_QP_QKEY) {
- context->qkey = cpu_to_be32(attr->qkey);
+ if (qp->mlx4_ib_qp_type &
+ (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
+ context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
+ else {
+ if (mlx4_is_mfunc(dev->dev) &&
+ !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) &&
+ (attr->qkey & MLX4_RESERVED_QKEY_MASK) ==
+ MLX4_RESERVED_QKEY_BASE) {
+ pr_err("Cannot use reserved QKEY"
+ " 0x%x (range 0xffff0000..0xffffffff"
+ " is reserved)\n", attr->qkey);
+ err = -EINVAL;
+ goto out;
+ }
+ context->qkey = cpu_to_be32(attr->qkey);
+ }
optpar |= MLX4_QP_OPTPAR_Q_KEY;
}
@@ -1266,20 +1977,41 @@
if (ibqp->srq)
context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
- if (!ibqp->srq && ibqp->qp_type != IB_QPT_XRC &&
- cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->db_rec_addr = cpu_to_be64(qp->db.dma);
if (cur_state == IB_QPS_INIT &&
new_state == IB_QPS_RTR &&
(ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
- ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_RAW_ETY ||
- ibqp->qp_type == IB_QPT_RAW_ETH)) {
+ ibqp->qp_type == IB_QPT_UD ||
+ ibqp->qp_type == IB_QPT_RAW_PACKET)) {
context->pri_path.sched_queue = (qp->port - 1) << 6;
- if (is_qp0(dev, qp))
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
+ qp->mlx4_ib_qp_type &
+ (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) {
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
- else
+ if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI)
+ context->pri_path.fl = 0x80;
+ } else {
+ if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
+ context->pri_path.fl = 0x80;
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
+ }
+ is_eth = rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
+ IB_LINK_LAYER_ETHERNET;
+ if (is_eth) {
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI)
+ context->pri_path.feup = 1 << 7; /* don't fsm */
+ /* handle smac_index */
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
+ err = handle_eth_ud_smac_index(dev, qp, context);
+ if (err)
+ return -EINVAL;
+ }
+ }
}
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
@@ -1291,6 +2023,43 @@
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->rlkey |= (1 << 4);
+ if ((attr_mask & IB_QP_GROUP_RSS) &&
+ (qp->qpg_data->rss_child_count > 1)) {
+ struct mlx4_ib_qpg_data *qpg_data = qp->qpg_data;
+ void *rss_context_base = &context->pri_path;
+ struct mlx4_rss_context *rss_context =
+ (struct mlx4_rss_context *) (rss_context_base
+ + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH);
+
+ context->flags |= cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET);
+
+ /* This should be tbl_sz_base_qpn */
+ rss_context->base_qpn = cpu_to_be32(qpg_data->rss_qpn_base |
+ (ilog2(qpg_data->rss_child_count) << 24));
+ rss_context->default_qpn = cpu_to_be32(qpg_data->rss_qpn_base);
+ /* This should be flags_hash_fn */
+ rss_context->flags = MLX4_RSS_TCP_IPV6 |
+ MLX4_RSS_TCP_IPV4;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UDP_RSS) {
+ rss_context->base_qpn_udp = rss_context->default_qpn;
+ rss_context->flags |= MLX4_RSS_IPV6 |
+ MLX4_RSS_IPV4 |
+ MLX4_RSS_UDP_IPV6 |
+ MLX4_RSS_UDP_IPV4;
+ }
+ if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP) {
+ static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B,
+ 0x1983A2FC, 0x943E1ADB, 0xD9389E6B, 0xD1039C2C,
+ 0xA74499AD, 0x593D56D9, 0xF3253C06, 0x2ADC1FFC};
+ rss_context->hash_fn = MLX4_RSS_HASH_TOP;
+ memcpy(rss_context->rss_key, rsskey,
+ sizeof(rss_context->rss_key));
+ } else {
+ rss_context->hash_fn = MLX4_RSS_HASH_XOR;
+ memset(rss_context->rss_key, 0,
+ sizeof(rss_context->rss_key));
+ }
+ }
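
The RSS context folds the log2 of the (power-of-two) child count into the top byte of the base QP number, so one dword carries both the table size and its base. A sketch of the encoding; the 0x400 base is made up:

    #include <stdio.h>

    static unsigned ilog2_u(unsigned n)
    {
        unsigned l = 0;

        while (n >>= 1)
            l++;
        return l;
    }

    int main(void)
    {
        unsigned rss_qpn_base = 0x400;  /* made-up base */
        unsigned child_count = 8;       /* must be a power of two here */
        unsigned base_qpn = rss_qpn_base | (ilog2_u(child_count) << 24);

        printf("tbl_sz_base_qpn %#010x\n", base_qpn);   /* 0x03000400 */
        return 0;
    }
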
/*
* Before passing a kernel QP to the HW, make sure that the
* ownership bits of the send queue are set and the SQ
@@ -1333,6 +2102,29 @@
if (is_sqp(dev, qp))
store_sqp_attrs(to_msqp(qp), attr, attr_mask);
+ /* Set 'ignore_cq_overrun' bits for collectives offload */
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ if (attr_mask & (IB_M_EXT_CLASS_2 | IB_M_EXT_CLASS_3)) {
+ err = mlx4_ib_ignore_overrun_cq(ibqp->send_cq);
+ if (err) {
+ pr_err("Failed to set ignore CQ "
+ "overrun for QP 0x%x's send CQ\n",
+ ibqp->qp_num);
+ goto out;
+ }
+
+ if (ibqp->recv_cq != ibqp->send_cq) {
+ err = mlx4_ib_ignore_overrun_cq(ibqp->recv_cq);
+ if (err) {
+ pr_err("Failed to set ignore "
+ "CQ overrun for QP 0x%x's recv "
+ "CQ\n", ibqp->qp_num);
+ goto out;
+ }
+ }
+ }
+ }
+
/*
* If we moved QP0 to RTR, bring the IB link up; if we moved
* QP0 to RESET or ERROR, bring the link back down.
@@ -1340,7 +2132,7 @@
if (is_qp0(dev, qp)) {
if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
if (mlx4_INIT_PORT(dev->dev, qp->port))
- printk(KERN_WARNING "INIT_PORT failed for port %d\n",
+ pr_warn("INIT_PORT failed for port %d\n",
qp->port);
if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
@@ -1352,23 +2144,120 @@
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
- if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
- ibqp->srq ? to_msrq(ibqp->srq): NULL);
- if (ibqp->send_cq != ibqp->recv_cq)
- mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
+ if (new_state == IB_QPS_RESET) {
+ if (!ibqp->uobject) {
+ mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+ if (send_cq != recv_cq)
+ mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
- qp->rq.head = 0;
- qp->rq.tail = 0;
- qp->sq.head = 0;
- qp->sq.tail = 0;
- qp->sq_next_wqe = 0;
- if (!ibqp->srq && ibqp->qp_type != IB_QPT_XRC)
- *qp->db.db = 0;
+ qp->rq.head = 0;
+ qp->rq.tail = 0;
+ qp->sq.head = 0;
+ qp->sq.tail = 0;
+ qp->sq_next_wqe = 0;
+ if (qp->rq.wqe_cnt)
+ *qp->db.db = 0;
+
+ if (qp->flags & MLX4_IB_QP_NETIF &&
+ (qp->qpg_type == IB_QPG_NONE ||
+ qp->qpg_type == IB_QPG_PARENT))
+ mlx4_ib_steer_qp_reg(dev, qp, 0);
+ }
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = 0;
+ }
+ if (qp->alt.smac) {
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = 0;
+ }
+ if (qp->pri.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
+ qp->pri.vid = 0xFFFF;
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
+
+ if (qp->alt.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
+ qp->alt.vid = 0xFFFF;
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
}
out:
+ if (err && steer_qp)
+ mlx4_ib_steer_qp_reg(dev, qp, 0);
kfree(context);
+ if (qp->pri.candidate_smac) {
+ if (err)
+ mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac);
+ else {
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ }
+ qp->pri.smac = qp->pri.candidate_smac;
+ qp->pri.smac_index = qp->pri.candidate_smac_index;
+ qp->pri.smac_port = qp->pri.candidate_smac_port;
+
+ }
+ qp->pri.candidate_smac = 0;
+ qp->pri.candidate_smac_index = 0;
+ qp->pri.candidate_smac_port = 0;
+ }
+ if (qp->alt.candidate_smac) {
+ if (err)
+ mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->alt.candidate_smac);
+ else {
+ if (qp->alt.smac) {
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ }
+ qp->alt.smac = qp->alt.candidate_smac;
+ qp->alt.smac_index = qp->alt.candidate_smac_index;
+ qp->alt.smac_port = qp->alt.candidate_smac_port;
+
+ }
+ qp->alt.candidate_smac = 0;
+ qp->alt.candidate_smac_index = 0;
+ qp->alt.candidate_smac_port = 0;
+ }
+
+ if (qp->pri.update_vid) {
+ if (err) {
+ if (qp->pri.candidate_vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port,
+ qp->pri.candidate_vid);
+ } else {
+ if (qp->pri.vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port,
+ qp->pri.vid);
+ qp->pri.vid = qp->pri.candidate_vid;
+ qp->pri.vlan_port = qp->pri.candidate_vlan_port;
+ qp->pri.vlan_index = qp->pri.candidate_vlan_index;
+ }
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
+
+ if (qp->alt.update_vid) {
+ if (err) {
+ if (qp->alt.candidate_vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port,
+ qp->alt.candidate_vid);
+ } else {
+ if (qp->alt.vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port,
+ qp->alt.vid);
+ qp->alt.vid = qp->alt.candidate_vid;
+ qp->alt.vlan_port = qp->alt.candidate_vlan_port;
+ qp->alt.vlan_index = qp->alt.candidate_vlan_index;
+ }
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
+
return err;
}
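
Everything after the out: label is the second half of a two-phase update: candidates registered while building the command are promoted (and the old smac/vid released) on success, or released themselves on failure, so the QP never references an unregistered MAC or vlan. A compact sketch of the smac half, with a stub unregister call:

    #include <stdio.h>

    struct smac_state {
        unsigned long long smac, candidate_smac;    /* 0 means "none" */
    };

    static void unregister_mac(unsigned long long mac)
    {
        printf("unreg %#llx\n", mac);
    }

    /* after the firmware command: promote or roll back the candidate */
    static void finish_smac_update(struct smac_state *s, int err)
    {
        if (!s->candidate_smac)
            return;
        if (err) {
            unregister_mac(s->candidate_smac);      /* roll back */
        } else {
            if (s->smac)
                unregister_mac(s->smac);            /* retire the old one */
            s->smac = s->candidate_smac;            /* promote */
        }
        s->candidate_smac = 0;
    }

    int main(void)
    {
        struct smac_state s = { 0x001122334455ULL, 0x0a0b0c0d0e0fULL };

        finish_smac_update(&s, 0);
        printf("active smac %#llx\n", s.smac);
        return 0;
    }
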
@@ -1385,40 +2274,43 @@
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
- mlx4_ib_dbg("qpn 0x%x: invalid attribute mask specified "
- "for transition %d to %d. qp_type %d, attr_mask 0x%x",
- ibqp->qp_num, cur_state, new_state,
- ibqp->qp_type, attr_mask);
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask & ~IB_M_QP_MOD_VEND_MASK)) {
+ pr_debug("qpn 0x%x: invalid attribute mask specified "
+ "for transition %d to %d. qp_type %d,"
+ " attr_mask 0x%x\n",
+ ibqp->qp_num, cur_state, new_state,
+ ibqp->qp_type, attr_mask);
goto out;
}
- if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type != IB_QPT_RAW_ETH) &&
- (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
- mlx4_ib_dbg("qpn 0x%x: invalid port number (%d) specified "
- "for transition %d to %d. qp_type %d",
- ibqp->qp_num, attr->port_num, cur_state,
- new_state, ibqp->qp_type);
+ if ((attr_mask & IB_M_QP_MOD_VEND_MASK) && !dev->dev->caps.sync_qp) {
+ pr_err("extended verbs are not supported by %s\n",
+ dev->ib_dev.name);
goto out;
}
- if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_ETH) &&
- (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num)
- != IB_LINK_LAYER_ETHERNET)) {
- mlx4_ib_dbg("qpn 0x%x: invalid port (%d) specified (not RDMAoE)"
- "for transition %d to %d. qp_type %d",
- ibqp->qp_num, attr->port_num, cur_state,
- new_state, ibqp->qp_type);
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
+ pr_debug("qpn 0x%x: invalid port number (%d) specified "
+ "for transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->port_num, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
+ if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) &&
+ (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) !=
+ IB_LINK_LAYER_ETHERNET))
+ goto out;
+
if (attr_mask & IB_QP_PKEY_INDEX) {
int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) {
- mlx4_ib_dbg("qpn 0x%x: invalid pkey index (%d) specified "
- "for transition %d to %d. qp_type %d",
- ibqp->qp_num, attr->pkey_index, cur_state,
- new_state, ibqp->qp_type);
+ pr_debug("qpn 0x%x: invalid pkey index (%d) specified "
+ "for transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->pkey_index, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
}
@@ -1425,19 +2317,19 @@
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
- mlx4_ib_dbg("qpn 0x%x: max_rd_atomic (%d) too large. "
- "Transition %d to %d. qp_type %d",
- ibqp->qp_num, attr->max_rd_atomic, cur_state,
- new_state, ibqp->qp_type);
+ pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. "
+ "Transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->max_rd_atomic, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
- mlx4_ib_dbg("qpn 0x%x: max_dest_rd_atomic (%d) too large. "
- "Transition %d to %d. qp_type %d",
- ibqp->qp_num, attr->max_dest_rd_atomic, cur_state,
- new_state, ibqp->qp_type);
+ pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. "
+ "Transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->max_dest_rd_atomic, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
@@ -1453,46 +2345,111 @@
return err;
}
-static int build_raw_ety_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
- void *wqe, unsigned *mlx_seg_len)
+static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
+ struct ib_send_wr *wr,
+ void *wqe, unsigned *mlx_seg_len)
{
- int payload = 0;
- int header_size, packet_length;
+ struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
+ struct ib_device *ib_dev = &mdev->ib_dev;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
- u32 *lrh = wqe + sizeof *mlx + sizeof *inl;
+ struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ u16 pkey;
+ u32 qkey;
+ int send_size;
+ int header_size;
+ int spc;
int i;
- /* Only IB_WR_SEND is supported */
if (wr->opcode != IB_WR_SEND)
return -EINVAL;
+ send_size = 0;
+
for (i = 0; i < wr->num_sge; ++i)
- payload += wr->sg_list[i].length;
+ send_size += wr->sg_list[i].length;
- header_size = IB_LRH_BYTES + 4; /* LRH + RAW_HEADER (32 bits) */
+ /* for proxy-qp0 sends, need to add in size of tunnel header */
+ /* for tunnel-qp0 sends, tunnel header is already in s/g list */
+ if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
+ send_size += sizeof (struct mlx4_ib_tunnel_header);
- /* headers + payload and round up */
- packet_length = (header_size + payload + 3) / 4;
+ ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
+ if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
+ sqp->ud_header.lrh.service_level =
+ be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
+ sqp->ud_header.lrh.destination_lid =
+ cpu_to_be16(ah->av.ib.g_slid & 0x7f);
+ sqp->ud_header.lrh.source_lid =
+ cpu_to_be16(ah->av.ib.g_slid & 0x7f);
+ }
+
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_ICRC |
- (wr->wr.raw_ety.lrh->service_level << 8));
+ /* force loopback */
+ mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR);
+ mlx->rlid = sqp->ud_header.lrh.destination_lid;
- mlx->rlid = wr->wr.raw_ety.lrh->destination_lid;
+ sqp->ud_header.lrh.virtual_lane = 0;
+ sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
+ sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
+ if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ else
+ sqp->ud_header.bth.destination_qpn =
+ cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
- wr->wr.raw_ety.lrh->packet_length = cpu_to_be16(packet_length);
+ sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
+ if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ return -EINVAL;
+ sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
- ib_lrh_header_pack(wr->wr.raw_ety.lrh, lrh);
- lrh += IB_LRH_BYTES / 4; /* LRH size is a dword multiple */
- *lrh = cpu_to_be32(wr->wr.raw_ety.eth_type);
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ sqp->ud_header.immediate_present = 0;
- inl->byte_count = cpu_to_be32(1 << 31 | header_size);
+ header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
+ /*
+ * Inline data segments may not cross a 64 byte boundary. If
+ * our UD header is bigger than the space available up to the
+ * next 64 byte boundary in the WQE, use two inline data
+ * segments to hold the UD header.
+ */
+ spc = MLX4_INLINE_ALIGN -
+ ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
+ if (header_size <= spc) {
+ inl->byte_count = cpu_to_be32(1U << 31 | header_size);
+ memcpy(inl + 1, sqp->header_buf, header_size);
+ i = 1;
+ } else {
+ inl->byte_count = cpu_to_be32(1U << 31 | spc);
+ memcpy(inl + 1, sqp->header_buf, spc);
+
+ inl = (void *) (inl + 1) + spc;
+ memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
+ /*
+ * Need a barrier here to make sure all the data is
+ * visible before the byte_count field is set.
+ * Otherwise the HCA prefetcher could grab the 64-byte
+ * chunk with this inline segment and get a valid (!=
+ * 0xffffffff) byte count but stale data, and end up
+ * generating a packet with bad headers.
+ *
+ * The first inline segment's byte_count field doesn't
+ * need a barrier, because it comes after a
+ * control/MLX segment and therefore is at an offset
+ * of 16 mod 64.
+ */
+ wmb();
+ inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc));
+ i = 2;
+ }
+
*mlx_seg_len =
- ALIGN(sizeof(struct mlx4_wqe_inline_seg) + header_size, 16);
-
+ ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
return 0;
}
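
The inline-split logic above is easiest to see in isolation: spc is the room left before the next 64-byte boundary, and a header that does not fit is written as two inline segments, with a barrier before the second byte_count becomes valid so the prefetcher can never pair a plausible count with stale data. A userspace sketch of just the size computation (the barrier is elided since plain memory is used):

    #include <stdio.h>
    #include <stdint.h>

    #define INLINE_ALIGN 64

    /* how many inline segments does a header at offset `off` need? */
    static int inline_segs(uintptr_t off, int header_size)
    {
        int spc = INLINE_ALIGN - (int)(off & (INLINE_ALIGN - 1));

        return header_size <= spc ? 1 : 2;
    }

    int main(void)
    {
        /* first segment payload starts 20 bytes into a cache line */
        printf("%d %d\n", inline_segs(20, 40), inline_segs(20, 60)); /* 1 2 */
        return 0;
    }
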
@@ -1499,23 +2456,23 @@
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
- struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
+ struct ib_device *ib_dev = sqp->qp.ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
+ struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ union ib_gid sgid;
u16 pkey;
int send_size;
int header_size;
int spc;
int i;
- union ib_gid sgid;
int is_eth;
+ int is_vlan = 0;
int is_grh;
- int is_vlan = 0;
- int err;
- u16 vlan;
+ u16 vlan = 0;
+ int err = 0;
- vlan = 0;
send_size = 0;
for (i = 0; i < wr->num_sge; ++i)
send_size += wr->sg_list[i].length;
@@ -1522,16 +2479,29 @@
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
- err = ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &sgid);
- if (err)
- return err;
if (is_eth) {
- is_vlan = rdma_get_vlan_id(&sgid) < 0x1000;
+ if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+ /* When multi-function is enabled, the ib_core gid
+ * indexes don't necessarily match the hw ones, so
+ * we must use our own cache */
+ err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &sgid.raw[0]);
+ if (err)
+ return err;
+ } else {
+ err = ib_get_cached_gid(ib_dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &sgid);
+ if (err)
+ return err;
+ }
+
vlan = rdma_get_vlan_id(&sgid);
+ is_vlan = vlan < 0x1000;
}
+ ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
- ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
if (!is_eth) {
sqp->ud_header.lrh.service_level =
be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
@@ -1545,8 +2515,25 @@
sqp->ud_header.grh.flow_label =
ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
- ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid);
+ if (is_eth)
+ memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
+ else {
+ if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+ /* When multi-function is enabled, the ib_core gid
+ * indexes don't necessarily match the hw ones, so
+ * we must use our own cache */
+ sqp->ud_header.grh.source_gid.global.subnet_prefix =
+ to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+ subnet_prefix;
+ sqp->ud_header.grh.source_gid.global.interface_id =
+ to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+ guid_cache[ah->av.ib.gid_index];
+ } else
+ ib_get_cached_gid(ib_dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index,
+ &sqp->ud_header.grh.source_gid);
+ }
memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.ib.dgid, 16);
}
@@ -1558,16 +2545,18 @@
(sqp->ud_header.lrh.destination_lid ==
IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
(sqp->ud_header.lrh.service_level << 8));
+ if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
+ mlx->flags |= cpu_to_be32(0x1); /* force loopback */
mlx->rlid = sqp->ud_header.lrh.destination_lid;
}
switch (wr->opcode) {
case IB_WR_SEND:
- sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
sqp->ud_header.immediate_present = 0;
break;
case IB_WR_SEND_WITH_IMM:
- sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
sqp->ud_header.immediate_present = 1;
sqp->ud_header.immediate_data = wr->ex.imm_data;
break;
@@ -1576,24 +2565,26 @@
}
if (is_eth) {
- u8 *smac;
+ u8 smac[6];
+ struct in6_addr in6;
+ u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
+
+ mlx->sched_prio = cpu_to_be16(pcp);
+
memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
-#ifdef __linux__
- smac = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]->dev_addr; /* fixme: cache this value */
-#else
- smac = IF_LLADDR(to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]); /* fixme: cache this value */
-#endif
+ /* FIXME: cache smac value? */
+ memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
+ memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
+ memcpy(&in6, sgid.raw, sizeof(in6));
+ rdma_get_ll_mac(&in6, smac);
memcpy(sqp->ud_header.eth.smac_h, smac, 6);
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
- if (!is_vlan)
- sqp->ud_header.eth.type = cpu_to_be16(MLX4_IBOE_ETHERTYPE);
- else {
- u16 pcp;
-
- sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IBOE_ETHERTYPE);
- pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 27 & 3) << 13;
+ if (!is_vlan) {
+ sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ } else {
+ sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
}
} else {
@@ -1616,16 +2607,16 @@
header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
if (0) {
- printk(KERN_ERR "built UD header of size %d:\n", header_size);
+ pr_err("built UD header of size %d:\n", header_size);
for (i = 0; i < header_size / 4; ++i) {
if (i % 8 == 0)
- printk(" [%02x] ", i * 4);
- printk(" %08x",
- be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
+ pr_err(" [%02x] ", i * 4);
+ pr_cont(" %08x",
+ be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
if ((i + 1) % 8 == 0)
- printk("\n");
+ pr_cont("\n");
}
- printk("\n");
+ pr_err("\n");
}
/*
@@ -1635,13 +2626,13 @@
* segments to hold the UD header.
*/
spc = MLX4_INLINE_ALIGN -
- ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
+ ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
if (header_size <= spc) {
- inl->byte_count = cpu_to_be32(1 << 31 | header_size);
+ inl->byte_count = cpu_to_be32(1U << 31 | header_size);
memcpy(inl + 1, sqp->header_buf, header_size);
i = 1;
} else {
- inl->byte_count = cpu_to_be32(1 << 31 | spc);
+ inl->byte_count = cpu_to_be32(1U << 31 | spc);
memcpy(inl + 1, sqp->header_buf, spc);
inl = (void *) (inl + 1) + spc;
@@ -1660,12 +2651,12 @@
* of 16 mod 64.
*/
wmb();
- inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
+ inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc));
i = 2;
}
*mlx_seg_len =
- ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
+ ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
return 0;
}
@@ -1688,10 +2679,10 @@
static __be32 convert_access(int acc)
{
- return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) |
- (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) |
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) |
cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
}
@@ -1718,10 +2709,12 @@
static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
{
- iseg->flags = 0;
- iseg->mem_key = cpu_to_be32(rkey);
- iseg->guest_id = 0;
- iseg->pa = 0;
+ iseg->mem_key = cpu_to_be32(rkey);
+
+ iseg->reserved1 = 0;
+ iseg->reserved2 = 0;
+ iseg->reserved3[0] = 0;
+ iseg->reserved3[1] = 0;
}
static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
@@ -1757,7 +2750,7 @@
}
static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
- struct ib_send_wr *wr, __be16 *vlan)
+ struct ib_send_wr *wr)
{
memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
@@ -1764,9 +2757,65 @@
dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
- *vlan = dseg->vlan;
}
+static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
+ struct mlx4_wqe_datagram_seg *dseg,
+ struct ib_send_wr *wr, enum ib_qp_type qpt)
+{
+ union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
+ struct mlx4_av sqp_av = {0};
+ int port = *((u8 *) &av->ib.port_pd) & 0x3;
+
+ /* force loopback */
+ sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
+ sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */
+ sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel &
+ cpu_to_be32(0xf0000000);
+
+ memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
+ /* This function is used only for sending on QP1 proxies */
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+ /* Use QKEY from the QP context, which is set by master */
+ dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
+}
+
+static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
+{
+ struct mlx4_wqe_inline_seg *inl = wqe;
+ struct mlx4_ib_tunnel_header hdr;
+ struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ int spc;
+ int i;
+
+ memcpy(&hdr.av, &ah->av, sizeof hdr.av);
+ hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
+ hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+
+ spc = MLX4_INLINE_ALIGN -
+ ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
+ if (sizeof (hdr) <= spc) {
+ memcpy(inl + 1, &hdr, sizeof (hdr));
+ wmb();
+ inl->byte_count = cpu_to_be32(1U << 31 | sizeof (hdr));
+ i = 1;
+ } else {
+ memcpy(inl + 1, &hdr, spc);
+ wmb();
+ inl->byte_count = cpu_to_be32(1U << 31 | spc);
+
+ inl = (void *) (inl + 1) + spc;
+ memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
+ wmb();
+ inl->byte_count = cpu_to_be32(1U << 31 | (sizeof (hdr) - spc));
+ i = 2;
+ }
+
+ *mlx_seg_len =
+ ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16);
+}
+
static void set_mlx_icrc_seg(void *dseg)
{
u32 *t = dseg;
@@ -1814,11 +2863,12 @@
static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
- __be32 *lso_hdr_sz, int *blh)
+ __be32 *lso_hdr_sz, __be32 *blh)
{
unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
- *blh = unlikely(halign > 64) ? 1 : 0;
+ if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
+ *blh = cpu_to_be32(1 << 6);
if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@@ -1847,6 +2897,13 @@
}
}
+static void add_zero_len_inline(void *wqe)
+{
+ struct mlx4_wqe_inline_seg *inl = wqe;
+ memset(wqe, 0, 16);
+ inl->byte_count = cpu_to_be32(1U << 31);
+}
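
add_zero_len_inline() exists purely for alignment: a 16-byte segment whose byte_count has only bit 31 set (inline flag, zero length) is skipped by the hardware but pushes the tunnel header that follows onto a cache-line boundary. A sketch of what that padding segment holds; note the driver stores the word big-endian via cpu_to_be32, host order is used here:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    int main(void)
    {
        unsigned char seg[16];
        uint32_t byte_count = 1u << 31; /* inline flag set, zero data bytes */
        uint32_t check;

        memset(seg, 0, sizeof(seg));
        memcpy(seg, &byte_count, sizeof(byte_count));
        memcpy(&check, seg, sizeof(check));
        printf("padding segment first word %#x, total %zu bytes\n",
               check, sizeof(seg));
        return 0;
    }
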
+
static int lay_inline_data(struct mlx4_ib_qp *qp, struct ib_send_wr *wr,
void *wqe, int *sz)
{
@@ -1923,7 +2980,8 @@
* implementations may use move-string-buffer assembler instructions,
* which do not guarantee order of copying.
*/
-static void mlx4_bf_copy(unsigned long *dst, unsigned long *src, unsigned bytecnt)
+static void mlx4_bf_copy(unsigned long *dst, unsigned long *src,
+ unsigned bytecnt)
{
__iowrite64_copy(dst, src, bytecnt / 8);
}
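
The BlueFlame copy has to reach the device in whole, ordered 64-bit chunks; memcpy() makes no such promise (it may use string-move instructions, per the comment above), hence __iowrite64_copy(). A portable sketch of the explicitly chunked copy over plain memory; real MMIO would additionally need the io accessors:

    #include <stdint.h>

    /* copy `bytecnt` bytes (a multiple of 8) one 64-bit word at a time */
    static void copy64(void *dst, const void *src, unsigned bytecnt)
    {
        uint64_t *d = dst;
        const uint64_t *s = src;
        unsigned n;

        for (n = 0; n < bytecnt / 8; n++)
            d[n] = s[n];
    }

    int main(void)
    {
        uint64_t src[4] = { 1, 2, 3, 4 }, dst[4];

        copy64(dst, src, sizeof(src));
        return (int)dst[3] - 4;     /* 0 on success */
    }
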
@@ -1933,7 +2991,7 @@
{
struct mlx4_ib_qp *qp = to_mqp(ibqp);
void *wqe;
- struct mlx4_wqe_ctrl_seg *ctrl;
+ struct mlx4_wqe_ctrl_seg *uninitialized_var(ctrl);
struct mlx4_wqe_data_seg *dseg;
unsigned long flags;
int nreq;
@@ -1945,12 +3003,9 @@
__be32 dummy;
__be32 *lso_wqe;
__be32 uninitialized_var(lso_hdr_sz);
+ __be32 blh;
int i;
- int blh = 0;
- __be16 vlan = 0;
int inl = 0;
-
- ctrl = NULL;
spin_lock_irqsave(&qp->sq.lock, flags);
ind = qp->sq_next_wqe;
@@ -1957,9 +3012,9 @@
for (nreq = 0; wr; ++nreq, wr = wr->next) {
lso_wqe = &dummy;
+ blh = 0;
if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
- mlx4_ib_dbg("QP 0x%x: WQE overflow", ibqp->qp_num);
err = -ENOMEM;
*bad_wr = wr;
goto out;
@@ -1966,8 +3021,6 @@
}
if (unlikely(wr->num_sge > qp->sq.max_gs)) {
- mlx4_ib_dbg("QP 0x%x: too many sg entries (%d)",
- ibqp->qp_num, wr->num_sge);
err = -EINVAL;
*bad_wr = wr;
goto out;
@@ -1992,13 +3045,9 @@
wqe += sizeof *ctrl;
size = sizeof *ctrl / 16;
- switch (ibqp->qp_type) {
- case IB_QPT_XRC:
- ctrl->srcrb_flags |=
- cpu_to_be32(wr->xrc_remote_srq_num << 8);
- /* fall thru */
- case IB_QPT_RC:
- case IB_QPT_UC:
+ switch (qp->mlx4_ib_qp_type) {
+ case MLX4_IB_QPT_RC:
+ case MLX4_IB_QPT_UC:
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
@@ -2059,10 +3108,28 @@
}
break;
- case IB_QPT_UD:
- set_datagram_seg(wqe, wr, &vlan);
+ case MLX4_IB_QPT_TUN_SMI_OWNER:
+ err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ goto out;
+ }
+ wqe += seglen;
+ size += seglen / 16;
+ break;
+ case MLX4_IB_QPT_TUN_SMI:
+ case MLX4_IB_QPT_TUN_GSI:
+ /* this is a UD qp used in MAD responses to slaves. */
+ set_datagram_seg(wqe, wr);
+ /* set the forced-loopback bit in the data seg av */
+ *(__be32 *) wqe |= cpu_to_be32(0x80000000);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+ break;
+ case MLX4_IB_QPT_UD:
+ set_datagram_seg(wqe, wr);
+ wqe += sizeof (struct mlx4_wqe_datagram_seg);
+ size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
if (wr->opcode == IB_WR_LSO) {
err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
@@ -2076,9 +3143,13 @@
}
break;
- case IB_QPT_SMI:
- case IB_QPT_GSI:
- err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
+ case MLX4_IB_QPT_PROXY_SMI_OWNER:
+ if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
+ err = -ENOSYS;
+ *bad_wr = wr;
+ goto out;
+ }
+ err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -2085,11 +3156,35 @@
}
wqe += seglen;
size += seglen / 16;
+ /* to start tunnel header on a cache-line boundary */
+ add_zero_len_inline(wqe);
+ wqe += 16;
+ size++;
+ build_tunnel_header(wr, wqe, &seglen);
+ wqe += seglen;
+ size += seglen / 16;
break;
+ case MLX4_IB_QPT_PROXY_SMI:
+ /* don't allow QP0 sends on guests */
+ err = -ENOSYS;
+ *bad_wr = wr;
+ goto out;
+ case MLX4_IB_QPT_PROXY_GSI:
+ /* If we are tunneling special qps, this is a UD qp.
+ * In this case we first add a UD segment targeting
+ * the tunnel qp, and then add a header with address
+ * information */
+ set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
+ wqe += sizeof (struct mlx4_wqe_datagram_seg);
+ size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+ build_tunnel_header(wr, wqe, &seglen);
+ wqe += seglen;
+ size += seglen / 16;
+ break;
- case IB_QPT_RAW_ETY:
- err = build_raw_ety_header(to_msqp(qp), wr, ctrl,
- &seglen);
+ case MLX4_IB_QPT_SMI:
+ case MLX4_IB_QPT_GSI:
+ err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -2108,13 +3203,14 @@
* cacheline. This avoids issues with WQE
* prefetching.
*/
-
dseg = wqe;
dseg += wr->num_sge - 1;
/* Add one more inline data segment for ICRC for MLX sends */
- if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI)) {
+ if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
+ qp->mlx4_ib_qp_type &
+ (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
set_mlx_icrc_seg(dseg + 1);
size += sizeof (struct mlx4_wqe_data_seg) / 16;
}
@@ -2127,7 +3223,8 @@
size += sz;
}
} else {
- size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
+ size += wr->num_sge *
+ (sizeof(struct mlx4_wqe_data_seg) / 16);
for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
set_data_seg(dseg, wr->sg_list + i);
}
@@ -2139,15 +3236,9 @@
*/
wmb();
*lso_wqe = lso_hdr_sz;
-
ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
MLX4_WQE_CTRL_FENCE : 0) | size;
- if (vlan) {
- ctrl->ins_vlan = 1 << 6;
- ctrl->vlan_tag = vlan;
- }
-
/*
* Make sure descriptor is fully written before
* setting ownership bit (because HW can start
@@ -2155,14 +3246,14 @@
*/
wmb();
- if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
+ if (wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
+ *bad_wr = wr;
err = -EINVAL;
goto out;
}
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
- (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) |
- (blh ? cpu_to_be32(1 << 6) : 0);
+ (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
stamp = ind + qp->sq_spare_wqes;
ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
@@ -2185,6 +3276,9 @@
out:
if (nreq == 1 && inl && size > 1 && size < qp->bf.buf_size / 16) {
ctrl->owner_opcode |= htonl((qp->sq_next_wqe & 0xffff) << 8);
+ /* The doorbell_qpn bits were zeroed above as part of
+ * vlan tag initialization, so |= is correct.
+ */
*(u32 *) (&ctrl->vlan_tag) |= qp->doorbell_qpn;
/*
* Make sure that descriptor is written to memory
@@ -2239,8 +3333,10 @@
int err = 0;
int nreq;
int ind;
+ int max_gs;
int i;
+ max_gs = qp->rq.max_gs;
spin_lock_irqsave(&qp->rq.lock, flags);
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
@@ -2247,7 +3343,6 @@
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
- mlx4_ib_dbg("QP 0x%x: WQE overflow", ibqp->qp_num);
err = -ENOMEM;
*bad_wr = wr;
goto out;
@@ -2254,8 +3349,6 @@
}
if (unlikely(wr->num_sge > qp->rq.max_gs)) {
- mlx4_ib_dbg("QP 0x%x: too many sg entries (%d)",
- ibqp->qp_num, wr->num_sge);
err = -EINVAL;
*bad_wr = wr;
goto out;
@@ -2263,10 +3356,25 @@
scat = get_recv_wqe(qp, ind);
+ if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
+ ib_dma_sync_single_for_device(ibqp->device,
+ qp->sqp_proxy_rcv[ind].map,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ scat->byte_count =
+ cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr));
+ /* use dma lkey from upper layer entry */
+ scat->lkey = cpu_to_be32(wr->sg_list->lkey);
+ scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
+ scat++;
+ max_gs--;
+ }
+
for (i = 0; i < wr->num_sge; ++i)
__set_data_seg(scat + i, wr->sg_list + i);
- if (i < qp->rq.max_gs) {
+ if (i < max_gs) {
scat[i].byte_count = 0;
scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
scat[i].addr = 0;
@@ -2334,10 +3442,10 @@
return ib_flags;
}
-static void to_ib_ah_attr(struct mlx4_ib_dev *ib_dev, struct ib_ah_attr *ib_ah_attr,
- struct mlx4_qp_path *path)
+static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
+ struct mlx4_qp_path *path)
{
- struct mlx4_dev *dev = ib_dev->dev;
+ struct mlx4_dev *dev = ibdev->dev;
int is_eth;
memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
@@ -2346,7 +3454,7 @@
if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
return;
- is_eth = rdma_port_get_link_layer(&ib_dev->ib_dev, ib_ah_attr->port_num) ==
+ is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) ==
IB_LINK_LAYER_ETHERNET;
if (is_eth)
ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) |
@@ -2355,7 +3463,6 @@
ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
ib_ah_attr->dlid = be16_to_cpu(path->rlid);
-
ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
@@ -2407,8 +3514,7 @@
qp_attr->qp_access_flags =
to_ib_qp_access_flags(be32_to_cpu(context.params2));
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC ||
- qp->ibqp.qp_type == IB_QPT_XRC) {
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
@@ -2463,308 +3569,21 @@
if (qp->flags & MLX4_IB_QP_LSO)
qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
-out:
- mutex_unlock(&qp->mutex);
- return err;
-}
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP;
-int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr,
- u32 *qp_num)
-{
- struct mlx4_ib_dev *dev = to_mdev(init_attr->xrc_domain->device);
- struct mlx4_ib_xrcd *xrcd = to_mxrcd(init_attr->xrc_domain);
- struct mlx4_ib_qp *qp;
- struct ib_qp *ibqp;
- struct mlx4_ib_xrc_reg_entry *ctx_entry;
- unsigned long flags;
- int err;
+ qp_init_attr->sq_sig_type =
+ qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
+ IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
- if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return -ENOSYS;
+ qp_init_attr->qpg_type = ibqp->qpg_type;
+ if (ibqp->qpg_type == IB_QPG_PARENT)
+ qp_init_attr->cap.qpg_tss_mask_sz = qp->qpg_data->qpg_tss_mask_sz;
+ else
+ qp_init_attr->cap.qpg_tss_mask_sz = 0;
- if (init_attr->qp_type != IB_QPT_XRC)
- return -EINVAL;
-
- ctx_entry = kmalloc(sizeof *ctx_entry, GFP_KERNEL);
- if (!ctx_entry)
- return -ENOMEM;
-
- qp = kzalloc(sizeof *qp, GFP_KERNEL);
- if (!qp) {
- kfree(ctx_entry);
- return -ENOMEM;
- }
- mutex_lock(&dev->xrc_reg_mutex);
- qp->flags = MLX4_IB_XRC_RCV;
- qp->xrcdn = to_mxrcd(init_attr->xrc_domain)->xrcdn;
- INIT_LIST_HEAD(&qp->xrc_reg_list);
- err = create_qp_common(dev, xrcd->pd, init_attr, NULL, 0, qp);
- if (err) {
- mutex_unlock(&dev->xrc_reg_mutex);
- kfree(ctx_entry);
- kfree(qp);
- return err;
- }
-
- ibqp = &qp->ibqp;
- /* set the ibpq attributes which will be used by the mlx4 module */
- ibqp->qp_num = qp->mqp.qpn;
- ibqp->device = init_attr->xrc_domain->device;
- ibqp->pd = xrcd->pd;
- ibqp->send_cq = ibqp->recv_cq = xrcd->cq;
- ibqp->event_handler = init_attr->event_handler;
- ibqp->qp_context = init_attr->qp_context;
- ibqp->qp_type = init_attr->qp_type;
- ibqp->xrcd = init_attr->xrc_domain;
-
- mutex_lock(&qp->mutex);
- ctx_entry->context = init_attr->qp_context;
- spin_lock_irqsave(&qp->xrc_reg_list_lock, flags);
- list_add_tail(&ctx_entry->list, &qp->xrc_reg_list);
- spin_unlock_irqrestore(&qp->xrc_reg_list_lock, flags);
+out:
mutex_unlock(&qp->mutex);
- mutex_unlock(&dev->xrc_reg_mutex);
- *qp_num = qp->mqp.qpn;
- return 0;
-}
-
-int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num,
- struct ib_qp_attr *attr, int attr_mask)
-{
- struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
- struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
- struct mlx4_qp *mqp;
- struct mlx4_ib_qp *mibqp;
- int err = -EINVAL;
-
- if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return -ENOSYS;
-
- mutex_lock(&dev->xrc_reg_mutex);
- mqp = mlx4_qp_lookup_lock(dev->dev, qp_num);
- if (unlikely(!mqp)) {
- printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: "
- "unknown QPN %06x\n", qp_num);
- goto err_out;
- }
-
- mibqp = to_mibqp(mqp);
-
- if (!(mibqp->flags & MLX4_IB_XRC_RCV) || !mibqp->ibqp.xrcd ||
- xrcd->xrcdn != to_mxrcd(mibqp->ibqp.xrcd)->xrcdn)
- goto err_out;
-
- err = mlx4_ib_modify_qp(&mibqp->ibqp, attr, attr_mask, NULL);
- mutex_unlock(&dev->xrc_reg_mutex);
return err;
-
-err_out:
- mutex_unlock(&dev->xrc_reg_mutex);
- return err;
}
-int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num,
- struct ib_qp_attr *qp_attr, int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
-{
- struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
- struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
- struct mlx4_ib_qp *qp;
- struct mlx4_qp *mqp;
- struct mlx4_qp_context context;
- int mlx4_state;
- int err = -EINVAL;
-
- if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return -ENOSYS;
-
- mutex_lock(&dev->xrc_reg_mutex);
- mqp = mlx4_qp_lookup_lock(dev->dev, qp_num);
- if (unlikely(!mqp)) {
- printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: "
- "unknown QPN %06x\n", qp_num);
- goto err_out;
- }
-
- qp = to_mibqp(mqp);
- if (!(qp->flags & MLX4_IB_XRC_RCV) || !(qp->ibqp.xrcd) ||
- xrcd->xrcdn != to_mxrcd(qp->ibqp.xrcd)->xrcdn)
- goto err_out;
-
- if (qp->state == IB_QPS_RESET) {
- qp_attr->qp_state = IB_QPS_RESET;
- goto done;
- }
-
- err = mlx4_qp_query(dev->dev, mqp, &context);
- if (err)
- goto err_out;
-
- mlx4_state = be32_to_cpu(context.flags) >> 28;
-
- qp_attr->qp_state = to_ib_qp_state(mlx4_state);
- qp_attr->path_mtu = context.mtu_msgmax >> 5;
- qp_attr->path_mig_state =
- to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
- qp_attr->qkey = be32_to_cpu(context.qkey);
- qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
- qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff;
- qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff;
- qp_attr->qp_access_flags =
- to_ib_qp_access_flags(be32_to_cpu(context.params2));
-
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC ||
- qp->ibqp.qp_type == IB_QPT_XRC) {
- to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
- to_ib_ah_attr(dev, &qp_attr->alt_ah_attr,
- &context.alt_path);
- qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
- qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
- }
-
- qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
- if (qp_attr->qp_state == IB_QPS_INIT)
- qp_attr->port_num = qp->port;
- else
- qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
-
- /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
- qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
-
- qp_attr->max_rd_atomic =
- 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
-
- qp_attr->max_dest_rd_atomic =
- 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
- qp_attr->min_rnr_timer =
- (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
- qp_attr->timeout = context.pri_path.ackto >> 3;
- qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7;
- qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7;
- qp_attr->alt_timeout = context.alt_path.ackto >> 3;
-
-done:
- qp_attr->cur_qp_state = qp_attr->qp_state;
- qp_attr->cap.max_recv_wr = 0;
- qp_attr->cap.max_recv_sge = 0;
- qp_attr->cap.max_send_wr = 0;
- qp_attr->cap.max_send_sge = 0;
- qp_attr->cap.max_inline_data = 0;
- qp_init_attr->cap = qp_attr->cap;
-
- mutex_unlock(&dev->xrc_reg_mutex);
- return 0;
-
-err_out:
- mutex_unlock(&dev->xrc_reg_mutex);
- return err;
-}
-
-int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
-{
-
- struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
-
- struct mlx4_qp *mqp;
- struct mlx4_ib_qp *mibqp;
- struct mlx4_ib_xrc_reg_entry *ctx_entry, *tmp;
- unsigned long flags;
- int err = -EINVAL;
-
- mutex_lock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- mqp = mlx4_qp_lookup_lock(to_mdev(xrcd->device)->dev, qp_num);
- if (unlikely(!mqp)) {
- printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: "
- "unknown QPN %06x\n", qp_num);
- goto err_out;
- }
-
- mibqp = to_mibqp(mqp);
-
- if (!(mibqp->flags & MLX4_IB_XRC_RCV) || !(mibqp->ibqp.xrcd) ||
- mxrcd->xrcdn != to_mxrcd(mibqp->ibqp.xrcd)->xrcdn)
- goto err_out;
-
- ctx_entry = kmalloc(sizeof *ctx_entry, GFP_KERNEL);
- if (!ctx_entry) {
- err = -ENOMEM;
- goto err_out;
- }
-
- mutex_lock(&mibqp->mutex);
- list_for_each_entry(tmp, &mibqp->xrc_reg_list, list)
- if (tmp->context == context) {
- mutex_unlock(&mibqp->mutex);
- kfree(ctx_entry);
- mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- return 0;
- }
-
- ctx_entry->context = context;
- spin_lock_irqsave(&mibqp->xrc_reg_list_lock, flags);
- list_add_tail(&ctx_entry->list, &mibqp->xrc_reg_list);
- spin_unlock_irqrestore(&mibqp->xrc_reg_list_lock, flags);
- mutex_unlock(&mibqp->mutex);
- mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- return 0;
-
-err_out:
- mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- return err;
-}
-
-int mlx4_ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
-{
-
- struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
-
- struct mlx4_qp *mqp;
- struct mlx4_ib_qp *mibqp;
- struct mlx4_ib_xrc_reg_entry *ctx_entry, *tmp;
- unsigned long flags;
- int found = 0;
- int err = -EINVAL;
-
- mutex_lock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- mqp = mlx4_qp_lookup_lock(to_mdev(xrcd->device)->dev, qp_num);
- if (unlikely(!mqp)) {
- printk(KERN_WARNING "mlx4_ib_unreg_xrc_rcv_qp: "
- "unknown QPN %06x\n", qp_num);
- goto err_out;
- }
-
- mibqp = to_mibqp(mqp);
-
- if (!(mibqp->flags & MLX4_IB_XRC_RCV) ||
- mxrcd->xrcdn != (mibqp->xrcdn & 0xffff))
- goto err_out;
-
- mutex_lock(&mibqp->mutex);
- spin_lock_irqsave(&mibqp->xrc_reg_list_lock, flags);
- list_for_each_entry_safe(ctx_entry, tmp, &mibqp->xrc_reg_list, list)
- if (ctx_entry->context == context) {
- found = 1;
- list_del(&ctx_entry->list);
- spin_unlock_irqrestore(&mibqp->xrc_reg_list_lock, flags);
- kfree(ctx_entry);
- break;
- }
-
- if (!found)
- spin_unlock_irqrestore(&mibqp->xrc_reg_list_lock, flags);
- mutex_unlock(&mibqp->mutex);
- if (!found)
- goto err_out;
-
- /* destroy the QP if the registration list is empty */
- if (list_empty(&mibqp->xrc_reg_list))
- mlx4_ib_destroy_qp(&mibqp->ibqp);
-
- mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- return 0;
-
-err_out:
- mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- return err;
-}
-
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/qp.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mlx4/srq.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/srq.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/srq.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -33,6 +33,7 @@
#include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h>
+#include <linux/slab.h>
#include "mlx4_ib.h"
#include "user.h"
@@ -58,7 +59,7 @@
event.event = IB_EVENT_SRQ_ERR;
break;
default:
- printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ pr_warn("Unexpected event type %d "
"on SRQ %06x\n", type, srq->srqn);
return;
}
@@ -67,17 +68,16 @@
}
}
-struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd,
- struct ib_cq *xrc_cq,
- struct ib_xrcd *xrcd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
+struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_srq *srq;
struct mlx4_wqe_srq_next_seg *next;
- u32 cqn;
- u16 xrcdn;
+ struct mlx4_wqe_data_seg *scatter;
+ u32 cqn;
+ u16 xrcdn;
int desc_size;
int buf_size;
int err;
@@ -85,14 +85,10 @@
/* Sanity check SRQ size before proceeding */
if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
- init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) {
- mlx4_ib_dbg("a size param is out of range. "
- "max_wr = 0x%x, max_sge = 0x%x",
- init_attr->attr.max_wr, init_attr->attr.max_sge);
+ init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
return ERR_PTR(-EINVAL);
- }
- srq = kzalloc(sizeof *srq, GFP_KERNEL);
+ srq = kmalloc(sizeof *srq, GFP_KERNEL);
if (!srq)
return ERR_PTR(-ENOMEM);
@@ -138,8 +134,6 @@
if (err)
goto err_mtt;
} else {
- struct mlx4_wqe_data_seg *scatter;
-
err = mlx4_db_alloc(dev->dev, &srq->db, 0);
if (err)
goto err_srq;
@@ -182,10 +176,11 @@
}
}
- cqn = xrc_cq ? (u32) (to_mcq(xrc_cq)->mcq.cqn) : 0;
- xrcdn = xrcd ? (u16) (to_mxrcd(xrcd)->xrcdn) :
+ cqn = (init_attr->srq_type == IB_SRQT_XRC) ?
+ to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0;
+ xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
+ to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn :
(u16) dev->dev->caps.reserved_xrcds;
-
err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt,
srq->db.dma, &srq->msrq);
if (err)
@@ -192,14 +187,13 @@
goto err_wrid;
srq->msrq.event = mlx4_ib_srq_event;
+ srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (pd->uobject) {
+ if (pd->uobject)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
err = -EFAULT;
goto err_wrid;
}
- } else
- srq->ibsrq.xrc_srq_num = srq->msrq.srqn;
init_attr->attr.max_wr = srq->msrq.max - 1;
@@ -238,16 +232,12 @@
int ret;
/* We don't support resizing SRQs (yet?) */
- if (attr_mask & IB_SRQ_MAX_WR) {
- mlx4_ib_dbg("resize not yet supported");
+ if (attr_mask & IB_SRQ_MAX_WR)
return -EINVAL;
- }
if (attr_mask & IB_SRQ_LIMIT) {
- if (attr->srq_limit >= srq->msrq.max){
- mlx4_ib_dbg("limit (0x%x) too high", attr->srq_limit);
+ if (attr->srq_limit >= srq->msrq.max)
return -EINVAL;
- }
mutex_lock(&srq->mutex);
ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit);
@@ -260,13 +250,6 @@
return 0;
}
-struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
-{
- return mlx4_ib_create_xrc_srq(pd, NULL, NULL, init_attr, udata);
-}
-
int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
@@ -289,19 +272,7 @@
{
struct mlx4_ib_dev *dev = to_mdev(srq->device);
struct mlx4_ib_srq *msrq = to_msrq(srq);
- struct mlx4_ib_cq *cq;
- mlx4_srq_invalidate(dev->dev, &msrq->msrq);
-
- if (srq->xrc_cq && !srq->uobject) {
- cq = to_mcq(srq->xrc_cq);
- spin_lock_irq(&cq->lock);
- __mlx4_ib_cq_clean(cq, -1, msrq);
- mlx4_srq_remove(dev->dev, &msrq->msrq);
- spin_unlock_irq(&cq->lock);
- } else
- mlx4_srq_remove(dev->dev, &msrq->msrq);
-
mlx4_srq_free(dev->dev, &msrq->msrq);
mlx4_mtt_cleanup(dev->dev, &msrq->mtt);
@@ -349,8 +320,6 @@
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
- mlx4_ib_dbg("srq num 0x%x: num s/g entries too large (%d)",
- srq->msrq.srqn, wr->num_sge);
err = -EINVAL;
*bad_wr = wr;
break;
@@ -357,8 +326,6 @@
}
if (unlikely(srq->head == srq->tail)) {
- mlx4_ib_dbg("srq num 0x%x: No entries available to post.",
- srq->msrq.srqn);
err = -ENOMEM;
*bad_wr = wr;
break;
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/srq.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Added: trunk/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c (rev 0)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,801 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*#include "core_priv.h"*/
+#include "mlx4_ib.h"
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+
+#include <rdma/ib_mad.h>
+/* show_admin_alias_guid returns the administratively assigned value of the GUID.
+ * Values returned in the buf parameter string:
+ * 0 - requests opensm to assign a value.
+ * ffffffffffffffff - delete this entry.
+ * other - value assigned by the administrator.
+ */
+static ssize_t show_admin_alias_guid(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int record_num;/*0-15*/
+ int guid_index_in_rec; /*0 - 7*/
+ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
+ container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
+ struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
+ struct mlx4_ib_dev *mdev = port->dev;
+
+ record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
+ guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
+
+ return sprintf(buf, "%llx\n", (long long)
+ be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
+ ports_guid[port->num - 1].
+ all_rec_per_port[record_num].
+ all_recs[8 * guid_index_in_rec]));
+}
+
+/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
+ * Values in buf parameter string:
+ * 0 - requests opensm to assign a value.
+ * 0xffffffffffffffff - delete this entry.
+ * other - guid value assigned by the administrator.
+ */
+static ssize_t store_admin_alias_guid(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int record_num;/*0-15*/
+ int guid_index_in_rec; /*0 - 7*/
+ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
+ container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
+ struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
+ struct mlx4_ib_dev *mdev = port->dev;
+ u64 sysadmin_ag_val;
+
+ record_num = mlx4_ib_iov_dentry->entry_num / 8;
+ guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
+ if (0 == record_num && 0 == guid_index_in_rec) {
+ pr_err("GUID 0 block 0 is RO\n");
+ return count;
+ }
+ sscanf(buf, "%llx", &sysadmin_ag_val);
+ *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
+ all_rec_per_port[record_num].
+ all_recs[GUID_REC_SIZE * guid_index_in_rec] =
+ cpu_to_be64(sysadmin_ag_val);
+
+ /* Change the state to be pending for update */
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
+ = MLX4_GUID_INFO_STATUS_IDLE ;
+
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
+ = MLX4_GUID_INFO_RECORD_SET;
+
+ switch (sysadmin_ag_val) {
+ case MLX4_GUID_FOR_DELETE_VAL:
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
+ = MLX4_GUID_INFO_RECORD_DELETE;
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
+ = MLX4_GUID_SYSADMIN_ASSIGN;
+ break;
+ /* The sysadmin requests the SM to re-assign */
+ case MLX4_NOT_SET_GUID:
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
+ = MLX4_GUID_DRIVER_ASSIGN;
+ break;
+ /* The sysadmin requests a specific value.*/
+ default:
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
+ = MLX4_GUID_SYSADMIN_ASSIGN;
+ break;
+ }
+
+ /* set the record index */
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
+ = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
+
+ mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
+
+ return count;
+}
+
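Taken together, show_admin_alias_guid and store_admin_alias_guid expose one 64-bit admin GUID per sysfs entry: writing 0 asks opensm to assign a value, ffffffffffffffff deletes the entry, and anything else is taken as the administrator's choice. A minimal user-space sketch of driving such an entry (the path is hypothetical; the real location depends on where the iov tree is rooted on a given system):

#include <stdio.h>

static int set_admin_guid(const char *path, unsigned long long guid)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	/* 0 = let opensm assign; 0xffffffffffffffff = delete the entry */
	fprintf(f, "%llx\n", guid);
	return fclose(f);
}

int main(void)
{
	/* hypothetical: entry 3 under some port's admin_guids directory */
	return set_admin_guid("/sys/.../iov/1/admin_guids/3", 0ULL);
}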
+static ssize_t show_port_gid(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
+ container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
+ struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
+ struct mlx4_ib_dev *mdev = port->dev;
+ union ib_gid gid;
+ ssize_t ret;
+
+ ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num,
+ mlx4_ib_iov_dentry->entry_num, &gid, 1);
+ if (ret)
+ return ret;
+ ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(((__be16 *) gid.raw)[0]),
+ be16_to_cpu(((__be16 *) gid.raw)[1]),
+ be16_to_cpu(((__be16 *) gid.raw)[2]),
+ be16_to_cpu(((__be16 *) gid.raw)[3]),
+ be16_to_cpu(((__be16 *) gid.raw)[4]),
+ be16_to_cpu(((__be16 *) gid.raw)[5]),
+ be16_to_cpu(((__be16 *) gid.raw)[6]),
+ be16_to_cpu(((__be16 *) gid.raw)[7]));
+ return ret;
+}
+
+static ssize_t show_phys_port_pkey(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
+ container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
+ struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
+ struct mlx4_ib_dev *mdev = port->dev;
+ u16 pkey;
+ ssize_t ret;
+
+ ret = __mlx4_ib_query_pkey(&mdev->ib_dev, port->num,
+ mlx4_ib_iov_dentry->entry_num, &pkey, 1);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "0x%04x\n", pkey);
+}
+
+#define DENTRY_REMOVE(_dentry) \
+do { \
+ sysfs_remove_file((_dentry)->kobj, &(_dentry)->dentry.attr); \
+} while (0)
+
+static int create_sysfs_entry(void *_ctx, struct mlx4_ib_iov_sysfs_attr *_dentry,
+ char *_name, struct kobject *_kobj,
+ ssize_t (*show)(struct device *dev,
+ struct device_attribute *attr,
+ char *buf),
+ ssize_t (*store)(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ )
+{
+ int ret = 0;
+ struct mlx4_ib_iov_sysfs_attr *vdentry = _dentry;
+
+ vdentry->ctx = _ctx;
+ vdentry->dentry.show = show;
+ vdentry->dentry.store = store;
+ sysfs_attr_init(&vdentry->dentry.attr);
+ vdentry->dentry.attr.name = vdentry->name;
+ vdentry->dentry.attr.mode = 0;
+ vdentry->kobj = _kobj;
+ snprintf(vdentry->name, 15, "%s", _name);
+
+ if (vdentry->dentry.store)
+ vdentry->dentry.attr.mode |= S_IWUSR;
+
+ if (vdentry->dentry.show)
+ vdentry->dentry.attr.mode |= S_IRUGO;
+
+ ret = sysfs_create_file(vdentry->kobj, &vdentry->dentry.attr);
+ if (ret) {
+ pr_err("failed to create %s\n", vdentry->dentry.attr.name);
+ vdentry->ctx = NULL;
+ return ret;
+ }
+
+ return ret;
+}
+
+int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
+ struct attribute *attr)
+{
+ struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
+ int ret;
+
+ ret = sysfs_create_file(port->mcgs_parent, attr);
+ if (ret)
+ pr_err("failed to create %s\n", attr->name);
+
+ return ret;
+}
+
+void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
+ struct attribute *attr)
+{
+ struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
+
+ sysfs_remove_file(port->mcgs_parent, attr);
+}
+
+static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
+{
+ int i;
+ char buff[10];
+ struct mlx4_ib_iov_port *port = NULL;
+ int ret = 0 ;
+ struct ib_port_attr attr;
+
+ /* get the physical gid and pkey table sizes.*/
+ ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
+ if (ret)
+ goto err;
+
+ port = &device->iov_ports[port_num - 1];
+ port->dev = device;
+ port->num = port_num;
+ /* Directory structure:
+ * iov -
+ * port num -
+ * admin_guids
+ * gids (operational)
+ * mcg_table
+ */
+ port->dentr_ar = kzalloc(sizeof (struct mlx4_ib_iov_sysfs_attr_ar),
+ GFP_KERNEL);
+ if (!port->dentr_ar) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ sprintf(buff, "%d", port_num);
+ port->cur_port = kobject_create_and_add(buff,
+ kobject_get(device->ports_parent));
+ if (!port->cur_port) {
+ ret = -ENOMEM;
+ goto kobj_create_err;
+ }
+ /* admin GUIDs */
+ port->admin_alias_parent = kobject_create_and_add("admin_guids",
+ kobject_get(port->cur_port));
+ if (!port->admin_alias_parent) {
+ ret = -ENOMEM;
+ goto err_admin_guids;
+ }
+ for (i = 0 ; i < attr.gid_tbl_len; i++) {
+ sprintf(buff, "%d", i);
+ port->dentr_ar->dentries[i].entry_num = i;
+ ret = create_sysfs_entry(port, &port->dentr_ar->dentries[i],
+ buff, port->admin_alias_parent,
+ show_admin_alias_guid, store_admin_alias_guid);
+ if (ret)
+ goto err_admin_alias_parent;
+ }
+
+ /* gids subdirectory (operational gids) */
+ port->gids_parent = kobject_create_and_add("gids",
+ kobject_get(port->cur_port));
+ if (!port->gids_parent) {
+ ret = -ENOMEM;
+ goto err_gids;
+ }
+
+ for (i = 0 ; i < attr.gid_tbl_len; i++) {
+ sprintf(buff, "%d", i);
+ port->dentr_ar->dentries[attr.gid_tbl_len + i].entry_num = i;
+ ret = create_sysfs_entry(port,
+ &port->dentr_ar->dentries[attr.gid_tbl_len + i],
+ buff,
+ port->gids_parent, show_port_gid, NULL);
+ if (ret)
+ goto err_gids_parent;
+ }
+
+ /* physical port pkey table */
+ port->pkeys_parent =
+ kobject_create_and_add("pkeys", kobject_get(port->cur_port));
+ if (!port->pkeys_parent) {
+ ret = -ENOMEM;
+ goto err_pkeys;
+ }
+
+ for (i = 0 ; i < attr.pkey_tbl_len; i++) {
+ sprintf(buff, "%d", i);
+ port->dentr_ar->dentries[2 * attr.gid_tbl_len + i].entry_num = i;
+ ret = create_sysfs_entry(port,
+ &port->dentr_ar->dentries[2 * attr.gid_tbl_len + i],
+ buff, port->pkeys_parent,
+ show_phys_port_pkey, NULL);
+ if (ret)
+ goto err_pkeys_parent;
+ }
+
+ /* MCGs table */
+ port->mcgs_parent =
+ kobject_create_and_add("mcgs", kobject_get(port->cur_port));
+ if (!port->mcgs_parent) {
+ ret = -ENOMEM;
+ goto err_mcgs;
+ }
+ return 0;
+
+err_mcgs:
+ kobject_put(port->cur_port);
+
+err_pkeys_parent:
+ kobject_put(port->pkeys_parent);
+
+err_pkeys:
+ kobject_put(port->cur_port);
+
+err_gids_parent:
+ kobject_put(port->gids_parent);
+
+err_gids:
+ kobject_put(port->cur_port);
+
+err_admin_alias_parent:
+ kobject_put(port->admin_alias_parent);
+
+err_admin_guids:
+ kobject_put(port->cur_port);
+ kobject_put(port->cur_port); /* once more for create_and_add buff */
+
+kobj_create_err:
+ kobject_put(device->ports_parent);
+ kfree(port->dentr_ar);
+
+err:
+ pr_err("add_port_entries FAILED: for port:%d, error: %d\n",
+ port_num, ret);
+ return ret;
+}
+
+static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
+{
+ char base_name[9];
+
+ /* pci_name format is domain:bus:dev.func -> xxxx:yy:zz.n */
+ strlcpy(name, pci_name(dev->dev->pdev), max);
+ strncpy(base_name, name, 8); /* up to "xxxx:yy:" */
+ base_name[8] = '\0';
+ /* Without ARI only the last 3 bits of the function number are used,
+ * so a function number of 8 or more must spill into the device
+ * number; the final digit is therefore taken modulo 8 */
+ sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
+}
+
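get_name() folds virtual-function numbers past 7 into the PCI device number because, without ARI, the function field only has 3 bits. A standalone sketch with a made-up PF address shows the derivation; for i = 10 it prints 0000:04:01.2:

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *pci = "0000:04:00.0"; /* hypothetical pci_name() output */
	char base[9], name[16];
	int i = 10;                       /* the 11th function */

	strncpy(base, pci, 8);            /* keep "0000:04:" */
	base[8] = '\0';
	snprintf(name, sizeof(name), "%s%.2d.%d", base, i / 8, i % 8);
	printf("%s\n", name);             /* 0000:04:01.2 */
	return 0;
}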
+struct mlx4_port {
+ struct kobject kobj;
+ struct mlx4_ib_dev *dev;
+ struct attribute_group pkey_group;
+ struct attribute_group gid_group;
+ u8 port_num;
+ int slave;
+};
+
+
+static void mlx4_port_release(struct kobject *kobj)
+{
+ struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
+ struct attribute *a;
+ int i;
+
+ for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
+ kfree(a);
+ kfree(p->pkey_group.attrs);
+ for (i = 0; (a = p->gid_group.attrs[i]); ++i)
+ kfree(a);
+ kfree(p->gid_group.attrs);
+ kfree(p);
+}
+
+struct port_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct mlx4_port *, struct port_attribute *, char *buf);
+ ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
+ const char *buf, size_t count);
+};
+
+static ssize_t port_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct port_attribute *port_attr =
+ container_of(attr, struct port_attribute, attr);
+ struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
+
+ if (!port_attr->show)
+ return -EIO;
+ return port_attr->show(p, port_attr, buf);
+}
+
+static ssize_t port_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t size)
+{
+ struct port_attribute *port_attr =
+ container_of(attr, struct port_attribute, attr);
+ struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
+
+ if (!port_attr->store)
+ return -EIO;
+ return port_attr->store(p, port_attr, buf, size);
+}
+
+static const struct sysfs_ops port_sysfs_ops = {
+ .show = port_attr_show,
+ .store = port_attr_store,
+};
+
+static struct kobj_type port_type = {
+ .release = mlx4_port_release,
+ .sysfs_ops = &port_sysfs_ops,
+};
+
+struct port_table_attribute {
+ struct port_attribute attr;
+ char name[8];
+ int index;
+};
+
+static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ ssize_t ret = -ENODEV;
+
+ if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >=
+ (p->dev->dev->caps.pkey_table_len[p->port_num]))
+ ret = sprintf(buf, "none\n");
+ else
+ ret = sprintf(buf, "%d\n",
+ p->dev->pkeys.virt2phys_pkey[p->slave]
+ [p->port_num - 1][tab_attr->index]);
+ return ret;
+}
+
+static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ int idx;
+ int err;
+
+ /* do not allow remapping Dom0 virtual pkey table */
+ if (p->slave == mlx4_master_func_num(p->dev->dev))
+ return -EINVAL;
+
+ if (!strncasecmp(buf, "no", 2))
+ idx = p->dev->dev->phys_caps.pkey_phys_table_len[p->port_num] - 1;
+ else if (sscanf(buf, "%i", &idx) != 1 ||
+ idx >= p->dev->dev->caps.pkey_table_len[p->port_num] ||
+ idx < 0)
+ return -EINVAL;
+
+ p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1]
+ [tab_attr->index] = idx;
+ mlx4_sync_pkey_table(p->dev->dev, p->slave, p->port_num,
+ tab_attr->index, idx);
+ err = mlx4_gen_pkey_eqe(p->dev->dev, p->slave, p->port_num);
+ if (err) {
+ pr_err("mlx4_gen_pkey_eqe failed for slave %d,"
+ " port %d, index %d\n", p->slave, p->port_num, idx);
+ return err;
+ }
+ return count;
+}
+
+static ssize_t show_port_gid_idx(struct mlx4_port *p,
+ struct port_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", p->slave);
+}
+
+static struct attribute **
+alloc_group_attrs(ssize_t (*show)(struct mlx4_port *,
+ struct port_attribute *, char *buf),
+ ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
+ const char *buf, size_t count),
+ int len)
+{
+ struct attribute **tab_attr;
+ struct port_table_attribute *element;
+ int i;
+
+ tab_attr = kcalloc(1 + len, sizeof (struct attribute *), GFP_KERNEL);
+ if (!tab_attr)
+ return NULL;
+
+ for (i = 0; i < len; i++) {
+ element = kzalloc(sizeof (struct port_table_attribute),
+ GFP_KERNEL);
+ if (!element)
+ goto err;
+ if (snprintf(element->name, sizeof (element->name),
+ "%d", i) >= sizeof (element->name)) {
+ kfree(element);
+ goto err;
+ }
+ sysfs_attr_init(&element->attr.attr);
+ element->attr.attr.name = element->name;
+ if (store) {
+ element->attr.attr.mode = S_IWUSR | S_IRUGO;
+ element->attr.store = store;
+ } else
+ element->attr.attr.mode = S_IRUGO;
+
+ element->attr.show = show;
+ element->index = i;
+ tab_attr[i] = &element->attr.attr;
+ }
+ return tab_attr;
+
+err:
+ while (--i >= 0)
+ kfree(tab_attr[i]);
+ kfree(tab_attr);
+ return NULL;
+}
+
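alloc_group_attrs() sizes its array as 1 + len so the final slot stays NULL; sysfs and the release loops in mlx4_port_release both walk the array until that sentinel. The same pattern in miniature (plain calloc standing in for kcalloc):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int len = 3, i;
	const char **attrs = calloc(1 + len, sizeof(*attrs));

	if (!attrs)
		return 1;
	for (i = 0; i < len; i++)
		attrs[i] = "attr";
	for (i = 0; attrs[i]; i++)  /* stops at the NULL sentinel */
		printf("%d: %s\n", i, attrs[i]);
	free(attrs);
	return 0;
}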
+static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
+{
+ struct mlx4_port *p;
+ int i;
+ int ret;
+ int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port_num) ==
+ IB_LINK_LAYER_ETHERNET;
+
+ p = kzalloc(sizeof *p, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ p->dev = dev;
+ p->port_num = port_num;
+ p->slave = slave;
+
+ ret = kobject_init_and_add(&p->kobj, &port_type,
+ kobject_get(dev->dev_ports_parent[slave]),
+ "%d", port_num);
+ if (ret)
+ goto err_alloc;
+
+ p->pkey_group.name = "pkey_idx";
+ if (is_eth)
+ p->pkey_group.attrs =
+ alloc_group_attrs(show_port_pkey, NULL,
+ dev->dev->caps.pkey_table_len[port_num]);
+ else
+ p->pkey_group.attrs =
+ alloc_group_attrs(show_port_pkey, store_port_pkey,
+ dev->dev->caps.pkey_table_len[port_num]);
+ if (!p->pkey_group.attrs)
+ goto err_alloc;
+
+ ret = sysfs_create_group(&p->kobj, &p->pkey_group);
+ if (ret)
+ goto err_free_pkey;
+
+ p->gid_group.name = "gid_idx";
+ p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
+ if (!p->gid_group.attrs)
+ goto err_free_pkey;
+
+ ret = sysfs_create_group(&p->kobj, &p->gid_group);
+ if (ret)
+ goto err_free_gid;
+
+ list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
+ return 0;
+
+err_free_gid:
+ kfree(p->gid_group.attrs[0]);
+ kfree(p->gid_group.attrs);
+
+err_free_pkey:
+ for (i = 0; i < dev->dev->caps.pkey_table_len[port_num]; ++i)
+ kfree(p->pkey_group.attrs[i]);
+ kfree(p->pkey_group.attrs);
+
+err_alloc:
+ kobject_put(dev->dev_ports_parent[slave]);
+ kfree(p);
+ return ret;
+}
+
+static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
+{
+ char name[32];
+ int err;
+ int port;
+ struct kobject *p, *t;
+ struct mlx4_port *mport;
+
+ get_name(dev, name, slave, sizeof name);
+
+ dev->pkeys.device_parent[slave] =
+ kobject_create_and_add(name, kobject_get(dev->iov_parent));
+
+ if (!dev->pkeys.device_parent[slave]) {
+ err = -ENOMEM;
+ goto fail_dev;
+ }
+
+ INIT_LIST_HEAD(&dev->pkeys.pkey_port_list[slave]);
+
+ dev->dev_ports_parent[slave] =
+ kobject_create_and_add("ports",
+ kobject_get(dev->pkeys.device_parent[slave]));
+
+ if (!dev->dev_ports_parent[slave]) {
+ err = -ENOMEM;
+ goto err_ports;
+ }
+
+ for (port = 1; port <= dev->dev->caps.num_ports; ++port) {
+ err = add_port(dev, port, slave);
+ if (err)
+ goto err_add;
+ }
+ return 0;
+
+err_add:
+ list_for_each_entry_safe(p, t,
+ &dev->pkeys.pkey_port_list[slave],
+ entry) {
+ list_del(&p->entry);
+ mport = container_of(p, struct mlx4_port, kobj);
+ sysfs_remove_group(p, &mport->pkey_group);
+ sysfs_remove_group(p, &mport->gid_group);
+ kobject_put(p);
+ }
+ kobject_put(dev->dev_ports_parent[slave]);
+
+err_ports:
+ kobject_put(dev->pkeys.device_parent[slave]);
+ /* extra put for the device_parent create_and_add */
+ kobject_put(dev->pkeys.device_parent[slave]);
+
+fail_dev:
+ kobject_put(dev->iov_parent);
+ return err;
+}
+
+static int register_pkey_tree(struct mlx4_ib_dev *device)
+{
+ int i;
+
+ if (!mlx4_is_master(device->dev))
+ return 0;
+
+ for (i = 0; i <= device->dev->num_vfs; ++i)
+ register_one_pkey_tree(device, i);
+
+ return 0;
+}
+
+static void unregister_pkey_tree(struct mlx4_ib_dev *device)
+{
+ int slave;
+ struct kobject *p, *t;
+ struct mlx4_port *port;
+
+ if (!mlx4_is_master(device->dev))
+ return;
+
+ for (slave = device->dev->num_vfs; slave >= 0; --slave) {
+ list_for_each_entry_safe(p, t,
+ &device->pkeys.pkey_port_list[slave],
+ entry) {
+ list_del(&p->entry);
+ port = container_of(p, struct mlx4_port, kobj);
+ sysfs_remove_group(p, &port->pkey_group);
+ sysfs_remove_group(p, &port->gid_group);
+ kobject_put(p);
+ kobject_put(device->dev_ports_parent[slave]);
+ }
+ kobject_put(device->dev_ports_parent[slave]);
+ kobject_put(device->pkeys.device_parent[slave]);
+ kobject_put(device->pkeys.device_parent[slave]);
+ kobject_put(device->iov_parent);
+ }
+}
+
+int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
+{
+ int i;
+ int ret = 0;
+
+ if (!mlx4_is_master(dev->dev))
+ return 0;
+
+ dev->iov_parent =
+ kobject_create_and_add("iov",
+ kobject_get(dev->ib_dev.ports_parent->parent));
+ if (!dev->iov_parent) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dev->ports_parent =
+ kobject_create_and_add("ports",
+ kobject_get(dev->iov_parent));
+ if (!dev->iov_parent) {
+ ret = -ENOMEM;
+ goto err_ports;
+ }
+
+ for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) {
+ ret = add_port_entries(dev, i);
+ if (ret)
+ goto err_add_entries;
+ }
+
+ ret = register_pkey_tree(dev);
+ if (ret)
+ goto err_add_entries;
+ return 0;
+
+err_add_entries:
+ kobject_put(dev->ports_parent);
+
+err_ports:
+ kobject_put(dev->iov_parent);
+err:
+ kobject_put(dev->ib_dev.ports_parent->parent);
+ pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
+ return ret;
+}
+
+static void unregister_alias_guid_tree(struct mlx4_ib_dev *device)
+{
+ struct mlx4_ib_iov_port *p;
+ int i;
+
+ if (!mlx4_is_master(device->dev))
+ return;
+
+ for (i = 0; i < device->dev->caps.num_ports; i++) {
+ p = &device->iov_ports[i];
+ kobject_put(p->admin_alias_parent);
+ kobject_put(p->gids_parent);
+ kobject_put(p->pkeys_parent);
+ kobject_put(p->mcgs_parent);
+ kobject_put(p->cur_port);
+ kobject_put(p->cur_port);
+ kobject_put(p->cur_port);
+ kobject_put(p->cur_port);
+ kobject_put(p->cur_port);
+ kobject_put(p->dev->ports_parent);
+ kfree(p->dentr_ar);
+ }
+}
+
+void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
+{
+ unregister_alias_guid_tree(device);
+ unregister_pkey_tree(device);
+ kobject_put(device->ports_parent);
+ kobject_put(device->iov_parent);
+ kobject_put(device->iov_parent);
+ kobject_put(device->ib_dev.ports_parent->parent);
+}
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mlx4/user.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/user.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/user.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -40,8 +40,10 @@
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
-#define MLX4_IB_UVERBS_ABI_VERSION 3
+#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3
+#define MLX4_IB_UVERBS_ABI_VERSION 4
+
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
@@ -50,10 +52,18 @@
* instead.
*/
+struct mlx4_ib_alloc_ucontext_resp_v3 {
+ __u32 qp_tab_size;
+ __u16 bf_reg_size;
+ __u16 bf_regs_per_page;
+};
+
struct mlx4_ib_alloc_ucontext_resp {
+ __u32 dev_caps;
__u32 qp_tab_size;
__u16 bf_reg_size;
__u16 bf_regs_per_page;
+ __u32 cqe_size;
};
struct mlx4_ib_alloc_pd_resp {
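The user.h change bumps MLX4_IB_UVERBS_ABI_VERSION to 4 but keeps the version-3 response layout around as mlx4_ib_alloc_ucontext_resp_v3, so older userspace can still be answered with the shorter struct. A hedged user-space sketch of the two layouts (the fixed-width stand-in typedefs follow the packing rule the header's comment describes; the size-selection helper is illustrative, not the driver's code):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t __u32;   /* userspace stand-ins for the kernel types */
typedef uint16_t __u16;

struct mlx4_ib_alloc_ucontext_resp_v3 {
	__u32 qp_tab_size;
	__u16 bf_reg_size;
	__u16 bf_regs_per_page;
};

struct mlx4_ib_alloc_ucontext_resp {
	__u32 dev_caps;
	__u32 qp_tab_size;
	__u16 bf_reg_size;
	__u16 bf_regs_per_page;
	__u32 cqe_size;
};

/* a driver still speaking ABI 3 would copy back only the short struct */
static size_t resp_size(int abi_ver)
{
	return abi_ver == 3 ? sizeof(struct mlx4_ib_alloc_ucontext_resp_v3)
			    : sizeof(struct mlx4_ib_alloc_ucontext_resp);
}

int main(void)
{
	printf("v3: %zu bytes, v4: %zu bytes\n", resp_size(3), resp_size(4));
	return 0;
}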
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/user.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mlx4/wc.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/wc.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/wc.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -71,4 +71,3 @@
}
#endif
-
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/wc.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mlx4/wc.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mlx4/wc.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mlx4/wc.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mlx4/wc.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/Kconfig
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/Kconfig 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/Kconfig 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/Kconfig
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/Makefile
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/Makefile 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/Makefile 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/Makefile
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_allocator.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_allocator.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_allocator.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -32,7 +32,6 @@
#include <linux/errno.h>
#include <linux/slab.h>
-#include <linux/bitmap.h>
#include "mthca_dev.h"
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_allocator.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_av.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_av.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_av.c 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_av.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_catas.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_catas.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_catas.c 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_catas.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1808,7 +1808,7 @@
case IB_QPT_RAW_IPV6:
op_mod = 2;
break;
- case IB_QPT_RAW_ETY:
+ case IB_QPT_RAW_ETHERTYPE:
op_mod = 3;
break;
default:
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_config_reg.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_config_reg.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_config_reg.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -34,7 +34,7 @@
#ifndef MTHCA_CONFIG_REG_H
#define MTHCA_CONFIG_REG_H
-#include <asm/page.h>
+#include <linux/page.h>
#define MTHCA_HCR_BASE 0x80680
#define MTHCA_HCR_SIZE 0x0001c
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_config_reg.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cq.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cq.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cq.c 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_cq.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_dev.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_dev.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_dev.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_dev.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_doorbell.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_doorbell.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_doorbell.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_doorbell.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_eq.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_eq.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_eq.c 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_eq.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mad.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mad.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mad.c 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mad.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -33,7 +33,6 @@
*/
#include <linux/module.h>
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
@@ -1325,7 +1324,7 @@
if (log_mtts_per_seg == 0)
log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) {
- printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). Using default - %ld\n",
+ printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). Using default - %d\n",
log_mtts_per_seg, ilog2(MTHCA_MTT_SEG_SIZE / 8));
log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
}
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mcg.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mcg.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mcg.c 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mcg.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -36,7 +36,7 @@
#include <linux/scatterlist.h>
#include <linux/sched.h>
-#include <asm/page.h>
+#include <linux/page.h>
#include "mthca_memfree.h"
#include "mthca_dev.h"
@@ -448,6 +448,7 @@
page * MTHCA_ICM_PAGE_SIZE;
}
+
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mr.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mr.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mr.c 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_mr.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_pd.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_pd.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_pd.c 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_pd.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_profile.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_profile.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_profile.c 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_profile.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_profile.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_profile.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_profile.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_profile.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -40,6 +40,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/fs.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
@@ -1006,7 +1007,7 @@
}
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
+ u64 virt, int acc, struct ib_udata *udata, int mr_id)
{
struct mthca_dev *dev = to_mdev(pd->device);
struct ib_umem_chunk *chunk;
@@ -1402,7 +1403,7 @@
mutex_init(&dev->cap_mask_mutex);
- ret = ib_register_device(&dev->ib_dev);
+ ret = ib_register_device(&dev->ib_dev, NULL);
if (ret)
return ret;
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_reset.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_reset.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_reset.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/delay.h>
@@ -44,9 +43,13 @@
int i;
int err = 0;
u32 *hca_header = NULL;
+#ifdef __linux__
u32 *bridge_header = NULL;
+#endif
struct pci_dev *bridge = NULL;
+#ifdef __linux__
int bridge_pcix_cap = 0;
+#endif
int hca_pcie_cap = 0;
int hca_pcix_cap = 0;
@@ -196,6 +199,7 @@
}
good:
+#ifdef __linux__
/* Now restore the PCI headers */
if (bridge) {
if (pci_write_config_dword(bridge, bridge_pcix_cap + 0x8,
@@ -236,6 +240,7 @@
goto out;
}
}
+#endif
if (hca_pcix_cap) {
if (pci_write_config_dword(mdev->pdev, hca_pcix_cap,
@@ -290,8 +295,8 @@
#ifdef __linux__
if (bridge)
pci_dev_put(bridge);
+ kfree(bridge_header);
#endif
- kfree(bridge_header);
kfree(hca_header);
return err;
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_reset.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_srq.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_srq.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_srq.c 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_srq.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_uar.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_uar.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_uar.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -30,7 +30,7 @@
* SOFTWARE.
*/
-#include <asm/page.h> /* PAGE_SHIFT */
+#include <linux/page.h>
#include "mthca_dev.h"
#include "mthca_memfree.h"
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_uar.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_user.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_user.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_user.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_user.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_wqe.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_wqe.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_wqe.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/hw/mthca/mthca_wqe.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/ulp/ipoib/Kconfig
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/ipoib/Kconfig 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/ipoib/Kconfig 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/ulp/ipoib/Kconfig
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/ulp/ipoib/Makefile
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/ipoib/Makefile 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/ipoib/Makefile 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/ulp/ipoib/Makefile
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -67,8 +67,6 @@
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#include <netinet/ip_var.h>
-#include <netinet/ip_fw.h>
-#include <netinet/ipfw/ip_fw_private.h>
#endif
#ifdef INET6
#include <netinet6/nd6.h>
@@ -111,7 +109,8 @@
IPOIB_ENCAP_LEN = 4,
IPOIB_HEADER_LEN = IPOIB_ENCAP_LEN + INFINIBAND_ALEN,
IPOIB_UD_MAX_MTU = 4 * 1024,
- IPOIB_UD_RX_SG = (IPOIB_UD_MAX_MTU / MJUMPAGESIZE),
+// IPOIB_UD_RX_SG = (IPOIB_UD_MAX_MTU / MJUMPAGESIZE),
+ IPOIB_UD_RX_SG = 2,
IPOIB_UD_TX_SG = (IPOIB_UD_MAX_MTU / MCLBYTES) + 2,
IPOIB_CM_MAX_MTU = (64 * 1024),
IPOIB_CM_TX_SG = (IPOIB_CM_MAX_MTU / MCLBYTES) + 2,
@@ -320,6 +319,8 @@
unsigned long flags;
+ int gone;
+
struct mutex vlan_mutex;
struct rb_root path_tree;
Property changes on: trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
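
The ipoib.h change pins IPOIB_UD_RX_SG at 2 instead of deriving it from
MJUMPAGESIZE. On FreeBSD, MJUMPAGESIZE is PAGE_SIZE, so on 4 KB-page
machines the old expression evaluated to 1, which presumably cannot hold
a maximum-size UD datagram once the 4-byte IPoIB encapsulation header
(and, on receive, the 40-byte IB GRH) is added. Illustrative arithmetic,
runnable in user space:

	#include <stdio.h>

	int main(void)
	{
		const int page  = 4096;		/* MJUMPAGESIZE on a 4 KB-page system */
		const int mtu   = 4 * 1024;	/* IPOIB_UD_MAX_MTU */
		const int encap = 4;		/* IPoIB encapsulation header */
		const int grh   = 40;		/* IB GRH on UD receive */

		printf("old IPOIB_UD_RX_SG = %d\n", mtu / page);	/* 1 */
		printf("bytes needed = %d > %d, hence 2 segments\n",
		    mtu + encap + grh, page);
		return 0;
	}
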
Modified: trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -481,6 +481,8 @@
int has_srq;
u_short proto;
+ CURVNET_SET_QUIET(dev->if_vnet);
+
ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
wr_id, wc->status);
@@ -496,7 +498,7 @@
} else
ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
wr_id, ipoib_recvq_size);
- return;
+ goto done;
}
p = wc->qp->qp_context;
@@ -520,7 +522,7 @@
queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
spin_unlock(&priv->lock);
}
- return;
+ goto done;
}
}
@@ -579,6 +581,9 @@
"for buf %d\n", wr_id);
}
}
+done:
+ CURVNET_RESTORE();
+ return;
}
static inline int post_send(struct ipoib_dev_priv *priv,
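
The ipoib_cm.c hunks bracket the CM receive-completion handler with
CURVNET_SET_QUIET()/CURVNET_RESTORE() and turn the early returns into
goto done, so every exit path restores the caller's vnet. A minimal
sketch of the bracketing pattern on a VIMAGE-aware FreeBSD kernel
(handler name illustrative):

	#include <sys/param.h>
	#include <net/if.h>
	#include <net/if_var.h>
	#include <net/vnet.h>

	static void
	example_comp_handler(struct ifnet *ifp)
	{
		CURVNET_SET_QUIET(ifp->if_vnet); /* adopt the ifnet's vnet */
		/* ... completion processing; bail out via a 'done' label,
		 * never with a bare return, so the restore always runs ... */
		CURVNET_RESTORE();
	}
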
Modified: trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -31,7 +31,6 @@
*/
#include <linux/kernel.h>
-#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include "ipoib.h"
Modified: trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -40,7 +40,6 @@
#include <linux/module.h>
-#include <linux/init.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
@@ -259,6 +258,10 @@
struct ifreq *ifr = (struct ifreq *) data;
int error = 0;
+ /* check if detaching */
+ if (priv == NULL || priv->gone != 0)
+ return (ENXIO);
+
switch (command) {
case SIOCSIFFLAGS:
if (ifp->if_flags & IFF_UP) {
@@ -795,6 +798,7 @@
dev = priv->dev;
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
+ priv->gone = 1;
bpfdetach(dev);
if_detach(dev);
if_free(dev);
@@ -1073,6 +1077,8 @@
if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND)
continue;
+ ipoib_stop(priv);
+
ib_unregister_event_handler(&priv->event_handler);
/* dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); */
@@ -1537,3 +1543,20 @@
module_init(ipoib_init_module);
module_exit(ipoib_cleanup_module);
+
+#undef MODULE_VERSION
+#include <sys/module.h>
+static int
+ipoib_evhand(module_t mod, int event, void *arg)
+{
+ return (0);
+}
+
+static moduledata_t ipoib_mod = {
+ .name = "ipoib",
+ .evhand = ipoib_evhand,
+};
+
+DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_SMP, SI_ORDER_ANY);
+MODULE_DEPEND(ipoib, ibcore, 1, 1, 1);
+
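
The new tail of ipoib_main.c declares a FreeBSD kernel module so the
loader can record the ipoib -> ibcore dependency; the #undef of
MODULE_VERSION is presumably needed because the Linux-compat macro of
that name collides with the one in <sys/module.h>. A generic skeleton of
the same construct (names illustrative):

	#include <sys/param.h>
	#include <sys/kernel.h>
	#include <sys/module.h>

	static int
	example_evhand(module_t mod, int event, void *arg)
	{
		switch (event) {
		case MOD_LOAD:
		case MOD_UNLOAD:
			return (0);
		default:
			return (EOPNOTSUPP);
		}
	}

	static moduledata_t example_mod = {
		.name	= "example",
		.evhand	= example_evhand,
	};

	DECLARE_MODULE(example, example_mod, SI_SUB_SMP, SI_ORDER_ANY);
	MODULE_DEPEND(example, ibcore, 1, 1, 1);
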
Modified: trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -723,7 +723,7 @@
void ipoib_mcast_restart(struct ipoib_dev_priv *priv)
{
struct ifnet *dev = priv->dev;
- struct ifmultiaddr *ifma;;
+ struct ifmultiaddr *ifma;
struct ipoib_mcast *mcast, *tmcast;
LIST_HEAD(remove_list);
struct ib_sa_mcmember_rec rec;
Modified: trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -32,7 +32,6 @@
*/
#include "ipoib.h"
-#include <linux/ethtool.h>
int ipoib_mcast_attach(struct ipoib_dev_priv *priv, u16 mlid, union ib_gid *mgid, int set_qkey)
{
Index: trunk/sys/ofed/drivers/infiniband/ulp/sdp/Kconfig
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/sdp/Kconfig 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/sdp/Kconfig 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/ulp/sdp/Kconfig
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/drivers/infiniband/ulp/sdp/Makefile
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/sdp/Makefile 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/sdp/Makefile 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/ulp/sdp/Makefile
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -702,6 +702,7 @@
void sdp_rx_comp_full(struct sdp_sock *ssk);
/* sdp_zcopy.c */
+struct kiocb;
int sdp_sendmsg_zcopy(struct kiocb *iocb, struct socket *sk, struct iovec *iov);
int sdp_handle_srcavail(struct sdp_sock *ssk, struct sdp_srcah *srcah);
void sdp_handle_sendsm(struct sdp_sock *ssk, u32 mseq_ack);
Property changes on: trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -29,7 +29,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- * $Id: sdp_bcopy.c,v 1.1.1.1 2012-07-21 15:17:36 laffer1 Exp $
+ * $Id$
*/
#include "sdp.h"
Modified: trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -29,7 +29,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- * $Id: sdp_cma.c,v 1.1.1.1 2012-07-21 15:17:36 laffer1 Exp $
+ * $Id$
*/
#include "sdp.h"
Index: trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_dbg.h
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_dbg.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_dbg.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_dbg.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -85,7 +85,7 @@
#define SDP_LIST_RLOCK_ASSERT() rw_assert(&sdp_lock, RW_RLOCKED)
#define SDP_LIST_LOCK_ASSERT() rw_assert(&sdp_lock, RW_LOCKED)
-MALLOC_DEFINE(M_SDP, "sdp", "Socket Direct Protocol");
+static MALLOC_DEFINE(M_SDP, "sdp", "Socket Direct Protocol");
static void sdp_stop_keepalive_timer(struct socket *so);
@@ -1267,7 +1267,7 @@
/* Socket buffer is empty and we shall not block. */
if (sb->sb_cc == 0 &&
- ((sb->sb_flags & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
+ ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
error = EAGAIN;
goto out;
}
@@ -1297,7 +1297,7 @@
/* Socket buffer got some data that we shall deliver now. */
if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) &&
- ((sb->sb_flags & SS_NBIO) ||
+ ((so->so_state & SS_NBIO) ||
(flags & (MSG_DONTWAIT|MSG_NBIO)) ||
sb->sb_cc >= sb->sb_lowat ||
sb->sb_cc >= uio->uio_resid ||
@@ -1878,7 +1878,7 @@
return (error);
}
-SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW, 0, "SDP");
+static SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW, 0, "SDP");
SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist,
CTLFLAG_RD | CTLTYPE_STRUCT, 0, 0, sdp_pcblist, "S,xtcpcb",
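
Two hunks in sdp_main.c fix a flag-namespace bug: SS_NBIO is a
socket-state bit kept in so->so_state, while sb->sb_flags carries SB_*
sockbuf bits, so the old test could never observe non-blocking mode. The
corrected predicate as a standalone sketch (kernel context assumed):

	#include <sys/param.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>

	/* Would a receive on this socket block right now? */
	static int
	rx_would_block(struct socket *so, struct sockbuf *sb, int flags)
	{
		if (sb->sb_cc == 0 &&
		    ((so->so_state & SS_NBIO) ||
		    (flags & (MSG_DONTWAIT | MSG_NBIO))))
			return (EAGAIN);	/* would block; caller bails */
		return (0);
	}
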
Modified: trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -590,7 +590,7 @@
if (unlikely(!ssk->poll_cq)) {
struct rdma_cm_id *id = ssk->id;
if (id && id->qp)
- rdma_notify(id, RDMA_CM_EVENT_ESTABLISHED);
+ rdma_notify(id, IB_EVENT_COMM_EST);
goto out;
}
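
The sdp_rx.c change repairs an argument-type mix-up: rdma_notify() takes
an enum ib_event_type, and RDMA_CM_EVENT_ESTABLISHED belongs to the
unrelated enum rdma_cm_event_type, so the old call only compiled because
C enums convert silently. The corrected call, in isolation:

	#include <rdma/rdma_cm.h>

	static void
	notify_established(struct rdma_cm_id *id)
	{
		if (id != NULL && id->qp != NULL)
			(void) rdma_notify(id, IB_EVENT_COMM_EST);
	}
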
Modified: trunk/sys/ofed/drivers/infiniband/util/madeye.c
===================================================================
--- trunk/sys/ofed/drivers/infiniband/util/madeye.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/infiniband/util/madeye.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -30,7 +30,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- * $Id: madeye.c,v 1.1.1.1 2012-07-21 15:17:36 laffer1 Exp $
+ * $Id$
*/
#include <linux/module.h>
#include <linux/device.h>
Modified: trunk/sys/ofed/drivers/net/mlx4/Makefile
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/Makefile 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/Makefile 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,9 +1,33 @@
-obj-$(CONFIG_MLX4_CORE) += mlx4_core.o
+# $FreeBSD$
+#.PATH: ${.CURDIR}/../../ofed/drivers/net/mlx4:${.CURDIR}/../../ofed/include/linux
+.PATH: ${.CURDIR}/../../../../../include/linux
-mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \
- mr.o pd.o port.o profile.o qp.o reset.o sense.o srq.o xrcd.o
+.include <bsd.own.mk>
-obj-$(CONFIG_MLX4_EN) += mlx4_en.o
-mlx4_en-y := en_main.o en_tx.o en_rx.o en_ethtool.o en_port.o en_cq.o \
- en_resources.o en_netdev.o en_frag.o en_selftest.o
+KMOD = mlx4
+SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
+SRCS+= alloc.c catas.c cmd.c cq.c eq.c fw.c icm.c intf.c main.c mcg.c mr.c linux_compat.c linux_radix.c
+SRCS+= pd.c port.c profile.c qp.c reset.c sense.c srq.c resource_tracker.c sys_tune.c
+SRCS+= opt_inet.h opt_inet6.h
+
+
+#CFLAGS+= -I${.CURDIR}/../../ofed/drivers/net/mlx4
+#CFLAGS+= -I${.CURDIR}/../../ofed/include/
+CFLAGS+= -I${.CURDIR}/../../../../../include
+
+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+ @echo "#define INET 1" > ${.TARGET}
+.endif
+
+.if ${MK_INET6_SUPPORT} != "no"
+opt_inet6.h:
+ @echo "#define INET6 1" > ${.TARGET}
+.endif
+.endif
+
+.include <bsd.kmod.mk>
+
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith -fms-extensions
Modified: trunk/sys/ofed/drivers/net/mlx4/alloc.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/alloc.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/alloc.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -34,7 +34,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/bitmap.h>
+#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
@@ -70,14 +70,14 @@
return obj;
}
-void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj)
+void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr)
{
- mlx4_bitmap_free_range(bitmap, obj, 1);
+ mlx4_bitmap_free_range(bitmap, obj, 1, use_rr);
}
static unsigned long find_aligned_range(unsigned long *bitmap,
u32 start, u32 nbits,
- int len, int align)
+ int len, int align, u32 skip_mask)
{
unsigned long end, i;
@@ -84,7 +84,8 @@
again:
start = ALIGN(start, align);
- while ((start < nbits) && test_bit(start, bitmap))
+ while ((start < nbits) && (test_bit(start, bitmap) ||
+ (start & skip_mask)))
start += align;
if (start >= nbits)
@@ -95,7 +96,7 @@
return -1;
for (i = start + 1; i < end; i++) {
- if (test_bit(i, bitmap)) {
+ if (test_bit(i, bitmap) || ((u32)i & skip_mask)) {
start = i + 1;
goto again;
}
@@ -104,27 +105,27 @@
return start;
}
-u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align)
+u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt,
+ int align, u32 skip_mask)
{
- u32 obj, i;
+ u32 obj;
- if (likely(cnt == 1 && align == 1))
+ if (likely(cnt == 1 && align == 1 && !skip_mask))
return mlx4_bitmap_alloc(bitmap);
spin_lock(&bitmap->lock);
obj = find_aligned_range(bitmap->table, bitmap->last,
- bitmap->max, cnt, align);
+ bitmap->max, cnt, align, skip_mask);
if (obj >= bitmap->max) {
bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
& bitmap->mask;
obj = find_aligned_range(bitmap->table, 0, bitmap->max,
- cnt, align);
+ cnt, align, skip_mask);
}
if (obj < bitmap->max) {
- for (i = 0; i < cnt; i++)
- set_bit(obj + i, bitmap->table);
+ bitmap_set(bitmap->table, obj, cnt);
if (obj == bitmap->last) {
bitmap->last = (obj + cnt);
if (bitmap->last >= bitmap->max)
@@ -147,18 +148,18 @@
return bitmap->avail;
}
-void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt)
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt,
+ int use_rr)
{
- u32 i;
-
obj &= bitmap->max + bitmap->reserved_top - 1;
spin_lock(&bitmap->lock);
- for (i = 0; i < cnt; i++)
- clear_bit(obj + i, bitmap->table);
- bitmap->last = min(bitmap->last, obj);
- bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
- & bitmap->mask;
+ if (!use_rr) {
+ bitmap->last = min(bitmap->last, obj);
+ bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+ & bitmap->mask;
+ }
+ bitmap_clear(bitmap->table, obj, cnt);
bitmap->avail += cnt;
spin_unlock(&bitmap->lock);
}
@@ -166,12 +167,17 @@
int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
u32 reserved_bot, u32 reserved_top)
{
- int i;
+ /* sanity check */
+ if (num <= (u64)reserved_top + reserved_bot)
+ return -EINVAL;
/* num must be a power of 2 */
if (num != roundup_pow_of_two(num))
return -EINVAL;
+ if (reserved_bot + reserved_top >= num)
+ return -EINVAL;
+
bitmap->last = 0;
bitmap->top = 0;
bitmap->max = num - reserved_top;
@@ -184,8 +190,7 @@
if (!bitmap->table)
return -ENOMEM;
- for (i = 0; i < reserved_bot; ++i)
- set_bit(i, bitmap->table);
+ bitmap_set(bitmap->table, 0, reserved_bot);
return 0;
}
@@ -207,7 +212,6 @@
{
dma_addr_t t;
- buf->direct.buf = NULL;
if (size <= max_direct) {
buf->nbufs = 1;
buf->npages = 1;
@@ -229,11 +233,10 @@
int i;
buf->direct.buf = NULL;
- buf->direct.map = 0;
buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
buf->npages = buf->nbufs;
buf->page_shift = PAGE_SHIFT;
- buf->page_list = kzalloc(buf->nbufs * sizeof *buf->page_list,
+ buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
GFP_KERNEL);
if (!buf->page_list)
return -ENOMEM;
@@ -291,7 +294,6 @@
buf->page_list[i].map);
kfree(buf->page_list);
}
- buf->direct.buf = NULL;
}
EXPORT_SYMBOL_GPL(mlx4_buf_free);
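
The alloc.c rework threads a skip_mask through the aligned-range search,
letting callers forbid bit offsets whose low bits match the mask, and
swaps the hand-rolled set/clear loops for bitmap_set()/bitmap_clear();
the new use_rr flag makes frees optionally round-robin by not rewinding
bitmap->last. A self-contained user-space sketch of the search logic
(helper names illustrative):

	#include <limits.h>

	#define BITS_PER_LONG	(sizeof(long) * CHAR_BIT)

	static int
	test_bit_ul(const unsigned long *map, unsigned int b)
	{
		return ((map[b / BITS_PER_LONG] >> (b % BITS_PER_LONG)) & 1UL);
	}

	/* Find 'len' consecutive clear bits whose start is aligned to
	 * 'align' and where no index matches skip_mask; -1 if none. */
	static long
	find_aligned_range_sketch(const unsigned long *map, unsigned int nbits,
	    int len, int align, unsigned int skip_mask)
	{
		unsigned int start = 0, end, i;
	again:
		start = (start + align - 1) / align * align;	/* ALIGN() */
		while (start < nbits &&
		    (test_bit_ul(map, start) || (start & skip_mask)))
			start += align;
		end = start + len;
		if (start >= nbits || end > nbits)
			return (-1);
		for (i = start + 1; i < end; i++) {
			if (test_bit_ul(map, i) || (i & skip_mask)) {
				start = i + 1;
				goto again;	/* re-align and retry */
			}
		}
		return ((long)start);
	}
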
Modified: trunk/sys/ofed/drivers/net/mlx4/catas.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/catas.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/catas.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -32,10 +32,13 @@
*/
#include <linux/workqueue.h>
+#include <linux/module.h>
+#include <asm/byteorder.h>
+
#include "mlx4.h"
-#define MLX4_CATAS_POLL_INTERVAL (5 * HZ)
+#define MLX4_CATAS_POLL_INTERVAL (5 * HZ)
static DEFINE_SPINLOCK(catas_lock);
@@ -45,7 +48,8 @@
static int internal_err_reset = 1;
module_param(internal_err_reset, int, 0644);
MODULE_PARM_DESC(internal_err_reset,
- "Reset device on internal errors if non-zero (default 1)");
+ "Reset device on internal errors if non-zero"
+ " (default 1, in SRIOV mode default is 0)");
static void dump_err_buf(struct mlx4_dev *dev)
{
@@ -65,16 +69,21 @@
struct mlx4_priv *priv = mlx4_priv(dev);
if (readl(priv->catas_err.map)) {
- dump_err_buf(dev);
+ /* If the device is off-line, we cannot try to recover it */
+ if (pci_channel_offline(dev->pdev))
+ mod_timer(&priv->catas_err.timer,
+ round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
+ else {
+ dump_err_buf(dev);
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
- mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
+ if (internal_err_reset) {
+ spin_lock(&catas_lock);
+ list_add(&priv->catas_err.list, &catas_list);
+ spin_unlock(&catas_lock);
- if (internal_err_reset) {
- spin_lock(&catas_lock);
- list_add(&priv->catas_err.list, &catas_list);
- spin_unlock(&catas_lock);
-
- queue_work(mlx4_wq, &catas_work);
+ queue_work(mlx4_wq, &catas_work);
+ }
}
} else
mod_timer(&priv->catas_err.timer,
@@ -89,9 +98,6 @@
LIST_HEAD(tlist);
int ret;
- if (!mutex_trylock(&drv_mutex))
- return;
-
spin_lock_irq(&catas_lock);
list_splice_init(&catas_list, &tlist);
spin_unlock_irq(&catas_lock);
@@ -99,24 +105,31 @@
list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
struct pci_dev *pdev = priv->dev.pdev;
+ /* If the device is off-line, we cannot reset it */
+ if (pci_channel_offline(pdev))
+ continue;
+
ret = mlx4_restart_one(priv->dev.pdev);
/* 'priv' now is not valid */
if (ret)
- printk(KERN_ERR "mlx4 %s: Reset failed (%d)\n",
- pci_name(pdev), ret);
+ pr_err("mlx4 %s: Reset failed (%d)\n",
+ pci_name(pdev), ret);
else {
dev = pci_get_drvdata(pdev);
mlx4_dbg(dev, "Reset succeeded\n");
}
}
- mutex_unlock(&drv_mutex);
}
void mlx4_start_catas_poll(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- unsigned long addr;
+ phys_addr_t addr;
+ /* If we are in SRIOV mode, the default of the module param must be 0 */
+ if (mlx4_is_mfunc(dev))
+ internal_err_reset = 0;
+
INIT_LIST_HEAD(&priv->catas_err.list);
init_timer(&priv->catas_err.timer);
priv->catas_err.map = NULL;
@@ -126,8 +139,8 @@
priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
if (!priv->catas_err.map) {
- mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n",
- addr);
+ mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
+ (unsigned long long) addr);
return;
}
@@ -144,11 +157,13 @@
del_timer_sync(&priv->catas_err.timer);
- if (priv->catas_err.map)
+ if (priv->catas_err.map) {
iounmap(priv->catas_err.map);
+ priv->catas_err.map = NULL;
+ }
spin_lock_irq(&catas_lock);
- list_del(&priv->catas_err.list);
+ list_del_init(&priv->catas_err.list);
spin_unlock_irq(&catas_lock);
}
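
catas.c now refuses to touch a device that pci_channel_offline() reports
as undergoing AER recovery: the poller merely rearms its timer, and the
reset work skips the device. The underlying shape is a self-rearming
timer that stops rearming once it escalates; a sketch against the
pre-4.15 Linux timer API used here (helper names hypothetical):

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	#define POLL_INTERVAL	(5 * HZ)

	static int device_hit_fatal_error(void *dev);	/* hypothetical */
	static void schedule_recovery(void *dev);	/* hypothetical */

	static struct timer_list poll_timer;

	static void
	poll_fn(unsigned long data)
	{
		void *dev = (void *)data;

		if (device_hit_fatal_error(dev)) {
			schedule_recovery(dev);
			return;		/* escalated: stop rearming */
		}
		mod_timer(&poll_timer, round_jiffies(jiffies + POLL_INTERVAL));
	}
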
Modified: trunk/sys/ofed/drivers/net/mlx4/cmd.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/cmd.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/cmd.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -33,17 +33,28 @@
*/
#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/module.h>
#include <linux/pci.h>
#include <linux/errno.h>
#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/device.h>
+#include <linux/semaphore.h>
+#include <rdma/ib_smi.h>
#include <asm/io.h>
+#include <linux/ktime.h>
#include "mlx4.h"
+#include "fw.h"
#define CMD_POLL_TOKEN 0xffff
+#define INBOX_MASK 0xffffffffffffff00ULL
+#define CMD_CHAN_VER 1
+#define CMD_CHAN_IF_REV 1
+
enum {
/* command completed successfully: */
CMD_STAT_OK = 0x00,
@@ -102,6 +113,14 @@
GO_BIT_TIMEOUT_MSECS = 10000
};
+enum mlx4_vlan_transition {
+ MLX4_VLAN_TRANSITION_VST_VST = 0,
+ MLX4_VLAN_TRANSITION_VST_VGT = 1,
+ MLX4_VLAN_TRANSITION_VGT_VST = 2,
+ MLX4_VLAN_TRANSITION_VGT_VGT = 3,
+};
+
+
struct mlx4_cmd_context {
struct completion done;
int result;
@@ -111,6 +130,9 @@
u8 fw_status;
};
+static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr_cmd *in_vhcr);
+
static int mlx4_status_to_errno(u8 status)
{
static const int trans_table[] = {
@@ -141,33 +163,348 @@
return trans_table[status];
}
+static const char *cmd_to_str(u16 cmd)
+{
+ switch (cmd) {
+ case MLX4_CMD_SYS_EN: return "SYS_EN";
+ case MLX4_CMD_SYS_DIS: return "SYS_DIS";
+ case MLX4_CMD_MAP_FA: return "MAP_FA";
+ case MLX4_CMD_UNMAP_FA: return "UNMAP_FA";
+ case MLX4_CMD_RUN_FW: return "RUN_FW";
+ case MLX4_CMD_MOD_STAT_CFG: return "MOD_STAT_CFG";
+ case MLX4_CMD_QUERY_DEV_CAP: return "QUERY_DEV_CAP";
+ case MLX4_CMD_QUERY_FW: return "QUERY_FW";
+ case MLX4_CMD_ENABLE_LAM: return "ENABLE_LAM";
+ case MLX4_CMD_DISABLE_LAM: return "DISABLE_LAM";
+ case MLX4_CMD_QUERY_DDR: return "QUERY_DDR";
+ case MLX4_CMD_QUERY_ADAPTER: return "QUERY_ADAPTER";
+ case MLX4_CMD_INIT_HCA: return "INIT_HCA";
+ case MLX4_CMD_CLOSE_HCA: return "CLOSE_HCA";
+ case MLX4_CMD_INIT_PORT: return "INIT_PORT";
+ case MLX4_CMD_CLOSE_PORT: return "CLOSE_PORT";
+ case MLX4_CMD_QUERY_HCA: return "QUERY_HCA";
+ case MLX4_CMD_QUERY_PORT: return "QUERY_PORT";
+ case MLX4_CMD_SENSE_PORT: return "SENSE_PORT";
+ case MLX4_CMD_HW_HEALTH_CHECK: return "HW_HEALTH_CHECK";
+ case MLX4_CMD_SET_PORT: return "SET_PORT";
+ case MLX4_CMD_SET_NODE: return "SET_NODE";
+ case MLX4_CMD_QUERY_FUNC: return "QUERY_FUNC";
+ case MLX4_CMD_MAP_ICM: return "MAP_ICM";
+ case MLX4_CMD_UNMAP_ICM: return "UNMAP_ICM";
+ case MLX4_CMD_MAP_ICM_AUX: return "MAP_ICM_AUX";
+ case MLX4_CMD_UNMAP_ICM_AUX: return "UNMAP_ICM_AUX";
+ case MLX4_CMD_SET_ICM_SIZE: return "SET_ICM_SIZE";
+ /*master notify fw on finish for slave's flr*/
+ case MLX4_CMD_INFORM_FLR_DONE: return "INFORM_FLR_DONE";
+ case MLX4_CMD_GET_OP_REQ: return "GET_OP_REQ";
+
+ /* TPT commands */
+ case MLX4_CMD_SW2HW_MPT: return "SW2HW_MPT";
+ case MLX4_CMD_QUERY_MPT: return "QUERY_MPT";
+ case MLX4_CMD_HW2SW_MPT: return "HW2SW_MPT";
+ case MLX4_CMD_READ_MTT: return "READ_MTT";
+ case MLX4_CMD_WRITE_MTT: return "WRITE_MTT";
+ case MLX4_CMD_SYNC_TPT: return "SYNC_TPT";
+
+ /* EQ commands */
+ case MLX4_CMD_MAP_EQ: return "MAP_EQ";
+ case MLX4_CMD_SW2HW_EQ: return "SW2HW_EQ";
+ case MLX4_CMD_HW2SW_EQ: return "HW2SW_EQ";
+ case MLX4_CMD_QUERY_EQ: return "QUERY_EQ";
+
+ /* CQ commands */
+ case MLX4_CMD_SW2HW_CQ: return "SW2HW_CQ";
+ case MLX4_CMD_HW2SW_CQ: return "HW2SW_CQ";
+ case MLX4_CMD_QUERY_CQ: return "QUERY_CQ";
+ case MLX4_CMD_MODIFY_CQ: return "MODIFY_CQ";
+
+ /* SRQ commands */
+ case MLX4_CMD_SW2HW_SRQ: return "SW2HW_SRQ";
+ case MLX4_CMD_HW2SW_SRQ: return "HW2SW_SRQ";
+ case MLX4_CMD_QUERY_SRQ: return "QUERY_SRQ";
+ case MLX4_CMD_ARM_SRQ: return "ARM_SRQ";
+
+ /* QP/EE commands */
+ case MLX4_CMD_RST2INIT_QP: return "RST2INIT_QP";
+ case MLX4_CMD_INIT2RTR_QP: return "INIT2RTR_QP";
+ case MLX4_CMD_RTR2RTS_QP: return "RTR2RTS_QP";
+ case MLX4_CMD_RTS2RTS_QP: return "RTS2RTS_QP";
+ case MLX4_CMD_SQERR2RTS_QP: return "SQERR2RTS_QP";
+ case MLX4_CMD_2ERR_QP: return "2ERR_QP";
+ case MLX4_CMD_RTS2SQD_QP: return "RTS2SQD_QP";
+ case MLX4_CMD_SQD2SQD_QP: return "SQD2SQD_QP";
+ case MLX4_CMD_SQD2RTS_QP: return "SQD2RTS_QP";
+ case MLX4_CMD_2RST_QP: return "2RST_QP";
+ case MLX4_CMD_QUERY_QP: return "QUERY_QP";
+ case MLX4_CMD_INIT2INIT_QP: return "INIT2INIT_QP";
+ case MLX4_CMD_SUSPEND_QP: return "SUSPEND_QP";
+ case MLX4_CMD_UNSUSPEND_QP: return "UNSUSPEND_QP";
+ /* special QP and management commands */
+ case MLX4_CMD_CONF_SPECIAL_QP: return "CONF_SPECIAL_QP";
+ case MLX4_CMD_MAD_IFC: return "MAD_IFC";
+
+ /* multicast commands */
+ case MLX4_CMD_READ_MCG: return "READ_MCG";
+ case MLX4_CMD_WRITE_MCG: return "WRITE_MCG";
+ case MLX4_CMD_MGID_HASH: return "MGID_HASH";
+
+ /* miscellaneous commands */
+ case MLX4_CMD_DIAG_RPRT: return "DIAG_RPRT";
+ case MLX4_CMD_NOP: return "NOP";
+ case MLX4_CMD_ACCESS_MEM: return "ACCESS_MEM";
+ case MLX4_CMD_SET_VEP: return "SET_VEP";
+
+ /* Ethernet specific commands */
+ case MLX4_CMD_SET_VLAN_FLTR: return "SET_VLAN_FLTR";
+ case MLX4_CMD_SET_MCAST_FLTR: return "SET_MCAST_FLTR";
+ case MLX4_CMD_DUMP_ETH_STATS: return "DUMP_ETH_STATS";
+
+ /* Communication channel commands */
+ case MLX4_CMD_ARM_COMM_CHANNEL: return "ARM_COMM_CHANNEL";
+ case MLX4_CMD_GEN_EQE: return "GEN_EQE";
+
+ /* virtual commands */
+ case MLX4_CMD_ALLOC_RES: return "ALLOC_RES";
+ case MLX4_CMD_FREE_RES: return "FREE_RES";
+ case MLX4_CMD_MCAST_ATTACH: return "MCAST_ATTACH";
+ case MLX4_CMD_UCAST_ATTACH: return "UCAST_ATTACH";
+ case MLX4_CMD_PROMISC: return "PROMISC";
+ case MLX4_CMD_QUERY_FUNC_CAP: return "QUERY_FUNC_CAP";
+ case MLX4_CMD_QP_ATTACH: return "QP_ATTACH";
+
+ /* debug commands */
+ case MLX4_CMD_QUERY_DEBUG_MSG: return "QUERY_DEBUG_MSG";
+ case MLX4_CMD_SET_DEBUG_MSG: return "SET_DEBUG_MSG";
+
+ /* statistics commands */
+ case MLX4_CMD_QUERY_IF_STAT: return "QUERY_IF_STAT";
+ case MLX4_CMD_SET_IF_STAT: return "SET_IF_STAT";
+
+ /* register/delete flow steering network rules */
+ case MLX4_QP_FLOW_STEERING_ATTACH: return "QP_FLOW_STEERING_ATTACH";
+ case MLX4_QP_FLOW_STEERING_DETACH: return "QP_FLOW_STEERING_DETACH";
+ case MLX4_FLOW_STEERING_IB_UC_QP_RANGE: return "FLOW_STEERING_IB_UC_QP_RANGE";
+ default: return "OTHER";
+ }
+}
+
+static u8 mlx4_errno_to_status(int errno)
+{
+ switch (errno) {
+ case -EPERM:
+ return CMD_STAT_BAD_OP;
+ case -EINVAL:
+ return CMD_STAT_BAD_PARAM;
+ case -ENXIO:
+ return CMD_STAT_BAD_SYS_STATE;
+ case -EBUSY:
+ return CMD_STAT_RESOURCE_BUSY;
+ case -ENOMEM:
+ return CMD_STAT_EXCEED_LIM;
+ case -ENFILE:
+ return CMD_STAT_ICM_ERROR;
+ default:
+ return CMD_STAT_INTERNAL_ERR;
+ }
+}
+
+static int comm_pending(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u32 status = readl(&priv->mfunc.comm->slave_read);
+
+ return (swab32(status) >> 31) != priv->cmd.comm_toggle;
+}
+
+static void mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u32 val;
+
+ priv->cmd.comm_toggle ^= 1;
+ val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31);
+ __raw_writel((__force u32) cpu_to_be32(val),
+ &priv->mfunc.comm->slave_write);
+ mmiowb();
+}
+
+static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param,
+ unsigned long timeout)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ unsigned long end;
+ int err = 0;
+ int ret_from_pending = 0;
+
+ /* First, verify that the master reports correct status */
+ if (comm_pending(dev)) {
+ mlx4_warn(dev, "Communication channel is not idle."
+ "my toggle is %d (cmd:0x%x)\n",
+ priv->cmd.comm_toggle, cmd);
+ return -EAGAIN;
+ }
+
+ /* Write command */
+ down(&priv->cmd.poll_sem);
+ mlx4_comm_cmd_post(dev, cmd, param);
+
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (comm_pending(dev) && time_before(jiffies, end))
+ cond_resched();
+ ret_from_pending = comm_pending(dev);
+ if (ret_from_pending) {
+ /* check if the slave is trying to boot in the middle of
+ * FLR process. The only non-zero result in the RESET command
+ * is MLX4_DELAY_RESET_SLAVE*/
+ if ((MLX4_COMM_CMD_RESET == cmd)) {
+ mlx4_warn(dev, "Got slave FLRed from Communication"
+ " channel (ret:0x%x)\n", ret_from_pending);
+ err = MLX4_DELAY_RESET_SLAVE;
+ } else {
+ mlx4_warn(dev, "Communication channel timed out\n");
+ err = -ETIMEDOUT;
+ }
+ }
+
+ up(&priv->cmd.poll_sem);
+ return err;
+}
+
+static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op,
+ u16 param, unsigned long timeout)
+{
+ struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
+ struct mlx4_cmd_context *context;
+ unsigned long end;
+ int err = 0;
+
+ down(&cmd->event_sem);
+
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (comm_pending(dev) && time_before(jiffies, end))
+ cond_resched();
+ if (comm_pending(dev)) {
+ mlx4_warn(dev, "mlx4_comm_cmd_wait: Comm channel "
+ "is not idle. My toggle is %d (op: 0x%x)\n",
+ mlx4_priv(dev)->cmd.comm_toggle, op);
+ up(&cmd->event_sem);
+ return -EAGAIN;
+ }
+
+ spin_lock(&cmd->context_lock);
+ BUG_ON(cmd->free_head < 0);
+ context = &cmd->context[cmd->free_head];
+ context->token += cmd->token_mask + 1;
+ cmd->free_head = context->next;
+ spin_unlock(&cmd->context_lock);
+
+ init_completion(&context->done);
+
+ mlx4_comm_cmd_post(dev, op, param);
+
+ /* In slave, wait unconditionally for completion */
+ wait_for_completion(&context->done);
+
+ err = context->result;
+ if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) {
+ mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
+ op, context->fw_status);
+ goto out;
+ }
+
+out:
+ /* wait for the comm channel to become ready;
+ * this is necessary to prevent a race when
+ * switching between event and polling mode
+ */
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (comm_pending(dev) && time_before(jiffies, end))
+ cond_resched();
+
+ spin_lock(&cmd->context_lock);
+ context->next = cmd->free_head;
+ cmd->free_head = context - cmd->context;
+ spin_unlock(&cmd->context_lock);
+
+ up(&cmd->event_sem);
+ return err;
+}
+
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
+ unsigned long timeout)
+{
+ if (mlx4_priv(dev)->cmd.use_events)
+ return mlx4_comm_cmd_wait(dev, cmd, param, timeout);
+ return mlx4_comm_cmd_poll(dev, cmd, param, timeout);
+}
+
static int cmd_pending(struct mlx4_dev *dev)
{
- u32 status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
+ u32 status;
+ if (pci_channel_offline(dev->pdev))
+ return -EIO;
+
+ status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
+
return (status & swab32(1 << HCR_GO_BIT)) ||
(mlx4_priv(dev)->cmd.toggle ==
!!(status & swab32(1 << HCR_T_BIT)));
}
-static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
- u32 in_modifier, u8 op_modifier, u16 op, u16 token,
- int event)
+static int get_status(struct mlx4_dev *dev, u32 *status, int *go_bit,
+ int *t_bit)
{
+ if (pci_channel_offline(dev->pdev))
+ return -EIO;
+
+ *status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
+ *t_bit = !!(*status & swab32(1 << HCR_T_BIT));
+ *go_bit = !!(*status & swab32(1 << HCR_GO_BIT));
+
+ return 0;
+}
+
+static int mlx4_cmd_post(struct mlx4_dev *dev, struct timespec *ts1,
+ u64 in_param, u64 out_param, u32 in_modifier,
+ u8 op_modifier, u16 op, u16 token, int event)
+{
struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
u32 __iomem *hcr = cmd->hcr;
int ret = -EAGAIN;
unsigned long end;
+ int err, go_bit = 0, t_bit = 0;
+ u32 status = 0;
mutex_lock(&cmd->hcr_mutex);
+ if (pci_channel_offline(dev->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ ret = -EIO;
+ goto out;
+ }
+
end = jiffies;
if (event)
end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS);
while (cmd_pending(dev)) {
- if (time_after_eq(jiffies, end))
+ if (pci_channel_offline(dev->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ ret = -EIO;
goto out;
+ }
+
+ if (time_after_eq(jiffies, end)) {
+ mlx4_err(dev, "%s:cmd_pending failed\n", __func__);
+ goto out;
+ }
cond_resched();
}
@@ -184,6 +521,9 @@
__raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), hcr + 4);
__raw_writel((__force u32) cpu_to_be32(token << 16), hcr + 5);
+ if (ts1)
+ ktime_get_ts(ts1);
+
/* __raw_writel may not order writes. */
wmb();
@@ -191,7 +531,7 @@
(cmd->toggle << HCR_T_BIT) |
(event ? (1 << HCR_E_BIT) : 0) |
(op_modifier << HCR_OPMOD_SHIFT) |
- op), hcr + 6);
+ op), hcr + 6);
/*
* Make sure that our HCR writes don't get mixed in with
@@ -204,10 +544,78 @@
ret = 0;
out:
+ if (ret) {
+ err = get_status(dev, &status, &go_bit, &t_bit);
+ mlx4_warn(dev, "Could not post command %s (0x%x): ret=%d, "
+ "in_param=0x%llx, in_mod=0x%x, op_mod=0x%x, "
+ "get_status err=%d, status_reg=0x%x, go_bit=%d, "
+ "t_bit=%d, toggle=0x%x\n", cmd_to_str(op), op, ret,
+ (unsigned long long) in_param, in_modifier, op_modifier, err, status,
+ go_bit, t_bit, cmd->toggle);
+ }
mutex_unlock(&cmd->hcr_mutex);
return ret;
}
+static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
+ int out_is_imm, u32 in_modifier, u8 op_modifier,
+ u16 op, unsigned long timeout)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr;
+ int ret;
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+
+ vhcr->in_param = cpu_to_be64(in_param);
+ vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0;
+ vhcr->in_modifier = cpu_to_be32(in_modifier);
+ vhcr->opcode = cpu_to_be16((((u16) op_modifier) << 12) | (op & 0xfff));
+ vhcr->token = cpu_to_be16(CMD_POLL_TOKEN);
+ vhcr->status = 0;
+ vhcr->flags = !!(priv->cmd.use_events) << 6;
+
+ if (mlx4_is_master(dev)) {
+ ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr);
+ if (!ret) {
+ if (out_is_imm) {
+ if (out_param)
+ *out_param =
+ be64_to_cpu(vhcr->out_param);
+ else {
+ mlx4_err(dev, "response expected while"
+ "output mailbox is NULL for "
+ "command 0x%x\n", op);
+ vhcr->status = CMD_STAT_BAD_PARAM;
+ }
+ }
+ ret = mlx4_status_to_errno(vhcr->status);
+ }
+ } else {
+ ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0,
+ MLX4_COMM_TIME + timeout);
+ if (!ret) {
+ if (out_is_imm) {
+ if (out_param)
+ *out_param =
+ be64_to_cpu(vhcr->out_param);
+ else {
+ mlx4_err(dev, "response expected while"
+ "output mailbox is NULL for "
+ "command 0x%x\n", op);
+ vhcr->status = CMD_STAT_BAD_PARAM;
+ }
+ }
+ ret = mlx4_status_to_errno(vhcr->status);
+ } else
+ mlx4_err(dev, "failed execution of VHCR_POST command"
+ "opcode %s (0x%x)\n", cmd_to_str(op), op);
+ }
+
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ return ret;
+}
+
static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
u16 op, unsigned long timeout)
@@ -220,16 +628,37 @@
down(&priv->cmd.poll_sem);
- err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
+ if (pci_channel_offline(dev->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ err = -EIO;
+ goto out;
+ }
+
+ err = mlx4_cmd_post(dev, NULL, in_param, out_param ? *out_param : 0,
in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0);
if (err)
goto out;
end = msecs_to_jiffies(timeout) + jiffies;
- while (cmd_pending(dev) && time_before(jiffies, end))
+ while (cmd_pending(dev) && time_before(jiffies, end)) {
+ if (pci_channel_offline(dev->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ err = -EIO;
+ goto out;
+ }
+
cond_resched();
+ }
if (cmd_pending(dev)) {
+ mlx4_warn(dev, "command %s (0x%x) timed out (go bit not cleared)\n",
+ cmd_to_str(op), op);
err = -ETIMEDOUT;
goto out;
}
@@ -240,13 +669,12 @@
__raw_readl(hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
(u64) be32_to_cpu((__force __be32)
__raw_readl(hcr + HCR_OUT_PARAM_OFFSET + 4));
- stat = be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24;
+ stat = be32_to_cpu((__force __be32)
+ __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24;
err = mlx4_status_to_errno(stat);
- if (err) {
- if (op != MLX4_CMD_SET_NODE || stat != CMD_STAT_BAD_OP)
- mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
- op, stat);
- }
+ if (err)
+ mlx4_err(dev, "command %s (0x%x) failed: fw status = 0x%x\n",
+ cmd_to_str(op), op, stat);
out:
up(&priv->cmd.poll_sem);
@@ -277,7 +705,15 @@
struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
struct mlx4_cmd_context *context;
int err = 0;
+ int go_bit = 0, t_bit = 0, stat_err;
+ u32 status = 0;
+ struct timespec ts1, ts2;
+ ktime_t t1, t2, delta;
+ s64 ds;
+ if (out_is_imm && !out_param)
+ return -EINVAL;
+
down(&cmd->event_sem);
spin_lock(&cmd->context_lock);
@@ -289,19 +725,51 @@
init_completion(&context->done);
- mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
- in_modifier, op_modifier, op, context->token, 1);
+ err = mlx4_cmd_post(dev, &ts1, in_param, out_param ? *out_param : 0,
+ in_modifier, op_modifier, op, context->token, 1);
+ if (err)
+ goto out;
- if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) {
+ if (!wait_for_completion_timeout(&context->done,
+ msecs_to_jiffies(timeout))) {
+ stat_err = get_status(dev, &status, &go_bit, &t_bit);
+ mlx4_warn(dev, "command %s (0x%x) timed out: in_param=0x%llx, "
+ "in_mod=0x%x, op_mod=0x%x, get_status err=%d, "
+ "status_reg=0x%x, go_bit=%d, t_bit=%d, toggle=0x%x\n"
+ , cmd_to_str(op), op, (unsigned long long) in_param, in_modifier,
+ op_modifier, stat_err, status, go_bit, t_bit,
+ mlx4_priv(dev)->cmd.toggle);
err = -EBUSY;
goto out;
}
+ if (mlx4_debug_level & MLX4_DEBUG_MASK_CMD_TIME) {
+ ktime_get_ts(&ts2);
+ t1 = timespec_to_ktime(ts1);
+ t2 = timespec_to_ktime(ts2);
+ delta = ktime_sub(t2, t1);
+ ds = ktime_to_ns(delta);
+ pr_info("mlx4: fw exec time for %s is %lld nsec\n", cmd_to_str(op), (long long) ds);
+ }
err = context->result;
if (err) {
- if (op != MLX4_CMD_SET_NODE || context->fw_status != CMD_STAT_BAD_OP)
- mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
- op, context->fw_status);
+ mlx4_err(dev, "command %s (0x%x) failed: in_param=0x%llx, "
+ "in_mod=0x%x, op_mod=0x%x, fw status = 0x%x\n",
+ cmd_to_str(op), op, (unsigned long long) in_param, in_modifier,
+ op_modifier, context->fw_status);
+
+ switch(context->fw_status) {
+ case CMD_STAT_BAD_PARAM:
+ mlx4_err(dev, "Parameter is not supported, "
+ "parameter is out of range\n");
+ break;
+ case CMD_STAT_EXCEED_LIM:
+ mlx4_err(dev, "Required capability exceeded "
+ "device limits\n");
+ break;
+ default:
+ break;
+ }
goto out;
}
@@ -320,42 +788,1537 @@
int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
- u16 op, unsigned long timeout)
+ u16 op, unsigned long timeout, int native)
{
- if (mlx4_priv(dev)->cmd.use_events && !cold)
- return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm,
- in_modifier, op_modifier, op, timeout);
- else
- return mlx4_cmd_poll(dev, in_param, out_param, out_is_imm,
- in_modifier, op_modifier, op, timeout);
+ if (pci_channel_offline(dev->pdev))
+ return -EIO;
+
+ if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) {
+ if (mlx4_priv(dev)->cmd.use_events)
+ return mlx4_cmd_wait(dev, in_param, out_param,
+ out_is_imm, in_modifier,
+ op_modifier, op, timeout);
+ else
+ return mlx4_cmd_poll(dev, in_param, out_param,
+ out_is_imm, in_modifier,
+ op_modifier, op, timeout);
+ }
+ return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm,
+ in_modifier, op_modifier, op, timeout);
}
EXPORT_SYMBOL_GPL(__mlx4_cmd);
+
+static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev)
+{
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+}
+
+static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr,
+ int slave, u64 slave_addr,
+ int size, int is_read)
+{
+ u64 in_param;
+ u64 out_param;
+
+ if ((slave_addr & 0xfff) | (master_addr & 0xfff) |
+ (slave & ~0x7f) | (size & 0xff)) {
+ mlx4_err(dev, "Bad access mem params - slave_addr:0x%llx "
+ "master_addr:0x%llx slave_id:%d size:%d\n",
+ (unsigned long long) slave_addr, (unsigned long long) master_addr, slave, size);
+ return -EINVAL;
+ }
+
+ if (is_read) {
+ in_param = (u64) slave | slave_addr;
+ out_param = (u64) dev->caps.function | master_addr;
+ } else {
+ in_param = (u64) dev->caps.function | master_addr;
+ out_param = (u64) slave | slave_addr;
+ }
+
+ return mlx4_cmd_imm(dev, in_param, &out_param, size, 0,
+ MLX4_CMD_ACCESS_MEM,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+}
+
+static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox)
+{
+ struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf);
+ struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf);
+ int err;
+ int i;
+
+ if (index & 0x1f)
+ return -EINVAL;
+
+ in_mad->attr_mod = cpu_to_be32(index / 32);
+
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+
+ for (i = 0; i < 32; ++i)
+ pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]);
+
+ return err;
+}
+
+static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) {
+ err = query_pkey_block(dev, port, i, table + i, inbox, outbox);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+#define PORT_CAPABILITY_LOCATION_IN_SMP 20
+#define PORT_STATE_OFFSET 32
+
+static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf)
+{
+ if (mlx4_get_slave_port_state(dev, vf, port) == SLAVE_PORT_UP)
+ return IB_PORT_ACTIVE;
+ else
+ return IB_PORT_DOWN;
+}
+
+static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct ib_smp *smp = inbox->buf;
+ u32 index;
+ u8 port;
+ u16 *table;
+ int err;
+ int vidx, pidx;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct ib_smp *outsmp = outbox->buf;
+ __be16 *outtab = (__be16 *)(outsmp->data);
+ __be32 slave_cap_mask;
+ __be64 slave_node_guid;
+ port = vhcr->in_modifier;
+
+ if (smp->base_version == 1 &&
+ smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+ smp->class_version == 1) {
+ if (smp->method == IB_MGMT_METHOD_GET) {
+ if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) {
+ index = be32_to_cpu(smp->attr_mod);
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+ table = kcalloc(dev->caps.pkey_table_len[port], sizeof *table, GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+ /* need to get the full pkey table because the paravirtualized
+ * pkeys may be scattered among several pkey blocks.
+ */
+ err = get_full_pkey_table(dev, port, table, inbox, outbox);
+ if (!err) {
+ for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) {
+ pidx = priv->virt2phys_pkey[slave][port - 1][vidx];
+ outtab[vidx % 32] = cpu_to_be16(table[pidx]);
+ }
+ }
+ kfree(table);
+ return err;
+ }
+ if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) {
+ /*get the slave specific caps:*/
+ /*do the command */
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ vhcr->in_modifier, vhcr->op_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ /* modify the response for slaves */
+ if (!err && slave != mlx4_master_func_num(dev)) {
+ u8 *state = outsmp->data + PORT_STATE_OFFSET;
+
+ *state = (*state & 0xf0) | vf_port_state(dev, port, slave);
+ slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
+ memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4);
+ }
+ return err;
+ }
+ if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
+ /* compute slave's gid block */
+ smp->attr_mod = cpu_to_be32(slave / 8);
+ /* execute cmd */
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ vhcr->in_modifier, vhcr->op_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ if (!err) {
+ /* if needed, move slave gid to index 0 */
+ if (slave % 8)
+ memcpy(outsmp->data,
+ outsmp->data + (slave % 8) * 8, 8);
+ /* delete all other gids */
+ memset(outsmp->data + 8, 0, 56);
+ }
+ return err;
+ }
+ if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ vhcr->in_modifier, vhcr->op_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ if (!err) {
+ slave_node_guid = mlx4_get_slave_node_guid(dev, slave);
+ memcpy(outsmp->data + 12, &slave_node_guid, 8);
+ }
+ return err;
+ }
+ }
+ }
+ if (slave != mlx4_master_func_num(dev) &&
+ ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) ||
+ (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+ smp->method == IB_MGMT_METHOD_SET))) {
+ mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, "
+ "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n",
+ slave, smp->method, smp->mgmt_class,
+ be16_to_cpu(smp->attr_id));
+ return -EPERM;
+ }
+ /*default:*/
+ return mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ vhcr->in_modifier, vhcr->op_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+}
+
+static int MLX4_CMD_DIAG_RPRT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ return -EPERM;
+}
+
+static int MLX4_CMD_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ return -EPERM;
+}
+
+int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ u64 in_param;
+ u64 out_param;
+ int err;
+
+ in_param = cmd->has_inbox ? (u64) inbox->dma : vhcr->in_param;
+ out_param = cmd->has_outbox ? (u64) outbox->dma : vhcr->out_param;
+ if (cmd->encode_slave_id) {
+ in_param &= 0xffffffffffffff00ll;
+ in_param |= slave;
+ }
+
+ err = __mlx4_cmd(dev, in_param, &out_param, cmd->out_is_imm,
+ vhcr->in_modifier, vhcr->op_modifier, vhcr->op,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ if (cmd->out_is_imm)
+ vhcr->out_param = out_param;
+
+ return err;
+}
+
+static struct mlx4_cmd_info cmd_info[] = {
+ {
+ .opcode = MLX4_CMD_QUERY_FW,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_FW_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_HCA,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_DEV_CAP,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_DEV_CAP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_FUNC_CAP,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_FUNC_CAP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_ADAPTER,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_INIT_PORT,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_INIT_PORT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_CLOSE_PORT,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CLOSE_PORT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_PORT,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_PORT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SET_PORT,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SET_PORT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_MAP_EQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_MAP_EQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SW2HW_EQ,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_SW2HW_EQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_HW_HEALTH_CHECK,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_DIAG_RPRT,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .skip_err_print = true,
+ .verify = NULL,
+ .wrapper = MLX4_CMD_DIAG_RPRT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_NOP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_ALLOC_RES,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = true,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_ALLOC_RES_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_FREE_RES,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_FREE_RES_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SW2HW_MPT,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_SW2HW_MPT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_MPT,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_MPT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_HW2SW_MPT,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_HW2SW_MPT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_READ_MTT,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_WRITE_MTT,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_WRITE_MTT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SYNC_TPT,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_HW2SW_EQ,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_HW2SW_EQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_EQ,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_EQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SW2HW_CQ,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_SW2HW_CQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_HW2SW_CQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_HW2SW_CQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_CQ,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_CQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_MODIFY_CQ,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = true,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_MODIFY_CQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SW2HW_SRQ,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_SW2HW_SRQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_HW2SW_SRQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_HW2SW_SRQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_SRQ,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_SRQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_ARM_SRQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_ARM_SRQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_RST2INIT_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_RST2INIT_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_INIT2INIT_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_INIT2INIT_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_INIT2RTR_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_INIT2RTR_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_RTR2RTS_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_RTR2RTS_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_RTS2RTS_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_RTS2RTS_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SQERR2RTS_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SQERR2RTS_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_2ERR_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_RTS2SQD_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SQD2SQD_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SQD2SQD_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SQD2RTS_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SQD2RTS_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_2RST_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_2RST_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_QP,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SUSPEND_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_UNSUSPEND_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_UPDATE_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .skip_err_print = true,
+ .verify = NULL,
+ .wrapper = MLX4_CMD_UPDATE_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_CONF_SPECIAL_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL, /* XXX verify: only demux can do this */
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_MAD_IFC,
+ .has_inbox = true,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_MAD_IFC_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_IF_STAT,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_IF_STAT_wrapper
+ },
+ /* Native multicast commands are not available for guests */
+ {
+ .opcode = MLX4_CMD_QP_ATTACH,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QP_ATTACH_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_PROMISC,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_PROMISC_wrapper
+ },
+ /* Ethernet specific commands */
+ {
+ .opcode = MLX4_CMD_SET_VLAN_FLTR,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SET_VLAN_FLTR_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SET_MCAST_FLTR,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SET_MCAST_FLTR_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_DUMP_ETH_STATS,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_DUMP_ETH_STATS_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_INFORM_FLR_DONE,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ /* flow steering commands */
+ {
+ .opcode = MLX4_QP_FLOW_STEERING_ATTACH,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = true,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QP_FLOW_STEERING_ATTACH_wrapper
+ },
+ {
+ .opcode = MLX4_QP_FLOW_STEERING_DETACH,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper
+ },
+ /* wol commands */
+ {
+ .opcode = MLX4_CMD_MOD_STAT_CFG,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .skip_err_print = true,
+ .verify = NULL,
+ .wrapper = mlx4_MOD_STAT_CFG_wrapper
+ },
+};
+
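Side note: the array above is a plain opcode dispatch table. Each entry pairs a command opcode with its mailbox requirements (has_inbox/has_outbox), the immediate-output and slave-id-encoding flags, an optional verify hook, and the wrapper that paravirtualizes the command; the handler below resolves entries with a linear scan. As a hedged, self-contained illustration of the idiom (user-space sketch, names illustrative, not the driver's API):

/* sketch (not driver code): opcode dispatch table with linear lookup */
#include <stdio.h>
#include <stddef.h>

struct cmd_desc {
	int opcode;
	int has_inbox;
	int (*handler)(int opcode);
};

static int generic_handler(int opcode)
{
	printf("handling opcode 0x%x\n", opcode);
	return 0;
}

static const struct cmd_desc table[] = {
	{ 0x31, 1, generic_handler },
	{ 0x50, 0, generic_handler },
};

static const struct cmd_desc *lookup(int opcode)
{
	size_t i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].opcode == opcode)
			return &table[i];
	return NULL;	/* unknown -> the driver reports CMD_STAT_BAD_PARAM */
}

int main(void)
{
	const struct cmd_desc *c = lookup(0x31);

	return c ? c->handler(c->opcode) : 1;
}

A linear scan is adequate here because the table is small and a lookup happens only once per slave command.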
+static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr_cmd *in_vhcr)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cmd_info *cmd = NULL;
+ struct mlx4_vhcr_cmd *vhcr_cmd = in_vhcr ? in_vhcr : priv->mfunc.vhcr;
+ struct mlx4_vhcr *vhcr;
+ struct mlx4_cmd_mailbox *inbox = NULL;
+ struct mlx4_cmd_mailbox *outbox = NULL;
+ u64 in_param;
+ u64 out_param;
+ int ret = 0;
+ int i;
+ int err = 0;
+
+ /* Create sw representation of Virtual HCR */
+ vhcr = kzalloc(sizeof(struct mlx4_vhcr), GFP_KERNEL);
+ if (!vhcr)
+ return -ENOMEM;
+
+ /* DMA in the vHCR */
+ if (!in_vhcr) {
+ ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave,
+ priv->mfunc.master.slave_state[slave].vhcr_dma,
+ ALIGN(sizeof(struct mlx4_vhcr_cmd),
+ MLX4_ACCESS_MEM_ALIGN), 1);
+ if (ret) {
+			mlx4_err(dev, "%s: Failed reading vhcr, "
+				 "ret: 0x%x\n", __func__, ret);
+ kfree(vhcr);
+ return ret;
+ }
+ }
+
+ /* Fill SW VHCR fields */
+ vhcr->in_param = be64_to_cpu(vhcr_cmd->in_param);
+ vhcr->out_param = be64_to_cpu(vhcr_cmd->out_param);
+ vhcr->in_modifier = be32_to_cpu(vhcr_cmd->in_modifier);
+ vhcr->token = be16_to_cpu(vhcr_cmd->token);
+ vhcr->op = be16_to_cpu(vhcr_cmd->opcode) & 0xfff;
+ vhcr->op_modifier = (u8) (be16_to_cpu(vhcr_cmd->opcode) >> 12);
+ vhcr->e_bit = vhcr_cmd->flags & (1 << 6);
+
+ /* Lookup command */
+ for (i = 0; i < ARRAY_SIZE(cmd_info); ++i) {
+ if (vhcr->op == cmd_info[i].opcode) {
+ cmd = &cmd_info[i];
+ break;
+ }
+ }
+ if (!cmd) {
+		mlx4_err(dev, "unparavirtualized command %s (0x%x) received from slave %d\n",
+ cmd_to_str(vhcr->op), vhcr->op, slave);
+ vhcr_cmd->status = CMD_STAT_BAD_PARAM;
+ goto out_status;
+ }
+
+ /* Read inbox */
+ if (cmd->has_inbox) {
+ vhcr->in_param &= INBOX_MASK;
+ inbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(inbox)) {
+ vhcr_cmd->status = CMD_STAT_BAD_SIZE;
+ inbox = NULL;
+ goto out_status;
+ }
+
+ if (mlx4_ACCESS_MEM(dev, inbox->dma, slave,
+ vhcr->in_param,
+ MLX4_MAILBOX_SIZE, 1)) {
+ mlx4_err(dev, "%s: Failed reading inbox for cmd %s (0x%x)\n",
+ __func__, cmd_to_str(cmd->opcode), cmd->opcode);
+ vhcr_cmd->status = CMD_STAT_INTERNAL_ERR;
+ goto out_status;
+ }
+ }
+
+ /* Apply permission and bound checks if applicable */
+ if (cmd->verify && cmd->verify(dev, slave, vhcr, inbox)) {
+ mlx4_warn(dev, "Command %s (0x%x) from slave: %d failed protection "
+ "checks for resource_id: %d\n", cmd_to_str(vhcr->op),
+ vhcr->op, slave, vhcr->in_modifier);
+ vhcr_cmd->status = CMD_STAT_BAD_OP;
+ goto out_status;
+ }
+
+ /* Allocate outbox */
+ if (cmd->has_outbox) {
+ outbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(outbox)) {
+ vhcr_cmd->status = CMD_STAT_BAD_SIZE;
+ outbox = NULL;
+ goto out_status;
+ }
+ }
+
+ /* Execute the command! */
+ if (cmd->wrapper) {
+ err = cmd->wrapper(dev, slave, vhcr, inbox, outbox,
+ cmd);
+ if (cmd->out_is_imm)
+ vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param);
+ } else {
+ in_param = cmd->has_inbox ? (u64) inbox->dma :
+ vhcr->in_param;
+ out_param = cmd->has_outbox ? (u64) outbox->dma :
+ vhcr->out_param;
+ err = __mlx4_cmd(dev, in_param, &out_param,
+ cmd->out_is_imm, vhcr->in_modifier,
+ vhcr->op_modifier, vhcr->op,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ if (cmd->out_is_imm) {
+ vhcr->out_param = out_param;
+ vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param);
+ }
+ }
+
+ if (err) {
+ if (!cmd->skip_err_print)
+ mlx4_warn(dev, "vhcr command %s (0x%x) slave:%d "
+ "in_param 0x%llx in_mod=0x%x, op_mod=0x%x "
+ "failed with error:%d, status %d\n",
+ cmd_to_str(vhcr->op), vhcr->op, slave,
+ (unsigned long long) vhcr->in_param, vhcr->in_modifier,
+ vhcr->op_modifier, vhcr->errno, err);
+ vhcr_cmd->status = mlx4_errno_to_status(err);
+ goto out_status;
+ }
+
+
+ /* Write outbox if command completed successfully */
+ if (cmd->has_outbox && !vhcr_cmd->status) {
+ ret = mlx4_ACCESS_MEM(dev, outbox->dma, slave,
+ vhcr->out_param,
+ MLX4_MAILBOX_SIZE, MLX4_CMD_WRAPPED);
+ if (ret) {
+			/* If we failed to write back the outbox after the
+			 * command was successfully executed, we must fail this
+			 * slave, as it is now in an undefined state */
+ mlx4_err(dev, "%s: Failed writing outbox\n", __func__);
+ goto out;
+ }
+ }
+
+out_status:
+ /* DMA back vhcr result */
+ if (!in_vhcr) {
+ ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave,
+ priv->mfunc.master.slave_state[slave].vhcr_dma,
+ ALIGN(sizeof(struct mlx4_vhcr),
+ MLX4_ACCESS_MEM_ALIGN),
+ MLX4_CMD_WRAPPED);
+ if (ret)
+			mlx4_err(dev, "%s: Failed writing vhcr result\n",
+ __func__);
+ else if (vhcr->e_bit &&
+ mlx4_GEN_EQE(dev, slave, &priv->mfunc.master.cmd_eqe))
+ mlx4_warn(dev, "Failed to generate command completion "
+ "eqe for slave %d\n", slave);
+ }
+
+out:
+ kfree(vhcr);
+ mlx4_free_cmd_mailbox(dev, inbox);
+ mlx4_free_cmd_mailbox(dev, outbox);
+ return ret;
+}
+
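Note how the 16-bit opcode word is split above: after the be16_to_cpu() conversion, the low 12 bits carry the command opcode and the high 4 bits the opcode modifier, while bit 6 of the flags byte is the event ('e') bit. A minimal stand-alone sketch of that decode, with sample values only (not driver code):

/* sketch (not driver code): decoding the 16-bit vHCR opcode field */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t opcode = 0x3019;	/* sample: modifier 3, opcode 0x019 */
	uint8_t flags = 1 << 6;		/* sample flags byte with e-bit set */

	uint16_t op = opcode & 0xfff;		/* low 12 bits: opcode */
	unsigned op_mod = opcode >> 12;		/* high 4 bits: modifier */
	int e_bit = !!(flags & (1 << 6));	/* bit 6: event bit */

	printf("op=0x%03x mod=%u e=%d\n", (unsigned)op, op_mod, e_bit);
	return 0;
}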
+static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv,
+ int slave, int port)
+{
+ struct mlx4_vport_oper_state *vp_oper;
+ struct mlx4_vport_state *vp_admin;
+ struct mlx4_vf_immed_vlan_work *work;
+ int err;
+ int admin_vlan_ix = NO_INDX;
+
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+ if (vp_oper->state.default_vlan == vp_admin->default_vlan &&
+ vp_oper->state.default_qos == vp_admin->default_qos)
+ return 0;
+
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ if (vp_oper->state.default_vlan != vp_admin->default_vlan) {
+ if (MLX4_VGT != vp_admin->default_vlan) {
+ err = __mlx4_register_vlan(&priv->dev, port,
+ vp_admin->default_vlan,
+ &admin_vlan_ix);
+ if (err) {
+ mlx4_warn((&priv->dev),
+ "No vlan resources slave %d, port %d\n",
+ slave, port);
+ kfree(work);
+ return err;
+ }
+ } else {
+ admin_vlan_ix = NO_INDX;
+ }
+ work->flags |= MLX4_VF_IMMED_VLAN_FLAG_VLAN;
+ mlx4_dbg((&(priv->dev)),
+ "alloc vlan %d idx %d slave %d port %d\n",
+ (int)(vp_admin->default_vlan),
+ admin_vlan_ix, slave, port);
+ }
+
+ /* save original vlan ix and vlan id */
+ work->orig_vlan_id = vp_oper->state.default_vlan;
+ work->orig_vlan_ix = vp_oper->vlan_idx;
+
+ /* handle new qos */
+ if (vp_oper->state.default_qos != vp_admin->default_qos)
+ work->flags |= MLX4_VF_IMMED_VLAN_FLAG_QOS;
+
+ if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN)
+ vp_oper->vlan_idx = admin_vlan_ix;
+
+ vp_oper->state.default_vlan = vp_admin->default_vlan;
+ vp_oper->state.default_qos = vp_admin->default_qos;
+
+ /* iterate over QPs owned by this slave, using UPDATE_QP */
+ work->port = port;
+ work->slave = slave;
+ work->qos = vp_oper->state.default_qos;
+ work->vlan_id = vp_oper->state.default_vlan;
+ work->vlan_ix = vp_oper->vlan_idx;
+ work->priv = priv;
+ INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler);
+ queue_work(priv->mfunc.master.comm_wq, &work->work);
+
+ return 0;
+}
+
+
+static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave)
+{
+ int port, err;
+ struct mlx4_vport_state *vp_admin;
+ struct mlx4_vport_oper_state *vp_oper;
+
+ for (port = 1; port <= MLX4_MAX_PORTS; port++) {
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+ vp_oper->state = *vp_admin;
+ if (MLX4_VGT != vp_admin->default_vlan) {
+ err = __mlx4_register_vlan(&priv->dev, port,
+ vp_admin->default_vlan, &(vp_oper->vlan_idx));
+ if (err) {
+ vp_oper->vlan_idx = NO_INDX;
+ mlx4_warn((&priv->dev),
+					"No vlan resources slave %d, port %d\n",
+ slave, port);
+ return err;
+ }
+ mlx4_dbg((&(priv->dev)), "alloc vlan %d idx %d slave %d port %d\n",
+ (int)(vp_oper->state.default_vlan),
+ vp_oper->vlan_idx, slave, port);
+ }
+ if (vp_admin->spoofchk) {
+ vp_oper->mac_idx = __mlx4_register_mac(&priv->dev,
+ port,
+ vp_admin->mac);
+ if (0 > vp_oper->mac_idx) {
+ err = vp_oper->mac_idx;
+ vp_oper->mac_idx = NO_INDX;
+ mlx4_warn((&priv->dev),
+ "No mac resources slave %d, port %d\n",
+ slave, port);
+ return err;
+ }
+ mlx4_dbg((&(priv->dev)), "alloc mac %llx idx %d slave %d port %d\n",
+ (unsigned long long) vp_oper->state.mac, vp_oper->mac_idx, slave, port);
+ }
+ }
+ return 0;
+}
+
+static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave)
+{
+ int port;
+ struct mlx4_vport_oper_state *vp_oper;
+
+ for (port = 1; port <= MLX4_MAX_PORTS; port++) {
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ if (NO_INDX != vp_oper->vlan_idx) {
+ __mlx4_unregister_vlan(&priv->dev,
+ port, vp_oper->state.default_vlan);
+ vp_oper->vlan_idx = NO_INDX;
+ }
+ if (NO_INDX != vp_oper->mac_idx) {
+ __mlx4_unregister_mac(&priv->dev, port, vp_oper->state.mac);
+ vp_oper->mac_idx = NO_INDX;
+ }
+ }
+ return;
+}
+
+static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
+ u16 param, u8 toggle)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
+ u32 reply;
+ u8 is_going_down = 0;
+ int i;
+ unsigned long flags;
+
+ slave_state[slave].comm_toggle ^= 1;
+ reply = (u32) slave_state[slave].comm_toggle << 31;
+ if (toggle != slave_state[slave].comm_toggle) {
+		mlx4_warn(dev, "Incorrect toggle %d from slave %d. *** MASTER "
+			  "STATE COMPROMISED ***\n", toggle, slave);
+ goto reset_slave;
+ }
+ if (cmd == MLX4_COMM_CMD_RESET) {
+ mlx4_warn(dev, "Received reset from slave:%d\n", slave);
+ slave_state[slave].active = false;
+ slave_state[slave].old_vlan_api = false;
+ mlx4_master_deactivate_admin_state(priv, slave);
+ for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) {
+ slave_state[slave].event_eq[i].eqn = -1;
+ slave_state[slave].event_eq[i].token = 0;
+ }
+		/* check if we are in the middle of the FLR process;
+		 * if so, return "retry" status to the slave */
+ if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd)
+ goto inform_slave_state;
+
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave);
+
+ /* write the version in the event field */
+ reply |= mlx4_comm_get_version();
+
+ goto reset_slave;
+ }
+	/* command from slave in the middle of FLR */
+ if (cmd != MLX4_COMM_CMD_RESET &&
+ MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) {
+		mlx4_warn(dev, "slave:%d is trying to run cmd (0x%x) "
+ "in the middle of FLR\n", slave, cmd);
+ return;
+ }
+
+ switch (cmd) {
+ case MLX4_COMM_CMD_VHCR0:
+ if (slave_state[slave].last_cmd != MLX4_COMM_CMD_RESET)
+ goto reset_slave;
+ slave_state[slave].vhcr_dma = ((u64) param) << 48;
+ priv->mfunc.master.slave_state[slave].cookie = 0;
+ break;
+ case MLX4_COMM_CMD_VHCR1:
+ if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR0)
+ goto reset_slave;
+ slave_state[slave].vhcr_dma |= ((u64) param) << 32;
+ break;
+ case MLX4_COMM_CMD_VHCR2:
+ if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR1)
+ goto reset_slave;
+ slave_state[slave].vhcr_dma |= ((u64) param) << 16;
+ break;
+ case MLX4_COMM_CMD_VHCR_EN:
+ if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR2)
+ goto reset_slave;
+ slave_state[slave].vhcr_dma |= param;
+ if (mlx4_master_activate_admin_state(priv, slave))
+ goto reset_slave;
+ slave_state[slave].active = true;
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave);
+ break;
+ case MLX4_COMM_CMD_VHCR_POST:
+ if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
+ (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST))
+ goto reset_slave;
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ if (mlx4_master_process_vhcr(dev, slave, NULL)) {
+ mlx4_err(dev, "Failed processing vhcr for slave: %d,"
+ " resetting slave.\n", slave);
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ goto reset_slave;
+ }
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ break;
+ default:
+ mlx4_warn(dev, "Bad comm cmd: %d from slave: %d\n", cmd, slave);
+ goto reset_slave;
+ }
+ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+ if (!slave_state[slave].is_slave_going_down)
+ slave_state[slave].last_cmd = cmd;
+ else
+ is_going_down = 1;
+ spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+ if (is_going_down) {
+		mlx4_warn(dev, "Slave is going down, aborting command (%d)"
+ " executing from slave: %d\n",
+ cmd, slave);
+ return;
+ }
+ __raw_writel((__force u32) cpu_to_be32(reply),
+ &priv->mfunc.comm[slave].slave_read);
+ mmiowb();
+
+ return;
+
+reset_slave:
+ /* cleanup any slave resources */
+ mlx4_delete_all_resources_for_slave(dev, slave);
+ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+ if (!slave_state[slave].is_slave_going_down)
+ slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET;
+ spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+	/* with the slave in the middle of FLR, no need to clean resources again */
+inform_slave_state:
+ __raw_writel((__force u32) cpu_to_be32(reply),
+ &priv->mfunc.comm[slave].slave_read);
+ wmb();
+}
+
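The VHCR0/VHCR1/VHCR2/VHCR_EN sequence above hands the master the slave's 64-bit vHCR DMA address 16 bits at a time, most-significant chunk first; the final 16 bits ride on the enable command itself. A small stand-alone sketch of the reassembly (sample address only, not driver code):

/* sketch (not driver code): rebuilding the vHCR DMA address from 16-bit chunks */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t vhcr_dma;

	vhcr_dma  = (uint64_t)0x1122 << 48;	/* MLX4_COMM_CMD_VHCR0 */
	vhcr_dma |= (uint64_t)0x3344 << 32;	/* MLX4_COMM_CMD_VHCR1 */
	vhcr_dma |= (uint64_t)0x5566 << 16;	/* MLX4_COMM_CMD_VHCR2 */
	vhcr_dma |= 0x7788;			/* MLX4_COMM_CMD_VHCR_EN */

	/* prints vhcr_dma = 0x1122334455667788 */
	printf("vhcr_dma = 0x%016llx\n", (unsigned long long)vhcr_dma);
	return 0;
}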
+/* master command processing */
+void mlx4_master_comm_channel(struct work_struct *work)
+{
+ struct mlx4_mfunc_master_ctx *master =
+ container_of(work,
+ struct mlx4_mfunc_master_ctx,
+ comm_work);
+ struct mlx4_mfunc *mfunc =
+ container_of(master, struct mlx4_mfunc, master);
+ struct mlx4_priv *priv =
+ container_of(mfunc, struct mlx4_priv, mfunc);
+ struct mlx4_dev *dev = &priv->dev;
+ __be32 *bit_vec;
+ u32 comm_cmd;
+ u32 vec;
+ int i, j, slave;
+ int toggle;
+ int served = 0;
+ int reported = 0;
+ u32 slt;
+
+ bit_vec = master->comm_arm_bit_vector;
+ for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) {
+ vec = be32_to_cpu(bit_vec[i]);
+ for (j = 0; j < 32; j++) {
+ if (!(vec & (1 << j)))
+ continue;
+ ++reported;
+ slave = (i * 32) + j;
+ comm_cmd = swab32(readl(
+ &mfunc->comm[slave].slave_write));
+ slt = swab32(readl(&mfunc->comm[slave].slave_read))
+ >> 31;
+ toggle = comm_cmd >> 31;
+ if (toggle != slt) {
+ if (master->slave_state[slave].comm_toggle
+ != slt) {
+ mlx4_info(dev, "slave %d out of sync."
+ " read toggle %d, state toggle %d. "
+ "Resynching.\n", slave, slt,
+ master->slave_state[slave].comm_toggle);
+ master->slave_state[slave].comm_toggle =
+ slt;
+ }
+ mlx4_master_do_cmd(dev, slave,
+ comm_cmd >> 16 & 0xff,
+ comm_cmd & 0xffff, toggle);
+ ++served;
+ } else
+ mlx4_err(dev, "slave %d out of sync."
+ " read toggle %d, write toggle %d.\n", slave, slt,
+ toggle);
+ }
+ }
+
+ if (reported && reported != served)
+ mlx4_warn(dev, "Got command event with bitmask from %d slaves"
+ " but %d were served\n",
+ reported, served);
+}
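Each 32-bit word read from a slave's slave_write register packs the toggle in bit 31, the command in bits 23:16, and the parameter in bits 15:0; the slave index itself is recovered from the position of the set bit in the arm vector (slave = i * 32 + j). A sketch of the decode (sample value only, not driver code):

/* sketch (not driver code): decoding a slave's comm-channel word */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t comm_cmd = 0x80410003;		/* sample slave_write value */

	int toggle = comm_cmd >> 31;			/* bit 31 */
	uint8_t cmd = (comm_cmd >> 16) & 0xff;		/* bits 23:16 */
	uint16_t param = comm_cmd & 0xffff;		/* bits 15:0 */

	/* prints toggle=1 cmd=0x41 param=0x0003 */
	printf("toggle=%d cmd=0x%02x param=0x%04x\n",
	       toggle, (unsigned)cmd, (unsigned)param);
	return 0;
}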
+/* arm the master comm channel to receive further slave events */
+void mlx4_master_arm_comm_channel(struct work_struct *work)
+{
+ struct mlx4_mfunc_master_ctx *master =
+ container_of(work,
+ struct mlx4_mfunc_master_ctx,
+ arm_comm_work);
+ struct mlx4_mfunc *mfunc =
+ container_of(master, struct mlx4_mfunc, master);
+ struct mlx4_priv *priv =
+ container_of(mfunc, struct mlx4_priv, mfunc);
+ struct mlx4_dev *dev = &priv->dev;
+
+ if (mlx4_ARM_COMM_CHANNEL(dev))
+ mlx4_warn(dev, "Failed to arm comm channel events\n");
+}
+
+static int sync_toggles(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int wr_toggle;
+ int rd_toggle;
+ unsigned long end;
+
+ wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31;
+ end = jiffies + msecs_to_jiffies(5000);
+
+ while (time_before(jiffies, end)) {
+ rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31;
+ if (rd_toggle == wr_toggle) {
+ priv->cmd.comm_toggle = rd_toggle;
+ return 0;
+ }
+
+ cond_resched();
+ }
+
+ /*
+	 * we could reach here if, for example, the previous VM using this
+	 * function misbehaved and left the channel in an unsynced state. We
+ * should fix this here and give this VM a chance to use a properly
+ * synced channel
+ */
+	mlx4_warn(dev, "recovering from previously misbehaved VM\n");
+ __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_read);
+ __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_write);
+ priv->cmd.comm_toggle = 0;
+
+ return 0;
+}
+
+int mlx4_multi_func_init(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state;
+ int i, j, err, port;
+
+ if (mlx4_is_master(dev))
+ priv->mfunc.comm =
+ ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) +
+ priv->fw.comm_base, MLX4_COMM_PAGESIZE);
+ else
+ priv->mfunc.comm =
+ ioremap(pci_resource_start(dev->pdev, 2) +
+ MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE);
+ if (!priv->mfunc.comm) {
+ mlx4_err(dev, "Couldn't map communication vector.\n");
+ goto err_vhcr;
+ }
+
+ if (mlx4_is_master(dev)) {
+ priv->mfunc.master.slave_state =
+ kzalloc(dev->num_slaves *
+ sizeof(struct mlx4_slave_state), GFP_KERNEL);
+ if (!priv->mfunc.master.slave_state)
+ goto err_comm;
+
+ priv->mfunc.master.vf_admin =
+ kzalloc(dev->num_slaves *
+ sizeof(struct mlx4_vf_admin_state), GFP_KERNEL);
+ if (!priv->mfunc.master.vf_admin)
+ goto err_comm_admin;
+
+ priv->mfunc.master.vf_oper =
+ kzalloc(dev->num_slaves *
+ sizeof(struct mlx4_vf_oper_state), GFP_KERNEL);
+ if (!priv->mfunc.master.vf_oper)
+ goto err_comm_oper;
+
+ for (i = 0; i < dev->num_slaves; ++i) {
+ s_state = &priv->mfunc.master.slave_state[i];
+ s_state->last_cmd = MLX4_COMM_CMD_RESET;
+ mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]);
+ for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j)
+ s_state->event_eq[j].eqn = -1;
+ __raw_writel((__force u32) 0,
+ &priv->mfunc.comm[i].slave_write);
+ __raw_writel((__force u32) 0,
+ &priv->mfunc.comm[i].slave_read);
+ mmiowb();
+ for (port = 1; port <= MLX4_MAX_PORTS; port++) {
+ s_state->vlan_filter[port] =
+ kzalloc(sizeof(struct mlx4_vlan_fltr),
+ GFP_KERNEL);
+ if (!s_state->vlan_filter[port]) {
+ if (--port)
+ kfree(s_state->vlan_filter[port]);
+ goto err_slaves;
+ }
+ INIT_LIST_HEAD(&s_state->mcast_filters[port]);
+ priv->mfunc.master.vf_admin[i].vport[port].default_vlan = MLX4_VGT;
+ priv->mfunc.master.vf_oper[i].vport[port].state.default_vlan = MLX4_VGT;
+ priv->mfunc.master.vf_oper[i].vport[port].vlan_idx = NO_INDX;
+ priv->mfunc.master.vf_oper[i].vport[port].mac_idx = NO_INDX;
+ }
+ spin_lock_init(&s_state->lock);
+ }
+
+ memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size);
+ priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
+ INIT_WORK(&priv->mfunc.master.comm_work,
+ mlx4_master_comm_channel);
+ INIT_WORK(&priv->mfunc.master.arm_comm_work,
+ mlx4_master_arm_comm_channel);
+ INIT_WORK(&priv->mfunc.master.slave_event_work,
+ mlx4_gen_slave_eqe);
+ INIT_WORK(&priv->mfunc.master.slave_flr_event_work,
+ mlx4_master_handle_slave_flr);
+ spin_lock_init(&priv->mfunc.master.slave_state_lock);
+ spin_lock_init(&priv->mfunc.master.slave_eq.event_lock);
+ priv->mfunc.master.comm_wq =
+ create_singlethread_workqueue("mlx4_comm");
+ if (!priv->mfunc.master.comm_wq)
+ goto err_slaves;
+
+ if (mlx4_init_resource_tracker(dev))
+ goto err_thread;
+
+ err = mlx4_ARM_COMM_CHANNEL(dev);
+ if (err) {
+			mlx4_err(dev, "Failed to arm comm channel eq: %x\n",
+ err);
+ goto err_resource;
+ }
+
+ } else {
+ err = sync_toggles(dev);
+ if (err) {
+ mlx4_err(dev, "Couldn't sync toggles\n");
+ goto err_comm;
+ }
+ }
+ return 0;
+
+err_resource:
+ mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL);
+err_thread:
+ flush_workqueue(priv->mfunc.master.comm_wq);
+ destroy_workqueue(priv->mfunc.master.comm_wq);
+err_slaves:
+ while (--i) {
+ for (port = 1; port <= MLX4_MAX_PORTS; port++)
+ kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
+ }
+ kfree(priv->mfunc.master.vf_oper);
+err_comm_oper:
+ kfree(priv->mfunc.master.vf_admin);
+err_comm_admin:
+ kfree(priv->mfunc.master.slave_state);
+err_comm:
+ iounmap(priv->mfunc.comm);
+err_vhcr:
+ dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ priv->mfunc.vhcr,
+ priv->mfunc.vhcr_dma);
+ priv->mfunc.vhcr = NULL;
+ return -ENOMEM;
+}
+
int mlx4_cmd_init(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
mutex_init(&priv->cmd.hcr_mutex);
+ mutex_init(&priv->cmd.slave_cmd_mutex);
sema_init(&priv->cmd.poll_sem, 1);
priv->cmd.use_events = 0;
priv->cmd.toggle = 1;
- priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_HCR_BASE,
- MLX4_HCR_SIZE);
- if (!priv->cmd.hcr) {
- mlx4_err(dev, "Couldn't map command register.");
- return -ENOMEM;
+ priv->cmd.hcr = NULL;
+ priv->mfunc.vhcr = NULL;
+
+ if (!mlx4_is_slave(dev)) {
+ priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) +
+ MLX4_HCR_BASE, MLX4_HCR_SIZE);
+ if (!priv->cmd.hcr) {
+ mlx4_err(dev, "Couldn't map command register.\n");
+ return -ENOMEM;
+ }
}
+ if (mlx4_is_mfunc(dev)) {
+ priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ &priv->mfunc.vhcr_dma,
+ GFP_KERNEL);
+ if (!priv->mfunc.vhcr) {
+ mlx4_err(dev, "Couldn't allocate VHCR.\n");
+ goto err_hcr;
+ }
+ }
+
priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev,
MLX4_MAILBOX_SIZE,
MLX4_MAILBOX_SIZE, 0);
- if (!priv->cmd.pool) {
+ if (!priv->cmd.pool)
+ goto err_vhcr;
+
+ return 0;
+
+err_vhcr:
+ if (mlx4_is_mfunc(dev))
+ dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
+ priv->mfunc.vhcr = NULL;
+
+err_hcr:
+ if (!mlx4_is_slave(dev))
iounmap(priv->cmd.hcr);
- return -ENOMEM;
+ return -ENOMEM;
+}
+
+void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, port;
+
+ if (mlx4_is_master(dev)) {
+ flush_workqueue(priv->mfunc.master.comm_wq);
+ destroy_workqueue(priv->mfunc.master.comm_wq);
+ for (i = 0; i < dev->num_slaves; i++) {
+ for (port = 1; port <= MLX4_MAX_PORTS; port++)
+ kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
+ }
+ kfree(priv->mfunc.master.slave_state);
+ kfree(priv->mfunc.master.vf_admin);
+ kfree(priv->mfunc.master.vf_oper);
}
- return 0;
+ iounmap(priv->mfunc.comm);
}
void mlx4_cmd_cleanup(struct mlx4_dev *dev)
@@ -363,7 +2326,13 @@
struct mlx4_priv *priv = mlx4_priv(dev);
pci_pool_destroy(priv->cmd.pool);
- iounmap(priv->cmd.hcr);
+
+ if (!mlx4_is_slave(dev))
+ iounmap(priv->cmd.hcr);
+ if (mlx4_is_mfunc(dev))
+ dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
+ priv->mfunc.vhcr = NULL;
}
/*
@@ -374,6 +2343,7 @@
{
struct mlx4_priv *priv = mlx4_priv(dev);
int i;
+ int err = 0;
priv->cmd.context = kmalloc(priv->cmd.max_cmds *
sizeof (struct mlx4_cmd_context),
@@ -398,11 +2368,10 @@
; /* nothing */
--priv->cmd.token_mask;
+ down(&priv->cmd.poll_sem);
priv->cmd.use_events = 1;
- down(&priv->cmd.poll_sem);
-
- return 0;
+ return err;
}
/*
@@ -438,11 +2407,14 @@
return ERR_PTR(-ENOMEM);
}
+ memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE);
+
return mailbox;
}
EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox);
-void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox)
+void mlx4_free_cmd_mailbox(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox)
{
if (!mailbox)
return;
@@ -451,3 +2423,184 @@
kfree(mailbox);
}
EXPORT_SYMBOL_GPL(mlx4_free_cmd_mailbox);
+
+u32 mlx4_comm_get_version(void)
+{
+ return ((u32) CMD_CHAN_IF_REV << 8) | (u32) CMD_CHAN_VER;
+}
+
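The reply word written back to a resetting slave combines the new toggle bit with this version word: the channel interface revision sits in bits 15:8 and the channel version in bits 7:0. A sketch of the packing (the CHAN_IF_REV/CHAN_VER values below are placeholders, not the driver's constants):

/* sketch (not driver code): packing the comm-channel reply word */
#include <stdint.h>
#include <stdio.h>

#define CHAN_IF_REV 1	/* placeholder for CMD_CHAN_IF_REV */
#define CHAN_VER    1	/* placeholder for CMD_CHAN_VER */

int main(void)
{
	int toggle = 1;
	uint32_t reply = (uint32_t)toggle << 31;	/* bit 31: toggle */

	reply |= ((uint32_t)CHAN_IF_REV << 8) | (uint32_t)CHAN_VER;
	printf("reply = 0x%08x\n", reply);	/* prints 0x80000101 */
	return 0;
}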
+static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf)
+{
+ if ((vf < 0) || (vf >= dev->num_vfs)) {
+		mlx4_err(dev, "Bad vf number: %d (number of activated vfs: %d)\n", vf, dev->num_vfs);
+ return -EINVAL;
+ }
+ return (vf+1);
+}
+
+int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_state *s_info;
+ int slave;
+
+ if (!mlx4_is_master(dev))
+ return -EPROTONOSUPPORT;
+
+ slave = mlx4_get_slave_indx(dev, vf);
+ if (slave < 0)
+ return -EINVAL;
+
+ s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
+ s_info->mac = mlx4_mac_to_u64(mac);
+	mlx4_info(dev, "default mac on vf %d port %d set to %llX; will take effect only after vf restart\n",
+ vf, port, (unsigned long long) s_info->mac);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_mac);
+
+int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_oper_state *vf_oper;
+ struct mlx4_vport_state *vf_admin;
+ int slave;
+
+ if ((!mlx4_is_master(dev)) ||
+ !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VLAN_CONTROL))
+ return -EPROTONOSUPPORT;
+
+ if ((vlan > 4095) || (qos > 7))
+ return -EINVAL;
+
+ slave = mlx4_get_slave_indx(dev, vf);
+ if (slave < 0)
+ return -EINVAL;
+
+ vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+ vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+
+ if ((0 == vlan) && (0 == qos))
+ vf_admin->default_vlan = MLX4_VGT;
+ else
+ vf_admin->default_vlan = vlan;
+ vf_admin->default_qos = qos;
+
+ if (priv->mfunc.master.slave_state[slave].active &&
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) {
+ mlx4_info(dev, "updating vf %d port %d config params immediately\n",
+ vf, port);
+ mlx4_master_immediate_activate_vlan_qos(priv, slave, port);
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan);
+
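mlx4_set_vf_vlan() above treats vlan == 0 with qos == 0 as a request for VGT (the VF tags its own traffic) and any other combination as VST with the given default VLAN and QoS. A sketch of just that decision; the sentinel below stands in for MLX4_VGT, whose real value comes from the driver headers (4095 is an assumption):

/* sketch (not driver code): VGT vs. VST selection for a VF */
#include <stdint.h>
#include <stdio.h>

#define VGT_SENTINEL 4095	/* stand-in for MLX4_VGT (assumed value) */

static uint16_t pick_default_vlan(uint16_t vlan, uint8_t qos)
{
	if (vlan == 0 && qos == 0)
		return VGT_SENTINEL;	/* VGT: the VF tags its own traffic */
	return vlan;			/* VST: this VLAN is enforced for the VF */
}

int main(void)
{
	printf("vlan=0   qos=0 -> %u\n", (unsigned)pick_default_vlan(0, 0));
	printf("vlan=100 qos=3 -> %u\n", (unsigned)pick_default_vlan(100, 3));
	return 0;
}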
+/* mlx4_get_slave_default_vlan -
+ * return true if VST (default vlan);
+ * if VST, fill vlan & qos (if not NULL) */
+bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, u16 *vlan, u8 *qos)
+{
+ struct mlx4_vport_oper_state *vp_oper;
+ struct mlx4_priv *priv;
+
+ priv = mlx4_priv(dev);
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+
+ if (MLX4_VGT != vp_oper->state.default_vlan) {
+ if (vlan)
+ *vlan = vp_oper->state.default_vlan;
+ if (qos)
+ *qos = vp_oper->state.default_qos;
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_slave_default_vlan);
+
+int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_state *s_info;
+ int slave;
+
+ if ((!mlx4_is_master(dev)) ||
+ !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FSM))
+ return -EPROTONOSUPPORT;
+
+ slave = mlx4_get_slave_indx(dev, vf);
+ if (slave < 0)
+ return -EINVAL;
+
+ s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
+ s_info->spoofchk = setting;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_spoofchk);
+
+int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_state *s_info;
+ struct mlx4_vport_oper_state *vp_oper;
+ int slave;
+ u8 link_stat_event;
+
+ slave = mlx4_get_slave_indx(dev, vf);
+ if (slave < 0)
+ return -EINVAL;
+
+ switch (link_state) {
+ case IFLA_VF_LINK_STATE_AUTO:
+		/* get current link state */
+ if (!priv->sense.do_sense_port[port])
+ link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE;
+ else
+ link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN;
+ break;
+
+ case IFLA_VF_LINK_STATE_ENABLE:
+ link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE;
+ break;
+
+ case IFLA_VF_LINK_STATE_DISABLE:
+ link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN;
+ break;
+
+ default:
+ mlx4_warn(dev, "unknown value for link_state %02x on slave %d port %d\n",
+ link_state, slave, port);
+ return -EINVAL;
+	}
+ /* update the admin & oper state on the link state */
+ s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ s_info->link_state = link_state;
+ vp_oper->state.link_state = link_state;
+
+ /* send event */
+ mlx4_gen_port_state_change_eqe(dev, slave, port, link_stat_event);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state);
+
+int mlx4_get_vf_link_state(struct mlx4_dev *dev, int port, int vf)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_state *s_info;
+ int slave;
+
+ if (!mlx4_is_master(dev))
+ return -EPROTONOSUPPORT;
+
+ slave = mlx4_get_slave_indx(dev, vf);
+ if (slave < 0)
+ return -EINVAL;
+
+ s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+ return s_info->link_state;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_vf_link_state);
+
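All the VF-facing setters in this file translate a VF number to a slave index through mlx4_get_slave_indx(): slave 0 is the PF/master itself, so VF n maps to slave n + 1, and out-of-range VF numbers are rejected. A sketch (not driver code):

/* sketch (not driver code): VF number -> slave index mapping */
#include <stdio.h>

static int vf_to_slave(int vf, int num_vfs)
{
	if (vf < 0 || vf >= num_vfs)
		return -1;	/* the driver returns -EINVAL via the caller */
	return vf + 1;		/* slave 0 is the master itself */
}

int main(void)
{
	printf("vf 0 -> slave %d\n", vf_to_slave(0, 4));
	printf("vf 5 -> %d (rejected)\n", vf_to_slave(5, 4));
	return 0;
}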
Modified: trunk/sys/ofed/drivers/net/mlx4/cq.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/cq.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/cq.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,7 +2,7 @@
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -34,9 +34,8 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/hardirq.h>
-
+#include <linux/module.h>
#include <linux/mlx4/cmd.h>
#include <linux/mlx4/cq.h>
@@ -43,27 +42,6 @@
#include "mlx4.h"
#include "icm.h"
-struct mlx4_cq_context {
- __be32 flags;
- u16 reserved1[3];
- __be16 page_offset;
- __be32 logsize_usrpage;
- __be16 cq_period;
- __be16 cq_max_count;
- u8 reserved2[3];
- u8 comp_eqn;
- u8 log_page_size;
- u8 reserved3[2];
- u8 mtt_base_addr_h;
- __be32 mtt_base_addr_l;
- __be32 last_notified_index;
- __be32 solicit_producer_index;
- __be32 consumer_index;
- __be32 producer_index;
- u32 reserved4[2];
- __be64 db_rec_addr;
-};
-
#define MLX4_CQ_STATUS_OK ( 0 << 28)
#define MLX4_CQ_STATUS_OVERFLOW ( 9 << 28)
#define MLX4_CQ_STATUS_WRITE_FAIL (10 << 28)
@@ -75,10 +53,18 @@
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
{
+ struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
struct mlx4_cq *cq;
+ read_lock(&cq_table->cq_table_lock);
+
cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
cqn & (dev->caps.num_cqs - 1));
+ if (cq)
+ atomic_inc(&cq->refcount);
+
+ read_unlock(&cq_table->cq_table_lock);
+
if (!cq) {
mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
return;
@@ -87,6 +73,9 @@
++cq->arm_sn;
cq->comp(cq);
+
+ if (atomic_dec_and_test(&cq->refcount))
+ complete(&cq->free);
}
void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
@@ -94,13 +83,13 @@
struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
struct mlx4_cq *cq;
- spin_lock(&cq_table->lock);
+ read_lock(&cq_table->cq_table_lock);
cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
if (cq)
atomic_inc(&cq->refcount);
- spin_unlock(&cq_table->lock);
+ read_unlock(&cq_table->cq_table_lock);
if (!cq) {
mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
@@ -116,8 +105,9 @@
static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int cq_num)
{
- return mlx4_cmd(dev, mailbox->dma, cq_num, 0, MLX4_CMD_SW2HW_CQ,
- MLX4_CMD_TIME_CLASS_A);
+ return mlx4_cmd(dev, mailbox->dma, cq_num, 0,
+ MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
}
static int mlx4_MODIFY_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -124,15 +114,15 @@
int cq_num, u32 opmod)
{
return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, MLX4_CMD_MODIFY_CQ,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int cq_num)
{
- return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
- mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ,
- MLX4_CMD_TIME_CLASS_A);
+ return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
+ cq_num, mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
@@ -187,26 +177,105 @@
}
EXPORT_SYMBOL_GPL(mlx4_cq_resize);
-static int mlx4_find_least_loaded_vector(struct mlx4_priv *priv)
+int mlx4_cq_ignore_overrun(struct mlx4_dev *dev, struct mlx4_cq *cq)
{
- int i;
- int index = 0;
- int min = priv->eq_table.eq[0].load;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_cq_context *cq_context;
+ int err;
- for (i = 1; i < priv->dev.caps.num_comp_vectors; i++) {
- if (priv->eq_table.eq[i].load < min) {
- index = i;
- min = priv->eq_table.eq[i].load;
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ cq_context = mailbox->buf;
+ memset(cq_context, 0, sizeof *cq_context);
+
+ cq_context->flags |= cpu_to_be32(MLX4_CQ_FLAG_OI);
+
+ err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 3);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_cq_ignore_overrun);
+
+int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cq_table *cq_table = &priv->cq_table;
+ int err;
+
+ *cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
+ if (*cqn == -1)
+ return -ENOMEM;
+
+ err = mlx4_table_get(dev, &cq_table->table, *cqn);
+ if (err)
+ goto err_out;
+
+ err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn);
+ if (err)
+ goto err_put;
+ return 0;
+
+err_put:
+ mlx4_table_put(dev, &cq_table->table, *cqn);
+
+err_out:
+ mlx4_bitmap_free(&cq_table->bitmap, *cqn, MLX4_NO_RR);
+ return err;
+}
+
+static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
+{
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param, RES_CQ,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ return err;
+ else {
+ *cqn = get_param_l(&out_param);
+ return 0;
}
}
+ return __mlx4_cq_alloc_icm(dev, cqn);
+}
- return index;
+void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cq_table *cq_table = &priv->cq_table;
+
+ mlx4_table_put(dev, &cq_table->cmpt_table, cqn);
+ mlx4_table_put(dev, &cq_table->table, cqn);
+ mlx4_bitmap_free(&cq_table->bitmap, cqn, MLX4_NO_RR);
}
-int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
- struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
- unsigned vector, int collapsed)
+static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
{
+ u64 in_param = 0;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, cqn);
+ err = mlx4_cmd(dev, in_param, RES_CQ, RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ mlx4_warn(dev, "Failed freeing cq:%d\n", cqn);
+ } else
+ __mlx4_cq_free_icm(dev, cqn);
+}
+
+int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
+ struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec,
+ struct mlx4_cq *cq, unsigned vector, int collapsed,
+ int timestamp_en)
+{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cq_table *cq_table = &priv->cq_table;
struct mlx4_cmd_mailbox *mailbox;
@@ -214,29 +283,20 @@
u64 mtt_addr;
int err;
- cq->vector = (vector == MLX4_LEAST_ATTACHED_VECTOR) ?
- mlx4_find_least_loaded_vector(priv) : vector;
-
- if (cq->vector >= dev->caps.num_comp_vectors)
+ if (vector > dev->caps.num_comp_vectors + dev->caps.comp_pool)
return -EINVAL;
- cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
- if (cq->cqn == -1)
- return -ENOMEM;
+ cq->vector = vector;
- err = mlx4_table_get(dev, &cq_table->table, cq->cqn);
+ err = mlx4_cq_alloc_icm(dev, &cq->cqn);
if (err)
- goto err_out;
+ return err;
- err = mlx4_table_get(dev, &cq_table->cmpt_table, cq->cqn);
- if (err)
- goto err_put;
-
spin_lock_irq(&cq_table->lock);
err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
spin_unlock_irq(&cq_table->lock);
if (err)
- goto err_cmpt_put;
+ goto err_icm;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
@@ -248,8 +308,11 @@
memset(cq_context, 0, sizeof *cq_context);
cq_context->flags = cpu_to_be32(!!collapsed << 18);
+ if (timestamp_en)
+ cq_context->flags |= cpu_to_be32(1 << 19);
+
cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
- cq_context->comp_eqn = priv->eq_table.eq[cq->vector].eqn;
+ cq_context->comp_eqn = priv->eq_table.eq[vector].eqn;
cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
mtt_addr = mlx4_mtt_addr(dev, mtt);
@@ -262,7 +325,6 @@
if (err)
goto err_radix;
- priv->eq_table.eq[cq->vector].load++;
cq->cons_index = 0;
cq->arm_sn = 1;
cq->uar = uar;
@@ -269,6 +331,9 @@
atomic_set(&cq->refcount, 1);
init_completion(&cq->free);
+ cq->eqn = priv->eq_table.eq[cq->vector].eqn;
+ cq->irq = priv->eq_table.eq[cq->vector].irq;
+
return 0;
err_radix:
@@ -276,15 +341,9 @@
radix_tree_delete(&cq_table->tree, cq->cqn);
spin_unlock_irq(&cq_table->lock);
-err_cmpt_put:
- mlx4_table_put(dev, &cq_table->cmpt_table, cq->cqn);
+err_icm:
+ mlx4_cq_free_icm(dev, cq->cqn);
-err_put:
- mlx4_table_put(dev, &cq_table->table, cq->cqn);
-
-err_out:
- mlx4_bitmap_free(&cq_table->bitmap, cq->cqn);
-
return err;
}
EXPORT_SYMBOL_GPL(mlx4_cq_alloc);
@@ -300,7 +359,6 @@
mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
synchronize_irq(priv->eq_table.eq[cq->vector].irq);
- priv->eq_table.eq[cq->vector].load--;
spin_lock_irq(&cq_table->lock);
radix_tree_delete(&cq_table->tree, cq->cqn);
@@ -310,8 +368,7 @@
complete(&cq->free);
wait_for_completion(&cq->free);
- mlx4_table_put(dev, &cq_table->table, cq->cqn);
- mlx4_bitmap_free(&cq_table->bitmap, cq->cqn);
+ mlx4_cq_free_icm(dev, cq->cqn);
}
EXPORT_SYMBOL_GPL(mlx4_cq_free);
@@ -321,7 +378,10 @@
int err;
spin_lock_init(&cq_table->lock);
+ rwlock_init(&cq_table->cq_table_lock);
INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+ if (mlx4_is_slave(dev))
+ return 0;
err = mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs,
dev->caps.num_cqs - 1, dev->caps.reserved_cqs, 0);
@@ -333,6 +393,8 @@
void mlx4_cleanup_cq_table(struct mlx4_dev *dev)
{
+ if (mlx4_is_slave(dev))
+ return;
/* Nothing to do to clean up radix_tree */
mlx4_bitmap_cleanup(&mlx4_priv(dev)->cq_table.bitmap);
}
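The CQ alloc/free rework above shows the multifunction split used throughout this sync: __mlx4_cq_alloc_icm() manipulates the bitmap and ICM tables directly, while the mlx4_cq_alloc_icm() front end checks mlx4_is_mfunc() and, on a virtual function, routes the request to the master as an ALLOC_RES command instead. A hedged sketch of the shape of that indirection (all names illustrative, not the driver's API):

/* sketch (not driver code): native vs. mfunc resource allocation */
#include <stdio.h>

static int is_mfunc = 1;	/* pretend we run as a slave/VF */

static int native_alloc(int *id)
{
	*id = 42;	/* stands in for bitmap + ICM table work */
	return 0;
}

static int master_alloc_cmd(int *id)
{
	*id = 7;	/* stands in for the MLX4_CMD_ALLOC_RES round trip */
	return 0;
}

static int alloc_resource(int *id)
{
	if (is_mfunc)
		return master_alloc_cmd(id);	/* slave: ask the master */
	return native_alloc(id);		/* native: allocate directly */
}

int main(void)
{
	int id;

	if (alloc_resource(&id))
		return 1;
	printf("got id %d\n", id);
	return 0;
}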
Modified: trunk/sys/ofed/drivers/net/mlx4/en_cq.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/en_cq.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/en_cq.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -31,12 +31,13 @@
*
*/
-#include "mlx4_en.h"
-
#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>
#include <linux/mlx4/cmd.h>
+#include "mlx4_en.h"
+
+
static void mlx4_en_cq_event(struct mlx4_cq *cq, enum mlx4_event event)
{
return;
@@ -44,52 +45,72 @@
int mlx4_en_create_cq(struct mlx4_en_priv *priv,
- struct mlx4_en_cq *cq,
- int entries, int ring, enum cq_type mode)
+ struct mlx4_en_cq **pcq,
+ int entries, int ring, enum cq_type mode,
+ int node)
{
struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_cq *cq;
int err;
+ cq = kzalloc_node(sizeof(struct mlx4_en_cq), GFP_KERNEL, node);
+ if (!cq) {
+ cq = kzalloc(sizeof(struct mlx4_en_cq), GFP_KERNEL);
+ if (!cq) {
+			en_err(priv, "Failed to allocate CQ structure\n");
+ return -ENOMEM;
+ }
+ }
+
cq->size = entries;
+ cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
+
cq->tq = taskqueue_create_fast("mlx4_en_que", M_NOWAIT,
- taskqueue_thread_enqueue, &cq->tq);
- if (mode == RX) {
- cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
- cq->vector = (ring + priv->port) %
- mdev->dev->caps.num_comp_vectors;
+ taskqueue_thread_enqueue, &cq->tq);
+ if (mode == RX) {
TASK_INIT(&cq->cq_task, 0, mlx4_en_rx_que, cq);
taskqueue_start_threads(&cq->tq, 1, PI_NET, "%s rx cq",
- if_name(priv->dev));
+ if_name(priv->dev));
+
} else {
- cq->buf_size = sizeof(struct mlx4_cqe);
- cq->vector = MLX4_LEAST_ATTACHED_VECTOR;
TASK_INIT(&cq->cq_task, 0, mlx4_en_tx_que, cq);
taskqueue_start_threads(&cq->tq, 1, PI_NET, "%s tx cq",
- if_name(priv->dev));
+ if_name(priv->dev));
}
cq->ring = ring;
cq->is_tx = mode;
- mtx_init(&cq->lock.m, "mlx4 cq", NULL, MTX_DEF);
+ spin_lock_init(&cq->lock);
err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
cq->buf_size, 2 * PAGE_SIZE);
if (err)
- return err;
+ goto err_cq;
err = mlx4_en_map_buffer(&cq->wqres.buf);
if (err)
- mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
- else
- cq->buf = (struct mlx4_cqe *) cq->wqres.buf.direct.buf;
+ goto err_res;
+ cq->buf = (struct mlx4_cqe *) cq->wqres.buf.direct.buf;
+ *pcq = cq;
+
+ return 0;
+
+err_res:
+ mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
+err_cq:
+ kfree(cq);
return err;
}
-int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+
+int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
+ int cq_idx)
{
struct mlx4_en_dev *mdev = priv->mdev;
- int err;
+ int err = 0;
+ char name[25];
+ int timestamp_en = 0;
cq->dev = mdev->pndev[priv->port];
cq->mcq.set_ci_db = cq->wqres.db.db;
@@ -98,11 +119,40 @@
*cq->mcq.arm_db = 0;
memset(cq->buf, 0, cq->buf_size);
+ if (cq->is_tx == RX) {
+ if (mdev->dev->caps.comp_pool) {
+ if (!cq->vector) {
+ sprintf(name, "%s-%d", if_name(priv->dev),
+ cq->ring);
+ /* Set IRQ for specific name (per ring) */
+ if (mlx4_assign_eq(mdev->dev, name, &cq->vector)) {
+ cq->vector = (cq->ring + 1 + priv->port)
+ % mdev->dev->caps.num_comp_vectors;
+					mlx4_warn(mdev, "Failed assigning an EQ to "
+						  "%s, falling back to legacy EQs\n",
+ name);
+ }
+ }
+ } else {
+ cq->vector = (cq->ring + 1 + priv->port) %
+ mdev->dev->caps.num_comp_vectors;
+ }
+ } else {
+ struct mlx4_en_cq *rx_cq;
+ /*
+ * For TX we use the same irq per
+ * ring we assigned for the RX
+ */
+ cq_idx = cq_idx % priv->rx_ring_num;
+ rx_cq = priv->rx_cq[cq_idx];
+ cq->vector = rx_cq->vector;
+ }
+
if (!cq->is_tx)
- cq->size = priv->rx_ring[cq->ring].actual_size;
-
- err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
- cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx);
+ cq->size = priv->rx_ring[cq->ring]->actual_size;
+ err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt,
+ &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq,
+ cq->vector, 0, timestamp_en);
if (err)
return err;
@@ -109,39 +159,43 @@
cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq;
cq->mcq.event = mlx4_en_cq_event;
- if (cq->is_tx) {
- init_timer(&cq->timer);
- cq->timer.function = mlx4_en_poll_tx_cq;
- cq->timer.data = (unsigned long) cq;
- }
+ if (cq->is_tx) {
+ init_timer(&cq->timer);
+ cq->timer.function = mlx4_en_poll_tx_cq;
+ cq->timer.data = (unsigned long) cq;
+ }
+
return 0;
}
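The vector assignment above works as follows: an RX CQ that cannot obtain a dedicated EQ falls back to (ring + 1 + port) % num_comp_vectors, and a TX CQ reuses the vector of the RX CQ at cq_idx % rx_ring_num, so both directions of a ring share one IRQ. A sketch of the arithmetic (sample sizes only, not driver code):

/* sketch (not driver code): legacy EQ fallback and TX->RX vector reuse */
#include <stdio.h>

int main(void)
{
	int num_comp_vectors = 4, port = 1, rx_ring_num = 4;
	int ring, cq_idx;

	for (ring = 0; ring < 4; ring++)	/* RX CQs: legacy fallback */
		printf("rx ring %d -> vector %d\n",
		       ring, (ring + 1 + port) % num_comp_vectors);

	for (cq_idx = 0; cq_idx < 8; cq_idx++)	/* TX CQs reuse RX vectors */
		printf("tx cq %d -> shares rx cq %d's vector\n",
		       cq_idx, cq_idx % rx_ring_num);
	return 0;
}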
-void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
{
struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_cq *cq = *pcq;
taskqueue_drain(cq->tq, &cq->cq_task);
taskqueue_free(cq->tq);
mlx4_en_unmap_buffer(&cq->wqres.buf);
mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
- cq->buf_size = 0;
- cq->buf = NULL;
- mtx_destroy(&cq->lock.m);
+ if (priv->mdev->dev->caps.comp_pool && cq->vector)
+ mlx4_release_eq(priv->mdev->dev, cq->vector);
+ kfree(cq);
+ *pcq = NULL;
}
void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
{
- struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_dev *mdev = priv->mdev;
- taskqueue_drain(cq->tq, &cq->cq_task);
- if (cq->is_tx)
- del_timer(&cq->timer);
+ taskqueue_drain(cq->tq, &cq->cq_task);
+ if (cq->is_tx)
+ del_timer(&cq->timer);
- mlx4_cq_free(mdev->dev, &cq->mcq);
+ mlx4_cq_free(mdev->dev, &cq->mcq);
}
+
/* Set rx cq moderation parameters */
int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
{
Modified: trunk/sys/ofed/drivers/net/mlx4/en_main.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/en_main.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/en_main.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -34,6 +34,7 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
+#include <linux/slab.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/device.h>
@@ -41,15 +42,8 @@
#include "mlx4_en.h"
-MODULE_AUTHOR("Liran Liss, Yevgeny Petrilin");
-MODULE_DESCRIPTION("Mellanox ConnectX HCA Ethernet driver");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION " ("DRV_RELDATE")");
+/* Mellanox ConnectX HCA Ethernet driver */
-static const char mlx4_en_version[] =
- DRV_NAME ": Mellanox ConnectX HCA Ethernet driver v"
- DRV_VERSION " (" DRV_RELDATE ")\n";
-
#define MLX4_EN_PARM_INT(X, def_val, desc) \
static unsigned int X = def_val;\
module_param(X , uint, 0444); \
@@ -60,21 +54,10 @@
* Device scope module parameters
*/
-
-/* Enable RSS TCP traffic */
-MLX4_EN_PARM_INT(tcp_rss, 1,
- "Enable RSS for incomming TCP traffic or disabled (0)");
/* Enable RSS UDP traffic */
MLX4_EN_PARM_INT(udp_rss, 1,
- "Enable RSS for incomming UDP traffic or disabled (0)");
+ "Enable RSS for incoming UDP traffic");
-/* Number of LRO sessions per Rx ring (rounded up to a power of two) */
-MLX4_EN_PARM_INT(num_lro, MLX4_EN_MAX_LRO_DESCRIPTORS,
- "Number of LRO sessions per ring or disabled (0)");
-
-/* Allow reassembly of fragmented IP packets */
-MLX4_EN_PARM_INT(ip_reasm, 1, "Allow reassembly of fragmented IP packets (!0)");
-
/* Priority pausing */
MLX4_EN_PARM_INT(pfctx, 0, "Priority based Flow Control policy on TX[7:0]."
" Per priority bit mask");
@@ -81,19 +64,23 @@
MLX4_EN_PARM_INT(pfcrx, 0, "Priority based Flow Control policy on RX[7:0]."
" Per priority bit mask");
+#define MAX_PFC_TX 0xff
+#define MAX_PFC_RX 0xff
+
+
static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
{
struct mlx4_en_profile *params = &mdev->profile;
int i;
- params->tcp_rss = tcp_rss;
params->udp_rss = udp_rss;
- if (params->udp_rss && !mdev->dev->caps.udp_rss) {
+ params->num_tx_rings_p_up = min_t(int, mp_ncpus,
+ MLX4_EN_MAX_TX_RING_P_UP);
+ if (params->udp_rss && !(mdev->dev->caps.flags
+ & MLX4_DEV_CAP_FLAG_UDP_RSS)) {
mlx4_warn(mdev, "UDP RSS is not supported on this device.\n");
params->udp_rss = 0;
}
- params->num_lro = min_t(int, num_lro , MLX4_EN_MAX_LRO_DESCRIPTORS);
- params->ip_reasm = ip_reasm;
for (i = 1; i <= MLX4_MAX_PORTS; i++) {
params->prof[i].rx_pause = 1;
params->prof[i].rx_ppp = pfcrx;
@@ -101,14 +88,15 @@
params->prof[i].tx_ppp = pfctx;
params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
- params->prof[i].tx_ring_num = MLX4_EN_NUM_HASH_RINGS + 1 +
- (!!pfcrx) * MLX4_EN_NUM_PPP_RINGS;
+ params->prof[i].tx_ring_num = params->num_tx_rings_p_up *
+ MLX4_EN_NUM_UP;
+ params->prof[i].rss_rings = 0;
}
return 0;
}
-static void *get_netdev(struct mlx4_dev *dev, void *ctx, u8 port)
+static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port)
{
struct mlx4_en_dev *endev = ctx;
@@ -116,18 +104,17 @@
}
static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
- enum mlx4_dev_event event, int port)
+ enum mlx4_dev_event event, unsigned long port)
{
struct mlx4_en_dev *mdev = (struct mlx4_en_dev *) endev_ptr;
struct mlx4_en_priv *priv;
- if (!mdev->pndev[port])
- return;
-
- priv = netdev_priv(mdev->pndev[port]);
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
case MLX4_DEV_EVENT_PORT_DOWN:
+ if (!mdev->pndev[port])
+ return;
+ priv = netdev_priv(mdev->pndev[port]);
/* To prevent races, we poll the link state in a separate
task rather than changing it here */
priv->link_state = event;
@@ -138,8 +125,15 @@
mlx4_err(mdev, "Internal error detected, restarting device\n");
break;
+ case MLX4_DEV_EVENT_SLAVE_INIT:
+ case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+ break;
default:
- mlx4_warn(mdev, "Unhandled event: %d\n", event);
+ if (port < 1 || port > dev->caps.num_ports ||
+ !mdev->pndev[port])
+ return;
+ mlx4_warn(mdev, "Unhandled event %d for port %d\n", event,
+ (int) port);
}
}
@@ -146,7 +140,7 @@
static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
{
struct mlx4_en_dev *mdev = endev_ptr;
- int i;
+ int i, ret;
mutex_lock(&mdev->state_lock);
mdev->device_up = false;
@@ -158,26 +152,21 @@
flush_workqueue(mdev->workqueue);
destroy_workqueue(mdev->workqueue);
- mlx4_mr_free(dev, &mdev->mr);
+ ret = mlx4_mr_free(dev, &mdev->mr);
+ if (ret)
+		mlx4_err(mdev, "Error deregistering MR. The system may have become unstable.\n");
+ iounmap(mdev->uar_map);
mlx4_uar_free(dev, &mdev->priv_uar);
mlx4_pd_free(dev, mdev->priv_pdn);
- sx_destroy(&mdev->state_lock.sx);
- mtx_destroy(&mdev->uar_lock.m);
kfree(mdev);
}
static void *mlx4_en_add(struct mlx4_dev *dev)
{
- static int mlx4_en_version_printed;
struct mlx4_en_dev *mdev;
int i;
int err;
- if (!mlx4_en_version_printed) {
- printk(KERN_INFO "%s", mlx4_en_version);
- mlx4_en_version_printed++;
- }
-
mdev = kzalloc(sizeof *mdev, GFP_KERNEL);
if (!mdev) {
dev_err(&dev->pdev->dev, "Device struct alloc failed, "
@@ -192,10 +181,11 @@
if (mlx4_uar_alloc(dev, &mdev->priv_uar))
goto err_pd;
- mtx_init(&mdev->uar_lock.m, "mlx4 uar", NULL, MTX_DEF);
- mdev->uar_map = ioremap(mdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ mdev->uar_map = ioremap((phys_addr_t) mdev->priv_uar.pfn << PAGE_SHIFT,
+ PAGE_SIZE);
if (!mdev->uar_map)
goto err_uar;
+ spin_lock_init(&mdev->uar_lock);
mdev->dev = dev;
mdev->dma_device = &(dev->pdev->dev);
@@ -211,7 +201,7 @@
MLX4_PERM_LOCAL_WRITE | MLX4_PERM_LOCAL_READ,
0, 0, &mdev->mr)) {
mlx4_err(mdev, "Failed allocating memory region\n");
- goto err_uar;
+ goto err_map;
}
if (mlx4_mr_enable(mdev->dev, &mdev->mr)) {
mlx4_err(mdev, "Failed enabling memory region\n");
@@ -225,21 +215,24 @@
goto err_mr;
}
- /* Configure wich ports to start according to module parameters */
+ /* Configure which ports to start according to module parameters */
mdev->port_cnt = 0;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
mdev->port_cnt++;
- /* If we did not receive an explicit number of Rx rings, default to
- * the number of completion vectors populated by the mlx4_core */
+
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
- mlx4_info(mdev, "Using %d tx rings for port:%d\n",
- mdev->profile.prof[i].tx_ring_num, i);
- mdev->profile.prof[i].rx_ring_num = rounddown_pow_of_two(
- min_t(int, dev->caps.num_comp_vectors, MAX_RX_RINGS));
-
- mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n",
- mdev->profile.prof[i].rx_ring_num, i);
+ if (!dev->caps.comp_pool) {
+ mdev->profile.prof[i].rx_ring_num =
+ rounddown_pow_of_two(max_t(int, MIN_RX_RINGS,
+ min_t(int,
+ dev->caps.num_comp_vectors,
+ DEF_RX_RINGS)));
+ } else {
+ mdev->profile.prof[i].rx_ring_num = rounddown_pow_of_two(
+ min_t(int, dev->caps.comp_pool /
+ dev->caps.num_ports, MAX_MSIX_P_PORT));
+ }
}
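The default RX ring count computed above is always a power of two: without an EQ pool it is clamped between MIN_RX_RINGS and min(num_comp_vectors, DEF_RX_RINGS); with a pool it is bounded by the pool's per-port share, capped at MAX_MSIX_P_PORT. A sketch with assumed constants (the real values live in mlx4_en.h):

/* sketch (not driver code): default RX ring count; constants assumed */
#include <stdio.h>

#define MIN_RX_RINGS    4	/* assumed */
#define DEF_RX_RINGS    16	/* assumed */
#define MAX_MSIX_P_PORT 17	/* assumed */

static int rounddown_pow_of_two(int n)
{
	int p = 1;

	while (p * 2 <= n)
		p *= 2;
	return p;
}

static int min_i(int a, int b) { return a < b ? a : b; }
static int max_i(int a, int b) { return a > b ? a : b; }

int main(void)
{
	int num_comp_vectors = 9, comp_pool = 0, num_ports = 2, rings;

	if (!comp_pool)
		rings = rounddown_pow_of_two(max_i(MIN_RX_RINGS,
				min_i(num_comp_vectors, DEF_RX_RINGS)));
	else
		rings = rounddown_pow_of_two(min_i(comp_pool / num_ports,
				MAX_MSIX_P_PORT));
	printf("rx_ring_num = %d\n", rings);	/* 8 for 9 vectors, no pool */
	return 0;
}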
/* Create our own workqueue for reset/multicast tasks
@@ -253,7 +246,7 @@
/* At this stage all non-port specific tasks are complete:
* mark the card state as up */
- sx_init(&mdev->state_lock.sx, "mlxen state");
+ mutex_init(&mdev->state_lock);
mdev->device_up = true;
/* Setup ports */
@@ -261,32 +254,20 @@
/* Create a netdev for each port */
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
mlx4_info(mdev, "Activating port:%d\n", i);
- if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i])) {
+ if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i]))
mdev->pndev[i] = NULL;
- goto err_free_netdev;
- }
}
+
return mdev;
-
-err_free_netdev:
- mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
- if (mdev->pndev[i])
- mlx4_en_destroy_netdev(mdev->pndev[i]);
- }
-
- mutex_lock(&mdev->state_lock);
- mdev->device_up = false;
- mutex_unlock(&mdev->state_lock);
- flush_workqueue(mdev->workqueue);
-
- /* Stop event queue before we drop down to release shared SW state */
- destroy_workqueue(mdev->workqueue);
-
err_mr:
- mlx4_mr_free(dev, &mdev->mr);
+ err = mlx4_mr_free(dev, &mdev->mr);
+ if (err)
+		mlx4_err(mdev, "Error deregistering MR. The system may have become unstable.\n");
+err_map:
+ if (mdev->uar_map)
+ iounmap(mdev->uar_map);
err_uar:
- mtx_destroy(&mdev->uar_lock.m);
mlx4_uar_free(dev, &mdev->priv_uar);
err_pd:
mlx4_pd_free(dev, mdev->priv_pdn);
@@ -296,67 +277,42 @@
return NULL;
}
-enum mlx4_query_reply mlx4_en_query(void *endev_ptr, void *int_dev)
+static struct mlx4_interface mlx4_en_interface = {
+ .add = mlx4_en_add,
+ .remove = mlx4_en_remove,
+ .event = mlx4_en_event,
+ .get_dev = mlx4_en_get_netdev,
+ .protocol = MLX4_PROT_ETH,
+};
+
+static void mlx4_en_verify_params(void)
{
- struct mlx4_en_dev *mdev = endev_ptr;
- struct net_device *netdev = int_dev;
- int p;
-
- for (p = 1; p <= MLX4_MAX_PORTS; ++p)
- if (mdev->pndev[p] == netdev)
- return p;
+ if (pfctx > MAX_PFC_TX) {
+ pr_warn("mlx4_en: WARNING: illegal module parameter pfctx 0x%x - "
+ "should be in range 0-0x%x, will be changed to default (0)\n",
+ pfctx, MAX_PFC_TX);
+ pfctx = 0;
+ }
- return MLX4_QUERY_NOT_MINE;
+ if (pfcrx > MAX_PFC_RX) {
+ pr_warn("mlx4_en: WARNING: illegal module parameter pfcrx 0x%x - "
+ "should be in range 0-0x%x, will be changed to default (0)\n",
+ pfcrx, MAX_PFC_RX);
+ pfcrx = 0;
+ }
}
-#if 0
-static struct pci_device_id mlx4_en_pci_table[] = {
- { PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
- { PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
- { PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */
- { PCI_VDEVICE(MELLANOX, 0x6732) }, /* MT25408 "Hermon" DDR PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x6368) }, /* MT25408 "Hermon" EN 10GigE */
- { PCI_VDEVICE(MELLANOX, 0x6750) }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x6372) }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */
- { PCI_VDEVICE(MELLANOX, 0x675a) }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
- { PCI_VDEVICE(MELLANOX, 0x6764) }, /* MT26468 ConnectX EN 10GigE PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x6746) }, /* MT26438 ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE Virt+ */
- { PCI_VDEVICE(MELLANOX, 0x676e) }, /* MT26478 ConnectX EN 40GigE PCIe 2.0 5GT/s */
- { PCI_VDEVICE(MELLANOX, 0x6778) }, /* MT26488 ConnectX VPI PCIe 2.0 5GT/s - IB DDR / 10GigE Virt+ */
- { PCI_VDEVICE(MELLANOX, 0x1000) },
- { PCI_VDEVICE(MELLANOX, 0x1001) },
- { PCI_VDEVICE(MELLANOX, 0x1002) },
- { PCI_VDEVICE(MELLANOX, 0x1003) },
- { PCI_VDEVICE(MELLANOX, 0x1004) },
- { PCI_VDEVICE(MELLANOX, 0x1005) },
- { PCI_VDEVICE(MELLANOX, 0x1006) },
- { PCI_VDEVICE(MELLANOX, 0x1007) },
- { PCI_VDEVICE(MELLANOX, 0x1008) },
- { PCI_VDEVICE(MELLANOX, 0x1009) },
- { PCI_VDEVICE(MELLANOX, 0x100a) },
- { PCI_VDEVICE(MELLANOX, 0x100b) },
- { PCI_VDEVICE(MELLANOX, 0x100c) },
- { PCI_VDEVICE(MELLANOX, 0x100d) },
- { PCI_VDEVICE(MELLANOX, 0x100e) },
- { PCI_VDEVICE(MELLANOX, 0x100f) },
- { 0, }
-};
-MODULE_DEVICE_TABLE(pci, mlx4_en_pci_table);
-#endif
-
-static struct mlx4_interface mlx4_en_interface = {
- .add = mlx4_en_add,
- .remove = mlx4_en_remove,
- .event = mlx4_en_event,
- .query = mlx4_en_query,
- .get_prot_dev = get_netdev,
- .protocol = MLX4_PROT_EN,
-};
-
static int __init mlx4_en_init(void)
{
+ mlx4_en_verify_params();
+
+#ifdef CONFIG_DEBUG_FS
+ int err = mlx4_en_register_debugfs();
+ if (err)
+ pr_err("Failed to register debugfs\n");
+#endif
return mlx4_register_interface(&mlx4_en_interface);
}
@@ -363,6 +319,9 @@
static void __exit mlx4_en_cleanup(void)
{
mlx4_unregister_interface(&mlx4_en_interface);
+#ifdef CONFIG_DEBUG_FS
+ mlx4_en_unregister_debugfs();
+#endif
}
module_init(mlx4_en_init);
@@ -379,5 +338,5 @@
.name = "mlxen",
.evhand = mlxen_evhand,
};
-DECLARE_MODULE(mlxen, mlxen_mod, SI_SUB_SMP, SI_ORDER_ANY);
+DECLARE_MODULE(mlxen, mlxen_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY);
MODULE_DEPEND(mlxen, mlx4, 1, 1, 1);
Modified: trunk/sys/ofed/drivers/net/mlx4/en_netdev.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/en_netdev.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/en_netdev.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -31,201 +31,816 @@
*
*/
-#include "mlx4_en.h"
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#ifdef CONFIG_NET_RX_BUSY_POLL
+#include <net/busy_poll.h>
+#endif
+#include <linux/list.h>
+#include <linux/if_ether.h>
+
#include <linux/mlx4/driver.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/cmd.h>
#include <linux/mlx4/cq.h>
-#include <linux/delay.h>
-#include <net/ethernet.h>
-#include <net/if_vlan_var.h>
#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include "mlx4_en.h"
+#include "en_port.h"
+
static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv);
+static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv);
+static int mlx4_en_unit;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+/* must be called with local_bh_disable()d */
+static int mlx4_en_low_latency_recv(struct napi_struct *napi)
+{
+ struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
+ struct net_device *dev = cq->dev;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring];
+ int done;
+
+ if (!priv->port_up)
+ return LL_FLUSH_FAILED;
+
+ if (!mlx4_en_cq_lock_poll(cq))
+ return LL_FLUSH_BUSY;
+
+ done = mlx4_en_process_rx_cq(dev, cq, 4);
+#ifdef LL_EXTENDED_STATS
+ if (done)
+ rx_ring->cleaned += done;
+ else
+ rx_ring->misses++;
+#endif
+
+ mlx4_en_cq_unlock_poll(cq);
+
+ return done;
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+#ifdef CONFIG_RFS_ACCEL
+
+struct mlx4_en_filter {
+ struct list_head next;
+ struct work_struct work;
+
+ u8 ip_proto;
+ __be32 src_ip;
+ __be32 dst_ip;
+ __be16 src_port;
+ __be16 dst_port;
+
+ int rxq_index;
+ struct mlx4_en_priv *priv;
+ u32 flow_id; /* RFS infrastructure id */
+ int id; /* mlx4_en driver id */
+ u64 reg_id; /* Flow steering API id */
+ u8 activated; /* Used to prevent expiry before filter
+ * is attached
+ */
+ struct hlist_node filter_chain;
+};
+
+static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv);
+
+static enum mlx4_net_trans_rule_id mlx4_ip_proto_to_trans_rule_id(u8 ip_proto)
+{
+ switch (ip_proto) {
+ case IPPROTO_UDP:
+ return MLX4_NET_TRANS_RULE_ID_UDP;
+ case IPPROTO_TCP:
+ return MLX4_NET_TRANS_RULE_ID_TCP;
+ default:
+ return -EPROTONOSUPPORT;
+ }
+}
+
+static void mlx4_en_filter_work(struct work_struct *work)
+{
+ struct mlx4_en_filter *filter = container_of(work,
+ struct mlx4_en_filter,
+ work);
+ struct mlx4_en_priv *priv = filter->priv;
+ struct mlx4_spec_list spec_tcp_udp = {
+ .id = mlx4_ip_proto_to_trans_rule_id(filter->ip_proto),
+ {
+ .tcp_udp = {
+ .dst_port = filter->dst_port,
+ .dst_port_msk = (__force __be16)-1,
+ .src_port = filter->src_port,
+ .src_port_msk = (__force __be16)-1,
+ },
+ },
+ };
+ struct mlx4_spec_list spec_ip = {
+ .id = MLX4_NET_TRANS_RULE_ID_IPV4,
+ {
+ .ipv4 = {
+ .dst_ip = filter->dst_ip,
+ .dst_ip_msk = (__force __be32)-1,
+ .src_ip = filter->src_ip,
+ .src_ip_msk = (__force __be32)-1,
+ },
+ },
+ };
+ struct mlx4_spec_list spec_eth = {
+ .id = MLX4_NET_TRANS_RULE_ID_ETH,
+ };
+ struct mlx4_net_trans_rule rule = {
+ .list = LIST_HEAD_INIT(rule.list),
+ .queue_mode = MLX4_NET_TRANS_Q_LIFO,
+ .exclusive = 1,
+ .allow_loopback = 1,
+ .promisc_mode = MLX4_FS_REGULAR,
+ .port = priv->port,
+ .priority = MLX4_DOMAIN_RFS,
+ };
+ int rc;
+ __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ if (spec_tcp_udp.id < 0) {
+ en_warn(priv, "RFS: ignoring unsupported ip protocol (%d)\n",
+ filter->ip_proto);
+ goto ignore;
+ }
+ list_add_tail(&spec_eth.list, &rule.list);
+ list_add_tail(&spec_ip.list, &rule.list);
+ list_add_tail(&spec_tcp_udp.list, &rule.list);
+
+ rule.qpn = priv->rss_map.qps[filter->rxq_index].qpn;
+ memcpy(spec_eth.eth.dst_mac, priv->dev->dev_addr, ETH_ALEN);
+ memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+
+ filter->activated = 0;
+
+ if (filter->reg_id) {
+ rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id);
+ if (rc && rc != -ENOENT)
+ en_err(priv, "Error detaching flow. rc = %d\n", rc);
+ }
+
+ rc = mlx4_flow_attach(priv->mdev->dev, &rule, &filter->reg_id);
+ if (rc)
+ en_err(priv, "Error attaching flow. err = %d\n", rc);
+
+ignore:
+ mlx4_en_filter_rfs_expire(priv);
+
+ filter->activated = 1;
+}
+
+static inline struct hlist_head *
+filter_hash_bucket(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip,
+ __be16 src_port, __be16 dst_port)
+{
+ unsigned long l;
+ int bucket_idx;
+
+ l = (__force unsigned long)src_port |
+ ((__force unsigned long)dst_port << 2);
+ l ^= (__force unsigned long)(src_ip ^ dst_ip);
+
+ bucket_idx = hash_long(l, MLX4_EN_FILTER_HASH_SHIFT);
+
+ return &priv->filter_hash[bucket_idx];
+}
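
filter_hash_bucket() above folds the 4-tuple into a single unsigned long and feeds it to hash_long() to choose an RFS hash bucket. A rough user-space approximation of the same fold (the Fibonacci multiplier stands in for the kernel's hash_long(), and the 6-bit width giving 64 buckets is an assumption, not the driver's MLX4_EN_FILTER_HASH_SHIFT):

    #include <stdint.h>
    #include <stdio.h>

    #define FILTER_HASH_BITS 6      /* assumed: 64 buckets */

    /* Fibonacci hashing, similar in spirit to the kernel's hash_long(). */
    static unsigned int hash_sketch(uint64_t val, unsigned int bits)
    {
            return (unsigned int)((val * 0x9E3779B97F4A7C15ULL) >> (64 - bits));
    }

    int main(void)
    {
            uint16_t src_port = 12345, dst_port = 443;
            uint32_t src_ip = 0x0a000001, dst_ip = 0x0a000002;
            uint64_t l;

            l = (uint64_t)src_port | ((uint64_t)dst_port << 2);
            l ^= (uint64_t)(src_ip ^ dst_ip);
            printf("bucket = %u\n", hash_sketch(l, FILTER_HASH_BITS));
            return 0;
    }
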
+
+static struct mlx4_en_filter *
+mlx4_en_filter_alloc(struct mlx4_en_priv *priv, int rxq_index, __be32 src_ip,
+ __be32 dst_ip, u8 ip_proto, __be16 src_port,
+ __be16 dst_port, u32 flow_id)
+{
+ struct mlx4_en_filter *filter = NULL;
+
+ filter = kzalloc(sizeof(struct mlx4_en_filter), GFP_ATOMIC);
+ if (!filter)
+ return NULL;
+
+ filter->priv = priv;
+ filter->rxq_index = rxq_index;
+ INIT_WORK(&filter->work, mlx4_en_filter_work);
+
+ filter->src_ip = src_ip;
+ filter->dst_ip = dst_ip;
+ filter->ip_proto = ip_proto;
+ filter->src_port = src_port;
+ filter->dst_port = dst_port;
+
+ filter->flow_id = flow_id;
+
+ filter->id = priv->last_filter_id++ % RPS_NO_FILTER;
+
+ list_add_tail(&filter->next, &priv->filters);
+ hlist_add_head(&filter->filter_chain,
+ filter_hash_bucket(priv, src_ip, dst_ip, src_port,
+ dst_port));
+
+ return filter;
+}
+
+static void mlx4_en_filter_free(struct mlx4_en_filter *filter)
+{
+ struct mlx4_en_priv *priv = filter->priv;
+ int rc;
+
+ list_del(&filter->next);
+
+ rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id);
+ if (rc && rc != -ENOENT)
+ en_err(priv, "Error detaching flow. rc = %d\n", rc);
+
+ kfree(filter);
+}
+
+static inline struct mlx4_en_filter *
+mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip,
+ u8 ip_proto, __be16 src_port, __be16 dst_port)
+{
+ struct hlist_node *elem;
+ struct mlx4_en_filter *filter;
+ struct mlx4_en_filter *ret = NULL;
+
+ hlist_for_each_entry(filter, elem,
+ filter_hash_bucket(priv, src_ip, dst_ip,
+ src_port, dst_port),
+ filter_chain) {
+ if (filter->src_ip == src_ip &&
+ filter->dst_ip == dst_ip &&
+ filter->ip_proto == ip_proto &&
+ filter->src_port == src_port &&
+ filter->dst_port == dst_port) {
+ ret = filter;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int
+mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id)
+{
+ struct mlx4_en_priv *priv = netdev_priv(net_dev);
+ struct mlx4_en_filter *filter;
+ const struct iphdr *ip;
+ const __be16 *ports;
+ u8 ip_proto;
+ __be32 src_ip;
+ __be32 dst_ip;
+ __be16 src_port;
+ __be16 dst_port;
+ int nhoff = skb_network_offset(skb);
+ int ret = 0;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return -EPROTONOSUPPORT;
+
+ ip = (const struct iphdr *)(skb->data + nhoff);
+ if (ip_is_fragment(ip))
+ return -EPROTONOSUPPORT;
+
+ if ((ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP))
+ return -EPROTONOSUPPORT;
+ ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl);
+
+ ip_proto = ip->protocol;
+ src_ip = ip->saddr;
+ dst_ip = ip->daddr;
+ src_port = ports[0];
+ dst_port = ports[1];
+
+ spin_lock_bh(&priv->filters_lock);
+ filter = mlx4_en_filter_find(priv, src_ip, dst_ip, ip_proto,
+ src_port, dst_port);
+ if (filter) {
+ if (filter->rxq_index == rxq_index)
+ goto out;
+
+ filter->rxq_index = rxq_index;
+ } else {
+ filter = mlx4_en_filter_alloc(priv, rxq_index,
+ src_ip, dst_ip, ip_proto,
+ src_port, dst_port, flow_id);
+ if (!filter) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
+
+ queue_work(priv->mdev->workqueue, &filter->work);
+
+out:
+ ret = filter->id;
+err:
+ spin_unlock_bh(&priv->filters_lock);
+
+ return ret;
+}
+
+void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *rx_ring)
+{
+ struct mlx4_en_filter *filter, *tmp;
+ LIST_HEAD(del_list);
+
+ spin_lock_bh(&priv->filters_lock);
+ list_for_each_entry_safe(filter, tmp, &priv->filters, next) {
+ list_move(&filter->next, &del_list);
+ hlist_del(&filter->filter_chain);
+ }
+ spin_unlock_bh(&priv->filters_lock);
+
+ list_for_each_entry_safe(filter, tmp, &del_list, next) {
+ cancel_work_sync(&filter->work);
+ mlx4_en_filter_free(filter);
+ }
+}
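
mlx4_en_cleanup_filters() uses the standard drain pattern: splice every filter onto a private del_list while holding the spinlock, then drop the lock before calling cancel_work_sync() and kfree(), both of which may sleep. A minimal user-space sketch of the same pattern (all names here are illustrative):

    #include <pthread.h>
    #include <stdlib.h>

    struct node { struct node *next; };

    static struct node *shared_head;        /* protected by list_lock */
    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void drain_and_free(void)
    {
            struct node *del_list, *n;

            pthread_mutex_lock(&list_lock);
            del_list = shared_head;         /* steal the whole list */
            shared_head = NULL;
            pthread_mutex_unlock(&list_lock);

            while ((n = del_list) != NULL) {        /* lock no longer held */
                    del_list = n->next;
                    free(n);                /* blocking work is safe here */
            }
    }

    int main(void)
    {
            shared_head = calloc(1, sizeof(struct node));
            drain_and_free();
            return 0;
    }
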
+
+static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv)
+{
+ struct mlx4_en_filter *filter = NULL, *tmp, *last_filter = NULL;
+ LIST_HEAD(del_list);
+ int i = 0;
+
+ spin_lock_bh(&priv->filters_lock);
+ list_for_each_entry_safe(filter, tmp, &priv->filters, next) {
+ if (i > MLX4_EN_FILTER_EXPIRY_QUOTA)
+ break;
+
+ if (filter->activated &&
+ !work_pending(&filter->work) &&
+ rps_may_expire_flow(priv->dev,
+ filter->rxq_index, filter->flow_id,
+ filter->id)) {
+ list_move(&filter->next, &del_list);
+ hlist_del(&filter->filter_chain);
+ } else
+ last_filter = filter;
+
+ i++;
+ }
+
+ if (last_filter && (&last_filter->next != priv->filters.next))
+ list_move(&priv->filters, &last_filter->next);
+
+ spin_unlock_bh(&priv->filters_lock);
+
+ list_for_each_entry_safe(filter, tmp, &del_list, next)
+ mlx4_en_filter_free(filter);
+}
+#endif
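
Two details of the expiry pass above are worth noting: it inspects at most MLX4_EN_FILTER_EXPIRY_QUOTA entries per call, and it then rotates the list head past the last surviving filter so the next call resumes where this one stopped. The effect is a bounded round-robin sweep; a tiny sketch of the same idea over an array (the quota value is an assumption):

    #include <stdio.h>

    #define NFILTERS      10
    #define EXPIRY_QUOTA  3         /* assumed per-pass budget */

    static int cursor;      /* persists across passes, like the rotated head */

    static void expire_pass(const int expired[NFILTERS])
    {
            for (int i = 0; i < EXPIRY_QUOTA; i++) {
                    int idx = (cursor + i) % NFILTERS;

                    if (expired[idx])
                            printf("freeing filter %d\n", idx);
            }
            cursor = (cursor + EXPIRY_QUOTA) % NFILTERS; /* resume here */
    }

    int main(void)
    {
            int expired[NFILTERS] = { 0, 1, 0, 0, 1, 0, 0, 0, 1, 0 };

            for (int pass = 0; pass < 4; pass++)
                    expire_pass(expired);   /* covers the list in 4 passes */
            return 0;
    }
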
+
static void mlx4_en_vlan_rx_add_vid(void *arg, struct net_device *dev, u16 vid)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err;
int idx;
- u8 field;
- if ((vid == 0) || (vid > 4095)) /* Invalid */
+ if (arg != priv)
return;
+
en_dbg(HW, priv, "adding VLAN:%d\n", vid);
- idx = vid >> 5;
- field = 1 << (vid & 0x1f);
- spin_lock(&priv->vlan_lock);
- priv->vlgrp_modified = true;
- if (priv->vlan_unregister[idx] & field)
- priv->vlan_unregister[idx] &= ~field;
- else
- priv->vlan_register[idx] |= field;
- priv->vlans[idx] |= field;
- spin_unlock(&priv->vlan_lock);
+
+ set_bit(vid, priv->active_vlans);
+
+ /* Add VID to port VLAN filter */
+ mutex_lock(&mdev->state_lock);
+ if (mdev->device_up && priv->port_up) {
+ err = mlx4_SET_VLAN_FLTR(mdev->dev, priv);
+ if (err)
+ en_err(priv, "Failed configuring VLAN filter\n");
+ }
+ if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx))
+ en_dbg(HW, priv, "failed adding vlan %d\n", vid);
+ mutex_unlock(&mdev->state_lock);
+
}
static void mlx4_en_vlan_rx_kill_vid(void *arg, struct net_device *dev, u16 vid)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
- int idx;
- u8 field;
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err;
- if ((vid == 0) || (vid > 4095)) /* Invalid */
+ if (arg != priv)
return;
+
en_dbg(HW, priv, "Killing VID:%d\n", vid);
- idx = vid >> 5;
- field = 1 << (vid & 0x1f);
- spin_lock(&priv->vlan_lock);
- priv->vlgrp_modified = true;
- if (priv->vlan_register[idx] & field)
- priv->vlan_register[idx] &= ~field;
- else
- priv->vlan_unregister[idx] |= field;
- priv->vlans[idx] &= ~field;
- spin_unlock(&priv->vlan_lock);
+
+ clear_bit(vid, priv->active_vlans);
+
+ /* Remove VID from port VLAN filter */
+ mutex_lock(&mdev->state_lock);
+ mlx4_unregister_vlan(mdev->dev, priv->port, vid);
+
+ if (mdev->device_up && priv->port_up) {
+ err = mlx4_SET_VLAN_FLTR(mdev->dev, priv);
+ if (err)
+ en_err(priv, "Failed configuring VLAN filter\n");
+ }
+ mutex_unlock(&mdev->state_lock);
+
}
-u64 mlx4_en_mac_to_u64(u8 *addr)
+static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv,
+ unsigned char *mac, int *qpn, u64 *reg_id)
{
- u64 mac = 0;
- int i;
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_dev *dev = mdev->dev;
+ int err;
- for (i = 0; i < ETHER_ADDR_LEN; i++) {
- mac <<= 8;
- mac |= addr[i];
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_B0: {
+ struct mlx4_qp qp;
+ u8 gid[16] = {0};
+
+ qp.qpn = *qpn;
+ memcpy(&gid[10], mac, ETH_ALEN);
+ gid[5] = priv->port;
+
+ err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH);
+ break;
}
- return mac;
+ case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+ struct mlx4_spec_list spec_eth = { {NULL} };
+ __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ struct mlx4_net_trans_rule rule = {
+ .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ .allow_loopback = 1,
+ .promisc_mode = MLX4_FS_REGULAR,
+ .priority = MLX4_DOMAIN_NIC,
+ };
+
+ rule.port = priv->port;
+ rule.qpn = *qpn;
+ INIT_LIST_HEAD(&rule.list);
+
+ spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec_eth.eth.dst_mac, mac, ETH_ALEN);
+ memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+ list_add_tail(&spec_eth.list, &rule.list);
+
+ err = mlx4_flow_attach(dev, &rule, reg_id);
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+ if (err)
+ en_warn(priv, "Failed Attaching Unicast\n");
+
+ return err;
}
-static int mlx4_en_cache_mclist(struct net_device *dev, u64 **mcaddrp)
+static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv,
+ unsigned char *mac, int qpn, u64 reg_id)
{
- struct ifmultiaddr *ifma;;
- u64 *mcaddr;
- int cnt;
- int i;
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_dev *dev = mdev->dev;
- *mcaddrp = NULL;
-restart:
- cnt = 0;
- if_maddr_rlock(dev);
- TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_LINK)
- continue;
- if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen !=
- ETHER_ADDR_LEN)
- continue;
- cnt++;
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_B0: {
+ struct mlx4_qp qp;
+ u8 gid[16] = {0};
+
+ qp.qpn = qpn;
+ memcpy(&gid[10], mac, ETH_ALEN);
+ gid[5] = priv->port;
+
+ mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH);
+ break;
}
- if_maddr_runlock(dev);
- if (cnt == 0)
- return (0);
- mcaddr = kmalloc(sizeof(u64) * cnt, GFP_KERNEL);
- if (mcaddr == NULL)
- return (0);
- i = 0;
- if_maddr_rlock(dev);
- TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_LINK)
- continue;
- if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen !=
- ETHER_ADDR_LEN)
- continue;
- /* Make sure the list didn't grow. */
- if (i == cnt) {
- if_maddr_runlock(dev);
- kfree(mcaddr);
- goto restart;
- }
- mcaddr[i++] = mlx4_en_mac_to_u64(
- LLADDR((struct sockaddr_dl *)ifma->ifma_addr));
+ case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+ mlx4_flow_detach(dev, reg_id);
+ break;
}
- if_maddr_runlock(dev);
- *mcaddrp = mcaddr;
- return (i);
+ default:
+ en_err(priv, "Invalid steering mode.\n");
+ }
}
-
-static void mlx4_en_set_multicast(struct net_device *dev)
+static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
{
- struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_dev *dev = mdev->dev;
+ struct mlx4_mac_entry *entry;
+ int index = 0;
+ int err = 0;
+ u64 reg_id;
+ int *qpn = &priv->base_qpn;
+ u64 mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev));
- if (!priv->port_up)
- return;
+ en_dbg(DRV, priv, "Registering MAC: %pM for adding\n",
+ IF_LLADDR(priv->dev));
+ index = mlx4_register_mac(dev, priv->port, mac);
+ if (index < 0) {
+ err = index;
+ en_err(priv, "Failed adding MAC: %pM\n",
+ IF_LLADDR(priv->dev));
+ return err;
+ }
- queue_work(priv->mdev->workqueue, &priv->mcast_task);
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
+ int base_qpn = mlx4_get_base_qpn(dev, priv->port);
+ *qpn = base_qpn + index;
+ return 0;
+ }
+
+ err = mlx4_qp_reserve_range(dev, 1, 1, qpn, 0);
+ en_dbg(DRV, priv, "Reserved qp %d\n", *qpn);
+ if (err) {
+ en_err(priv, "Failed to reserve qp for mac registration\n");
+ goto qp_err;
+ }
+
+ err = mlx4_en_uc_steer_add(priv, IF_LLADDR(priv->dev), qpn, &reg_id);
+ if (err)
+ goto steer_err;
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
+ memcpy(entry->mac, IF_LLADDR(priv->dev), sizeof(entry->mac));
+ entry->reg_id = reg_id;
+
+ hlist_add_head(&entry->hlist,
+ &priv->mac_hash[entry->mac[MLX4_EN_MAC_HASH_IDX]]);
+
+ return 0;
+
+alloc_err:
+ mlx4_en_uc_steer_release(priv, IF_LLADDR(priv->dev), *qpn, reg_id);
+
+steer_err:
+ mlx4_qp_release_range(dev, *qpn, 1);
+
+qp_err:
+ mlx4_unregister_mac(dev, priv->port, mac);
+ return err;
}
-static void mlx4_en_do_set_multicast(struct work_struct *work)
+static void mlx4_en_put_qp(struct mlx4_en_priv *priv)
{
- struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
- mcast_task);
- struct net_device *dev = priv->dev;
struct mlx4_en_dev *mdev = priv->mdev;
- int err;
+ struct mlx4_dev *dev = mdev->dev;
+ int qpn = priv->base_qpn;
+ u64 mac;
- mutex_lock(&mdev->state_lock);
- if (!mdev->device_up) {
- en_dbg(HW, priv, "Card is not up, "
- "ignoring multicast change.\n");
- goto out;
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
+ mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev));
+ en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n",
+ IF_LLADDR(priv->dev));
+ mlx4_unregister_mac(dev, priv->port, mac);
+ } else {
+ struct mlx4_mac_entry *entry;
+ struct hlist_node *n, *tmp;
+ struct hlist_head *bucket;
+ unsigned int i;
+
+ for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) {
+ bucket = &priv->mac_hash[i];
+ hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) {
+ mac = mlx4_mac_to_u64(entry->mac);
+ en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n",
+ entry->mac);
+ mlx4_en_uc_steer_release(priv, entry->mac,
+ qpn, entry->reg_id);
+
+ mlx4_unregister_mac(dev, priv->port, mac);
+ hlist_del(&entry->hlist);
+ kfree(entry);
+ }
+ }
+
+ en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n",
+ priv->port, qpn);
+ mlx4_qp_release_range(dev, qpn, 1);
+ priv->flags &= ~MLX4_EN_FLAG_FORCE_PROMISC;
}
- if (!priv->port_up) {
- en_dbg(HW, priv, "Port is down, "
- "ignoring multicast change.\n");
- goto out;
+}
+
+static void mlx4_en_clear_list(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_mc_list *tmp, *mc_to_del;
+
+ list_for_each_entry_safe(mc_to_del, tmp, &priv->mc_list, list) {
+ list_del(&mc_to_del->list);
+ kfree(mc_to_del);
}
+}
- /*
- * Promsicuous mode: disable all filters
+static void mlx4_en_cache_mclist(struct net_device *dev)
+{
+ struct ifmultiaddr *ifma;
+ struct mlx4_en_mc_list *tmp;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if_maddr_rlock(dev);
+ TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+ if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen !=
+ ETHER_ADDR_LEN)
+ continue;
+ /* Allocate and queue a cache entry for this address. */
+ tmp = kzalloc(sizeof(struct mlx4_en_mc_list), GFP_ATOMIC);
+ if (tmp == NULL) {
+ en_err(priv, "Failed to allocate multicast list\n");
+ break;
+ }
+ memcpy(tmp->addr,
+ LLADDR((struct sockaddr_dl *)ifma->ifma_addr), ETH_ALEN);
+ list_add_tail(&tmp->list, &priv->mc_list);
+ }
+ if_maddr_runlock(dev);
+}
+
+static void update_mclist_flags(struct mlx4_en_priv *priv,
+ struct list_head *dst,
+ struct list_head *src)
+{
+ struct mlx4_en_mc_list *dst_tmp, *src_tmp, *new_mc;
+ bool found;
+
+ /* Find all the entries that should be removed from dst,
+ * These are the entries that are not found in src
*/
+ list_for_each_entry(dst_tmp, dst, list) {
+ found = false;
+ list_for_each_entry(src_tmp, src, list) {
+ if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ dst_tmp->action = MCLIST_REM;
+ }
- if (dev->if_flags & IFF_PROMISC) {
- if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) {
- priv->flags |= MLX4_EN_FLAG_PROMISC;
+ /* Add entries that exist in src but not in dst
+ * mark them as need to add
+ */
+ list_for_each_entry(src_tmp, src, list) {
+ found = false;
+ list_for_each_entry(dst_tmp, dst, list) {
+ if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) {
+ dst_tmp->action = MCLIST_NONE;
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ new_mc = kmalloc(sizeof(struct mlx4_en_mc_list),
+ GFP_KERNEL);
+ if (!new_mc) {
+ en_err(priv, "Failed to allocate current multicast list\n");
+ return;
+ }
+ memcpy(new_mc, src_tmp,
+ sizeof(struct mlx4_en_mc_list));
+ new_mc->action = MCLIST_ADD;
+ list_add_tail(&new_mc->list, dst);
+ }
+ }
+}
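
update_mclist_flags() is a two-pass set difference: the first pass marks entries present in dst but absent from src as MCLIST_REM, the second copies entries present in src but absent from dst into dst as MCLIST_ADD. The same logic on plain string arrays, purely for illustration:

    #include <stdio.h>
    #include <string.h>

    static int in_list(const char *a, const char *list[], int n)
    {
            for (int i = 0; i < n; i++)
                    if (!strcmp(a, list[i]))
                            return 1;
            return 0;
    }

    int main(void)
    {
            const char *curr[] = { "aa:bb", "cc:dd" };  /* programmed in HW */
            const char *want[] = { "cc:dd", "ee:ff" };  /* freshly cached */

            for (int i = 0; i < 2; i++)     /* pass 1: stale entries */
                    if (!in_list(curr[i], want, 2))
                            printf("MCLIST_REM %s\n", curr[i]);
            for (int i = 0; i < 2; i++)     /* pass 2: new entries */
                    if (!in_list(want[i], curr, 2))
                            printf("MCLIST_ADD %s\n", want[i]);
            return 0;
    }
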
- /* Enable promiscouos mode */
- err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port,
- priv->base_qpn, 1);
+static void mlx4_en_set_rx_mode(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if (!priv->port_up)
+ return;
+
+ queue_work(priv->mdev->workqueue, &priv->rx_mode_task);
+}
+
+static void mlx4_en_set_promisc_mode(struct mlx4_en_priv *priv,
+ struct mlx4_en_dev *mdev)
+{
+ int err = 0;
+ if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) {
+ priv->flags |= MLX4_EN_FLAG_PROMISC;
+
+ /* Enable promiscuous mode */
+ switch (mdev->dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ err = mlx4_flow_steer_promisc_add(mdev->dev,
+ priv->port,
+ priv->base_qpn,
+ MLX4_FS_ALL_DEFAULT);
if (err)
- en_err(priv, "Failed enabling "
- "promiscous mode\n");
+ en_err(priv, "Failed enabling promiscuous mode\n");
+ priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
+ break;
- /* Disable port multicast filter (unconditionally) */
- err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
- 0, MLX4_MCAST_DISABLE);
+ case MLX4_STEERING_MODE_B0:
+ err = mlx4_unicast_promisc_add(mdev->dev,
+ priv->base_qpn,
+ priv->port);
if (err)
- en_err(priv, "Failed disabling "
- "multicast filter\n");
+ en_err(priv, "Failed enabling unicast promiscuous mode\n");
- /* Disable port VLAN filter */
- err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, NULL);
+ /* Add the default qp number as multicast
+ * promisc
+ */
+ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) {
+ err = mlx4_multicast_promisc_add(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ if (err)
+ en_err(priv, "Failed enabling multicast promiscuous mode\n");
+ priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
+ }
+ break;
+
+ case MLX4_STEERING_MODE_A0:
+ err = mlx4_SET_PORT_qpn_calc(mdev->dev,
+ priv->port,
+ priv->base_qpn,
+ 1);
if (err)
- en_err(priv, "Failed disabling VLAN filter\n");
+ en_err(priv, "Failed enabling promiscuous mode\n");
+ break;
}
- goto out;
+
+ /* Disable port multicast filter (unconditionally) */
+ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
+ 0, MLX4_MCAST_DISABLE);
+ if (err)
+ en_err(priv, "Failed disabling multicast filter\n");
}
+}
- /*
- * Not in promiscous mode
- */
+static void mlx4_en_clear_promisc_mode(struct mlx4_en_priv *priv,
+ struct mlx4_en_dev *mdev)
+{
+ int err = 0;
- if (priv->flags & MLX4_EN_FLAG_PROMISC) {
- priv->flags &= ~MLX4_EN_FLAG_PROMISC;
+ priv->flags &= ~MLX4_EN_FLAG_PROMISC;
- /* Disable promiscouos mode */
- err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port,
- priv->base_qpn, 0);
+ /* Disable promiscuous mode */
+ switch (mdev->dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ err = mlx4_flow_steer_promisc_remove(mdev->dev,
+ priv->port,
+ MLX4_FS_ALL_DEFAULT);
if (err)
- en_err(priv, "Failed disabling promiscous mode\n");
+ en_err(priv, "Failed disabling promiscuous mode\n");
+ priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+ break;
- /* Enable port VLAN filter */
- err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlans);
+ case MLX4_STEERING_MODE_B0:
+ err = mlx4_unicast_promisc_remove(mdev->dev,
+ priv->base_qpn,
+ priv->port);
if (err)
- en_err(priv, "Failed enabling VLAN filter\n");
+ en_err(priv, "Failed disabling unicast promiscuous mode\n");
+ /* Disable Multicast promisc */
+ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
+ err = mlx4_multicast_promisc_remove(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ if (err)
+ en_err(priv, "Failed disabling multicast promiscuous mode\n");
+ priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+ }
+ break;
+
+ case MLX4_STEERING_MODE_A0:
+ err = mlx4_SET_PORT_qpn_calc(mdev->dev,
+ priv->port,
+ priv->base_qpn, 0);
+ if (err)
+ en_err(priv, "Failed disabling promiscuous mode\n");
+ break;
}
+}
+static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
+ struct net_device *dev,
+ struct mlx4_en_dev *mdev)
+{
+ struct mlx4_en_mc_list *mclist, *tmp;
+ u8 mc_list[16] = {0};
+ int err = 0;
+ u64 mcast_addr = 0;
+
+
/* Enable/disable the multicast filter according to IFF_ALLMULTI */
if (dev->if_flags & IFF_ALLMULTI) {
err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
@@ -232,11 +847,54 @@
0, MLX4_MCAST_DISABLE);
if (err)
en_err(priv, "Failed disabling multicast filter\n");
+
+ /* Add the default qp number as multicast promisc */
+ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) {
+ switch (mdev->dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ err = mlx4_flow_steer_promisc_add(mdev->dev,
+ priv->port,
+ priv->base_qpn,
+ MLX4_FS_MC_DEFAULT);
+ break;
+
+ case MLX4_STEERING_MODE_B0:
+ err = mlx4_multicast_promisc_add(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ break;
+
+ case MLX4_STEERING_MODE_A0:
+ break;
+ }
+ if (err)
+ en_err(priv, "Failed entering multicast promisc mode\n");
+ priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
+ }
} else {
- u64 *mcaddr;
- int mccount;
- int i;
+ /* Disable Multicast promisc */
+ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
+ switch (mdev->dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ err = mlx4_flow_steer_promisc_remove(mdev->dev,
+ priv->port,
+ MLX4_FS_MC_DEFAULT);
+ break;
+ case MLX4_STEERING_MODE_B0:
+ err = mlx4_multicast_promisc_remove(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ break;
+
+ case MLX4_STEERING_MODE_A0:
+ break;
+ }
+ if (err)
+ en_err(priv, "Failed disabling multicast promiscuous mode\n");
+ priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+ }
+
err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
0, MLX4_MCAST_DISABLE);
if (err)
@@ -248,17 +906,96 @@
/* Update multicast list - we cache all addresses so they won't
 * change while HW is updated holding the command semaphore */
- mccount = mlx4_en_cache_mclist(dev, &mcaddr);
- for (i = 0; i < mccount; i++)
+ mlx4_en_cache_mclist(dev);
+ list_for_each_entry(mclist, &priv->mc_list, list) {
+ mcast_addr = mlx4_mac_to_u64(mclist->addr);
mlx4_SET_MCAST_FLTR(mdev->dev, priv->port,
- mcaddr[i], 0, MLX4_MCAST_CONFIG);
+ mcast_addr, 0, MLX4_MCAST_CONFIG);
+ }
err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
0, MLX4_MCAST_ENABLE);
if (err)
en_err(priv, "Failed enabling multicast filter\n");
- kfree(mcaddr);
+ update_mclist_flags(priv, &priv->curr_list, &priv->mc_list);
+ list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) {
+ if (mclist->action == MCLIST_REM) {
+ /* detach this address and delete from list */
+ memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
+ mc_list[5] = priv->port;
+ err = mlx4_multicast_detach(mdev->dev,
+ &priv->rss_map.indir_qp,
+ mc_list,
+ MLX4_PROT_ETH,
+ mclist->reg_id);
+ if (err)
+ en_err(priv, "Fail to detach multicast address\n");
+
+ /* remove from list */
+ list_del(&mclist->list);
+ kfree(mclist);
+ } else if (mclist->action == MCLIST_ADD) {
+ /* attach the address */
+ memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
+ /* needed for B0 steering support */
+ mc_list[5] = priv->port;
+ err = mlx4_multicast_attach(mdev->dev,
+ &priv->rss_map.indir_qp,
+ mc_list,
+ priv->port, 0,
+ MLX4_PROT_ETH,
+ &mclist->reg_id);
+ if (err)
+ en_err(priv, "Fail to attach multicast address\n");
+
+ }
+ }
}
+}
+
+static void mlx4_en_do_set_rx_mode(struct work_struct *work)
+{
+ struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+ rx_mode_task);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct net_device *dev = priv->dev;
+
+
+ mutex_lock(&mdev->state_lock);
+ if (!mdev->device_up) {
+ en_dbg(HW, priv, "Card is not up, ignoring rx mode change.\n");
+ goto out;
+ }
+ if (!priv->port_up) {
+ en_dbg(HW, priv, "Port is down, ignoring rx mode change.\n");
+ goto out;
+ }
+ if (!mlx4_en_QUERY_PORT(mdev, priv->port)) {
+ if (priv->port_state.link_state) {
+ priv->last_link_state = MLX4_DEV_EVENT_PORT_UP;
+ /* update netif baudrate */
+ priv->dev->if_baudrate =
+ IF_Mbps(priv->port_state.link_speed);
+ /* Important note: the following call for if_link_state_change
+ * is needed for interface up scenario (start port, link state
+ * change) */
+ if_link_state_change(priv->dev, LINK_STATE_UP);
+ en_dbg(HW, priv, "Link Up\n");
+ }
+ }
+
+ /* Promiscuous mode: disable all filters */
+ if ((dev->if_flags & IFF_PROMISC) ||
+ (priv->flags & MLX4_EN_FLAG_FORCE_PROMISC)) {
+ mlx4_en_set_promisc_mode(priv, mdev);
+ goto out;
+ }
+
+ /* Not in promiscuous mode */
+ if (priv->flags & MLX4_EN_FLAG_PROMISC)
+ mlx4_en_clear_promisc_mode(priv, mdev);
+
+ mlx4_en_do_multicast(priv, dev, mdev);
out:
mutex_unlock(&mdev->state_lock);
}
@@ -272,7 +1009,7 @@
int i;
for (i = 0; i < priv->rx_ring_num; i++) {
- cq = &priv->rx_cq[i];
+ cq = priv->rx_cq[i];
spin_lock_irqsave(&cq->lock, flags);
napi_synchronize(&cq->napi);
mlx4_en_process_rx_cq(dev, cq, 0);
@@ -283,18 +1020,18 @@
static void mlx4_en_watchdog_timeout(void *arg)
{
- struct mlx4_en_priv *priv = arg;
- struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_priv *priv = arg;
+ struct mlx4_en_dev *mdev = priv->mdev;
- en_dbg(DRV, priv, "Scheduling watchdog\n");
- queue_work(mdev->workqueue, &priv->watchdog_task);
- if (priv->port_up)
- callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT,
- mlx4_en_watchdog_timeout, priv);
+ en_dbg(DRV, priv, "Scheduling watchdog\n");
+ queue_work(mdev->workqueue, &priv->watchdog_task);
+ if (priv->port_up)
+ callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT,
+ mlx4_en_watchdog_timeout, priv);
}
-/* XXX This clears user settings in too many cases. */
+
static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv)
{
struct mlx4_en_cq *cq;
@@ -301,7 +1038,7 @@
int i;
/* If we haven't received a specific coalescing setting
- * (module param), we set the moderation paramters as follows:
+ * (module param), we set the moderation parameters as follows:
* - moder_cnt is set to the number of mtu sized packets to
 * satisfy our coalescing target.
* - moder_time is set to a fixed value.
@@ -308,21 +1045,26 @@
*/
priv->rx_frames = MLX4_EN_RX_COAL_TARGET / priv->dev->if_mtu + 1;
priv->rx_usecs = MLX4_EN_RX_COAL_TIME;
- en_dbg(INTR, priv, "Default coalesing params for mtu:%ld - "
- "rx_frames:%d rx_usecs:%d\n",
- priv->dev->if_mtu, priv->rx_frames, priv->rx_usecs);
+ priv->tx_frames = MLX4_EN_TX_COAL_PKTS;
+ priv->tx_usecs = MLX4_EN_TX_COAL_TIME;
+ en_dbg(INTR, priv, "Default coalesing params for mtu: %u - "
+ "rx_frames:%d rx_usecs:%d\n",
+ (unsigned)priv->dev->if_mtu, priv->rx_frames, priv->rx_usecs);
/* Setup cq moderation params */
for (i = 0; i < priv->rx_ring_num; i++) {
- cq = &priv->rx_cq[i];
+ cq = priv->rx_cq[i];
cq->moder_cnt = priv->rx_frames;
cq->moder_time = priv->rx_usecs;
+ priv->last_moder_time[i] = MLX4_EN_AUTO_CONF;
+ priv->last_moder_packets[i] = 0;
+ priv->last_moder_bytes[i] = 0;
}
for (i = 0; i < priv->tx_ring_num; i++) {
- cq = &priv->tx_cq[i];
- cq->moder_cnt = MLX4_EN_TX_COAL_PKTS;
- cq->moder_time = MLX4_EN_TX_COAL_TIME;
+ cq = priv->tx_cq[i];
+ cq->moder_cnt = priv->tx_frames;
+ cq->moder_time = priv->tx_usecs;
}
/* Reset auto-moderation params */
@@ -332,11 +1074,8 @@
priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH;
priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL;
priv->adaptive_rx_coal = 1;
- priv->last_moder_time = MLX4_EN_AUTO_CONF;
priv->last_moder_jiffies = 0;
- priv->last_moder_packets = 0;
priv->last_moder_tx_packets = 0;
- priv->last_moder_bytes = 0;
}
static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv)
@@ -348,45 +1087,31 @@
unsigned long avg_pkt_size;
unsigned long rx_packets;
unsigned long rx_bytes;
- unsigned long tx_packets;
- unsigned long tx_pkt_diff;
unsigned long rx_pkt_diff;
int moder_time;
- int i, err;
+ int ring, err;
if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ)
return;
- spin_lock(&priv->stats_lock);
- rx_packets = priv->dev->if_ipackets;
- rx_bytes = priv->dev->if_ibytes;
- tx_packets = priv->dev->if_opackets;
- spin_unlock(&priv->stats_lock);
+ for (ring = 0; ring < priv->rx_ring_num; ring++) {
+ spin_lock(&priv->stats_lock);
+ rx_packets = priv->rx_ring[ring]->packets;
+ rx_bytes = priv->rx_ring[ring]->bytes;
+ spin_unlock(&priv->stats_lock);
- if (!priv->last_moder_jiffies || !period)
- goto out;
+ rx_pkt_diff = ((unsigned long) (rx_packets -
+ priv->last_moder_packets[ring]));
+ packets = rx_pkt_diff;
+ rate = packets * HZ / period;
+ avg_pkt_size = packets ? ((unsigned long) (rx_bytes -
+ priv->last_moder_bytes[ring])) / packets : 0;
- tx_pkt_diff = ((unsigned long) (tx_packets -
- priv->last_moder_tx_packets));
- rx_pkt_diff = ((unsigned long) (rx_packets -
- priv->last_moder_packets));
- packets = max(tx_pkt_diff, rx_pkt_diff);
- rate = packets * HZ / period;
- avg_pkt_size = packets ? ((unsigned long) (rx_bytes -
- priv->last_moder_bytes)) / packets : 0;
-
- /* Apply auto-moderation only when packet rate exceeds a rate that
- * it matters */
- if (rate > MLX4_EN_RX_RATE_THRESH) {
- /* If tx and rx packet rates are not balanced, assume that
- * traffic is mainly BW bound and apply maximum moderation.
- * Otherwise, moderate according to packet rate */
- if (2 * tx_pkt_diff > 3 * rx_pkt_diff ||
- 2 * rx_pkt_diff > 3 * tx_pkt_diff) {
- moder_time = priv->rx_usecs_high;
- } else {
- if (rate < priv->pkt_rate_low ||
- avg_pkt_size < MLX4_EN_AVG_PKT_SMALL)
+ /* Apply auto-moderation only when packet rate
+ * exceeds a rate that it matters */
+ if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) &&
+ avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) {
+ if (rate < priv->pkt_rate_low)
moder_time = priv->rx_usecs_low;
else if (rate > priv->pkt_rate_high)
moder_time = priv->rx_usecs_high;
@@ -395,82 +1120,26 @@
(priv->rx_usecs_high - priv->rx_usecs_low) /
(priv->pkt_rate_high - priv->pkt_rate_low) +
priv->rx_usecs_low;
+ } else {
+ moder_time = priv->rx_usecs_low;
}
- } else {
- /* When packet rate is low, use default moderation rather than
- * 0 to prevent interrupt storms if traffic suddenly increases */
- moder_time = priv->rx_usecs;
- }
- en_dbg(INTR, priv, "tx rate:%lu rx_rate:%lu\n",
- tx_pkt_diff * HZ / period, rx_pkt_diff * HZ / period);
-
- en_dbg(INTR, priv, "Rx moder_time changed from:%d to %d period:%lu "
- "[jiff] packets:%lu avg_pkt_size:%lu rate:%lu [p/s])\n",
- priv->last_moder_time, moder_time, period, packets,
- avg_pkt_size, rate);
-
- if (moder_time != priv->last_moder_time) {
- priv->last_moder_time = moder_time;
- for (i = 0; i < priv->rx_ring_num; i++) {
- cq = &priv->rx_cq[i];
+ if (moder_time != priv->last_moder_time[ring]) {
+ priv->last_moder_time[ring] = moder_time;
+ cq = priv->rx_cq[ring];
cq->moder_time = moder_time;
err = mlx4_en_set_cq_moder(priv, cq);
- if (err) {
- en_err(priv, "Failed modifying moderation for cq:%d\n", i);
- break;
- }
+ if (err)
+ en_err(priv, "Failed modifying moderation for cq:%d\n",
+ ring);
}
+ priv->last_moder_packets[ring] = rx_packets;
+ priv->last_moder_bytes[ring] = rx_bytes;
}
-out:
- priv->last_moder_packets = rx_packets;
- priv->last_moder_tx_packets = tx_packets;
- priv->last_moder_bytes = rx_bytes;
priv->last_moder_jiffies = jiffies;
}
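
Between pkt_rate_low and pkt_rate_high the per-ring moderation time above is a straight linear interpolation between rx_usecs_low and rx_usecs_high. A worked sketch with assumed defaults (the driver's actual values come from mlx4_en.h and can be overridden at runtime):

    #include <stdio.h>

    /* Assumed values; the driver's defaults live in mlx4_en.h. */
    #define RX_USECS_LOW    16
    #define RX_USECS_HIGH   128
    #define PKT_RATE_LOW    100000
    #define PKT_RATE_HIGH   400000

    int main(void)
    {
            unsigned long rate = 250000;    /* packets/sec on this ring */
            int moder_time = (rate - PKT_RATE_LOW) *
                (RX_USECS_HIGH - RX_USECS_LOW) /
                (PKT_RATE_HIGH - PKT_RATE_LOW) + RX_USECS_LOW;

            /* (250000-100000)*112/300000 + 16 = 56 + 16 = 72 usecs */
            printf("moder_time = %d usecs\n", moder_time);
            return 0;
    }
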
-static void mlx4_en_handle_vlans(struct mlx4_en_priv *priv)
-{
- u8 vlan_register[VLAN_FLTR_SIZE];
- u8 vlan_unregister[VLAN_FLTR_SIZE];
- int i, j, idx;
- u16 vid;
-
- /* cache the vlan data for processing
- * done under lock to avoid changes during work */
- spin_lock(&priv->vlan_lock);
- for (i = 0; i < VLAN_FLTR_SIZE; i++) {
- vlan_register[i] = priv->vlan_register[i];
- priv->vlan_register[i] = 0;
- vlan_unregister[i] = priv->vlan_unregister[i];
- priv->vlan_unregister[i] = 0;
- }
- priv->vlgrp_modified = false;
- spin_unlock(&priv->vlan_lock);
-
- /* Configure the vlan filter
- * The vlgrp is updated with all the vids that need to be allowed */
- if (mlx4_SET_VLAN_FLTR(priv->mdev->dev, priv->port, priv->vlans))
- en_err(priv, "Failed configuring VLAN filter\n");
-
- /* Configure the VLAN table */
- for (i = 0; i < VLAN_FLTR_SIZE; i++) {
- for (j = 0; j < 32; j++) {
- vid = (i << 5) + j;
- if (vlan_register[i] & (1 << j))
- if (mlx4_register_vlan(priv->mdev->dev, priv->port, vid, &idx))
- en_dbg(HW, priv, "failed registering vlan %d\n", vid);
- if (vlan_unregister[i] & (1 << j)) {
- if (!mlx4_find_cached_vlan(priv->mdev->dev, priv->port, vid, &idx))
- mlx4_unregister_vlan(priv->mdev->dev, priv->port, idx);
- else
- en_dbg(HW, priv, "could not find vid %d in cache\n", vid);
- }
- }
- }
-}
-
static void mlx4_en_do_get_stats(struct work_struct *work)
{
struct delayed_work *delay = to_delayed_work(work);
@@ -479,16 +1148,12 @@
struct mlx4_en_dev *mdev = priv->mdev;
int err;
- err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0);
- if (err)
- en_dbg(HW, priv, "Could not update stats \n");
-
-
mutex_lock(&mdev->state_lock);
if (mdev->device_up) {
if (priv->port_up) {
- if (priv->vlgrp_modified)
- mlx4_en_handle_vlans(priv);
+ err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0);
+ if (err)
+ en_dbg(HW, priv, "Could not update stats\n");
mlx4_en_auto_moderation(priv);
}
@@ -495,10 +1160,23 @@
queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
}
- if (mdev->mac_removed[MLX4_MAX_PORTS + 1 - priv->port]) {
- panic("mlx4_en_do_get_stats: Unexpected mac removed for %d\n",
- priv->port);
- mdev->mac_removed[MLX4_MAX_PORTS + 1 - priv->port] = 0;
+ mutex_unlock(&mdev->state_lock);
+}
+
+/* mlx4_en_service_task - Run service task for tasks that needed to be done
+ * periodically
+ */
+static void mlx4_en_service_task(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv,
+ service_task);
+ struct mlx4_en_dev *mdev = priv->mdev;
+
+ mutex_lock(&mdev->state_lock);
+ if (mdev->device_up) {
+ queue_delayed_work(mdev->workqueue, &priv->service_task,
+ SERVICE_TASK_DELAY);
}
mutex_unlock(&mdev->state_lock);
}
@@ -515,8 +1193,22 @@
* report to system log */
if (priv->last_link_state != linkstate) {
if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) {
+ en_info(priv, "Link Down\n");
if_link_state_change(priv->dev, LINK_STATE_DOWN);
- } else {
+ /* update netif baudrate */
+ priv->dev->if_baudrate = 0;
+
+ /* make sure the port is up before notifying the OS.
+ * This is tricky since we get here on INIT_PORT and
+ * in such case we can't tell the OS the port is up.
+ * To solve this there is a call to if_link_state_change
+ * in set_rx_mode.
+ */
+ } else if (priv->port_up && (linkstate == MLX4_DEV_EVENT_PORT_UP)) {
+ if (mlx4_en_QUERY_PORT(priv->mdev, priv->port))
+ en_info(priv, "Query port failed\n");
+ priv->dev->if_baudrate =
+ IF_Mbps(priv->port_state.link_speed);
en_info(priv, "Link Up\n");
if_link_state_change(priv->dev, LINK_STATE_UP);
}
@@ -532,21 +1224,26 @@
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_cq *cq;
struct mlx4_en_tx_ring *tx_ring;
- u64 config;
int rx_index = 0;
int tx_index = 0;
int err = 0;
int i;
int j;
+ u8 mc_list[16] = {0};
+
if (priv->port_up) {
en_dbg(DRV, priv, "start port called while port already up\n");
return 0;
}
+ INIT_LIST_HEAD(&priv->mc_list);
+ INIT_LIST_HEAD(&priv->curr_list);
+ INIT_LIST_HEAD(&priv->ethtool_list);
+
/* Calculate Rx buf size */
dev->if_mtu = min(dev->if_mtu, priv->max_mtu);
- mlx4_en_calc_rx_buf(dev);
+ mlx4_en_calc_rx_buf(dev);
en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_mb_size);
/* Configure rx cq's and rings */
@@ -555,11 +1252,11 @@
en_err(priv, "Failed to activate RX rings\n");
return err;
}
-
for (i = 0; i < priv->rx_ring_num; i++) {
- cq = &priv->rx_cq[i];
+ cq = priv->rx_cq[i];
- err = mlx4_en_activate_cq(priv, cq);
+ mlx4_en_cq_init_lock(cq);
+ err = mlx4_en_activate_cq(priv, cq, i);
if (err) {
en_err(priv, "Failed activating Rx CQ\n");
goto cq_err;
@@ -573,23 +1270,43 @@
goto cq_err;
}
mlx4_en_arm_cq(priv, cq);
- priv->rx_ring[i].cqn = cq->mcq.cqn;
+ priv->rx_ring[i]->cqn = cq->mcq.cqn;
++rx_index;
}
+ /* Set qp number */
+ en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port);
+ err = mlx4_en_get_qp(priv);
+ if (err) {
+ en_err(priv, "Failed getting eth qp\n");
+ goto cq_err;
+ }
+ mdev->mac_removed[priv->port] = 0;
+
+ /* gets default allocated counter index from func cap */
+ /* or sink counter index if no resources */
+ priv->counter_index = mdev->dev->caps.def_counter_index[priv->port - 1];
+
+ en_dbg(DRV, priv, "%s: default counter index %d for port %d\n",
+ __func__, priv->counter_index, priv->port);
+
err = mlx4_en_config_rss_steer(priv);
if (err) {
en_err(priv, "Failed configuring rss steering\n");
- goto cq_err;
+ goto mac_err;
}
+ err = mlx4_en_create_drop_qp(priv);
+ if (err)
+ goto rss_err;
+
/* Configure tx cq's and rings */
for (i = 0; i < priv->tx_ring_num; i++) {
/* Configure cq */
- cq = &priv->tx_cq[i];
- err = mlx4_en_activate_cq(priv, cq);
+ cq = priv->tx_cq[i];
+ err = mlx4_en_activate_cq(priv, cq, i);
if (err) {
- en_err(priv, "Failed allocating Tx CQ\n");
+ en_err(priv, "Failed activating Tx CQ\n");
goto tx_err;
}
err = mlx4_en_set_cq_moder(priv, cq);
@@ -602,13 +1319,19 @@
cq->buf->wqe_index = cpu_to_be16(0xffff);
/* Configure ring */
- tx_ring = &priv->tx_ring[i];
- err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn);
+ tx_ring = priv->tx_ring[i];
+
+ err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn,
+ i / priv->num_tx_rings_p_up);
if (err) {
- en_err(priv, "Failed allocating Tx ring\n");
+ en_err(priv, "Failed activating Tx ring %d\n", i);
mlx4_en_deactivate_cq(priv, cq);
goto tx_err;
}
+
+ /* Arm CQ for TX completions */
+ mlx4_en_arm_cq(priv, cq);
+
/* Set initial ownership of all Tx TXBBs to SW (1) */
for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
*((u32 *) (tx_ring->buf + j)) = 0xffffffff;
@@ -617,14 +1340,14 @@
/* Configure port */
err = mlx4_SET_PORT_general(mdev->dev, priv->port,
- priv->rx_mb_size + ETHER_CRC_LEN,
+ priv->rx_mb_size,
priv->prof->tx_pause,
priv->prof->tx_ppp,
priv->prof->rx_pause,
priv->prof->rx_ppp);
if (err) {
- en_err(priv, "Failed setting port general configurations "
- "for port %d, with error %d\n", priv->port, err);
+ en_err(priv, "Failed setting port general configurations for port %d, with error %d\n",
+ priv->port, err);
goto tx_err;
}
/* Set default qp number */
@@ -633,16 +1356,6 @@
en_err(priv, "Failed setting default qp numbers\n");
goto tx_err;
}
- /* Set port mac number */
- en_dbg(DRV, priv, "Setting mac for port %d\n", priv->port);
- err = mlx4_register_mac(mdev->dev, priv->port,
- mlx4_en_mac_to_u64(IF_LLADDR(dev)),
- &priv->mac_index);
- if (err) {
- en_err(priv, "Failed setting port mac\n");
- goto tx_err;
- }
- mdev->mac_removed[priv->port] = 0;
/* Init port */
en_dbg(HW, priv, "Initializing port\n");
@@ -649,71 +1362,54 @@
err = mlx4_INIT_PORT(mdev->dev, priv->port);
if (err) {
en_err(priv, "Failed Initializing port\n");
- goto mac_err;
+ goto tx_err;
}
- /* Set the various hardware offload abilities */
- dev->if_hwassist = 0;
- if (dev->if_capenable & IFCAP_TSO4)
- dev->if_hwassist |= CSUM_TSO;
- if (dev->if_capenable & IFCAP_TXCSUM)
- dev->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
- if (dev->if_capenable & IFCAP_RXCSUM)
- priv->rx_csum = 1;
- else
- priv->rx_csum = 0;
+ /* Attach rx QP to broadcast address */
+ memset(&mc_list[10], 0xff, ETH_ALEN);
+ mc_list[5] = priv->port; /* needed for B0 steering support */
+ if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list,
+ priv->port, 0, MLX4_PROT_ETH,
+ &priv->broadcast_id))
+ mlx4_warn(mdev, "Failed Attaching Broadcast\n");
- err = mlx4_wol_read(priv->mdev->dev, &config, priv->port);
- if (err) {
- en_err(priv, "Failed to get WoL info, unable to modify\n");
- goto wol_err;
- }
- if (dev->if_capenable & IFCAP_WOL_MAGIC) {
- config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED |
- MLX4_EN_WOL_MAGIC;
- } else {
- config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC);
- config |= MLX4_EN_WOL_DO_MODIFY;
- }
+ /* Must redo promiscuous mode setup. */
+ priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC);
- err = mlx4_wol_write(priv->mdev->dev, config, priv->port);
- if (err) {
- en_err(priv, "Failed to set WoL information\n");
- goto wol_err;
- }
+ /* Schedule multicast task to populate multicast list */
+ queue_work(mdev->workqueue, &priv->rx_mode_task);
+ mlx4_set_stats_bitmap(mdev->dev, priv->stats_bitmap);
+
priv->port_up = true;
- /* Populate multicast list */
- mlx4_en_set_multicast(dev);
+ /* Enable the queues. */
+ dev->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ dev->if_drv_flags |= IFF_DRV_RUNNING;
+#ifdef CONFIG_DEBUG_FS
+ mlx4_en_create_debug_files(priv);
+#endif
+ callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT,
+ mlx4_en_watchdog_timeout, priv);
- /* Enable the queues. */
- atomic_clear_int(&dev->if_drv_flags, IFF_DRV_OACTIVE);
- atomic_set_int(&dev->if_drv_flags, IFF_DRV_RUNNING);
- callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT,
- mlx4_en_watchdog_timeout, priv);
-
return 0;
-wol_err:
- /* close port*/
- mlx4_CLOSE_PORT(mdev->dev, priv->port);
-
-mac_err:
- mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index);
tx_err:
while (tx_index--) {
- mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]);
- mlx4_en_deactivate_cq(priv, &priv->tx_cq[tx_index]);
+ mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[tx_index]);
+ mlx4_en_deactivate_cq(priv, priv->tx_cq[tx_index]);
}
-
+ mlx4_en_destroy_drop_qp(priv);
+rss_err:
mlx4_en_release_rss_steer(priv);
+mac_err:
+ mlx4_en_put_qp(priv);
cq_err:
while (rx_index--)
- mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]);
+ mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]);
for (i = 0; i < priv->rx_ring_num; i++)
- mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]);
+ mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
return err; /* need to close devices */
}
@@ -723,7 +1419,9 @@
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_mc_list *mclist, *tmp;
int i;
+ u8 mc_list[16] = {0};
if (!priv->port_up) {
en_dbg(DRV, priv, "stop port called while port already down\n");
@@ -730,38 +1428,94 @@
return;
}
+#ifdef CONFIG_DEBUG_FS
+ mlx4_en_delete_debug_files(priv);
+#endif
+
+ /* close port*/
+ mlx4_CLOSE_PORT(mdev->dev, priv->port);
+
/* Set port as not active */
priv->port_up = false;
+ if (priv->counter_index != 0xff) {
+ mlx4_counter_free(mdev->dev, priv->port, priv->counter_index);
+ priv->counter_index = 0xff;
+ }
- /* Unregister Mac address for the port */
- mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index);
- mdev->mac_removed[priv->port] = 1;
+ /* Promiscuous mode */
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ priv->flags &= ~(MLX4_EN_FLAG_PROMISC |
+ MLX4_EN_FLAG_MC_PROMISC);
+ mlx4_flow_steer_promisc_remove(mdev->dev,
+ priv->port,
+ MLX4_FS_ALL_DEFAULT);
+ mlx4_flow_steer_promisc_remove(mdev->dev,
+ priv->port,
+ MLX4_FS_MC_DEFAULT);
+ } else if (priv->flags & MLX4_EN_FLAG_PROMISC) {
+ priv->flags &= ~MLX4_EN_FLAG_PROMISC;
+ /* Disable promiscuous mode */
+ mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn,
+ priv->port);
+
+ /* Disable Multicast promisc */
+ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
+ mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn,
+ priv->port);
+ priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+ }
+ }
+
+ /* Detach All multicasts */
+ memset(&mc_list[10], 0xff, ETH_ALEN);
+ mc_list[5] = priv->port; /* needed for B0 steering support */
+ mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list,
+ MLX4_PROT_ETH, priv->broadcast_id);
+ list_for_each_entry(mclist, &priv->curr_list, list) {
+ memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
+ mc_list[5] = priv->port;
+ mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp,
+ mc_list, MLX4_PROT_ETH, mclist->reg_id);
+ }
+ mlx4_en_clear_list(dev);
+ list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) {
+ list_del(&mclist->list);
+ kfree(mclist);
+ }
+
+ /* Flush multicast filter */
+ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG);
+ mlx4_en_destroy_drop_qp(priv);
+
/* Free TX Rings */
for (i = 0; i < priv->tx_ring_num; i++) {
- mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[i]);
- mlx4_en_deactivate_cq(priv, &priv->tx_cq[i]);
+ mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[i]);
+ mlx4_en_deactivate_cq(priv, priv->tx_cq[i]);
}
msleep(10);
for (i = 0; i < priv->tx_ring_num; i++)
- mlx4_en_free_tx_buf(dev, &priv->tx_ring[i]);
+ mlx4_en_free_tx_buf(dev, priv->tx_ring[i]);
/* Free RSS qps */
mlx4_en_release_rss_steer(priv);
+ /* Unregister Mac address for the port */
+ mlx4_en_put_qp(priv);
+ mdev->mac_removed[priv->port] = 1;
+
/* Free RX Rings */
for (i = 0; i < priv->rx_ring_num; i++) {
- mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]);
- mlx4_en_deactivate_cq(priv, &priv->rx_cq[i]);
+ struct mlx4_en_cq *cq = priv->rx_cq[i];
+ mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
+ mlx4_en_deactivate_cq(priv, cq);
}
- /* close port*/
- mlx4_CLOSE_PORT(mdev->dev, priv->port);
+ callout_stop(&priv->watchdog_timer);
- callout_stop(&priv->watchdog_timer);
-
- atomic_clear_int(&dev->if_drv_flags, IFF_DRV_RUNNING);
+ dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
}
static void mlx4_en_restart(struct work_struct *work)
@@ -773,12 +1527,13 @@
struct mlx4_en_tx_ring *ring;
int i;
+
if (priv->blocked == 0 || priv->port_up == 0)
return;
for (i = 0; i < priv->tx_ring_num; i++) {
- ring = &priv->tx_ring[i];
+ ring = priv->tx_ring[i];
if (ring->blocked &&
- ring->watchdog_time + MLX4_EN_WATCHDOG_TIMEOUT < ticks)
+ ring->watchdog_time + MLX4_EN_WATCHDOG_TIMEOUT < ticks)
goto reset;
}
return;
@@ -790,6 +1545,8 @@
mutex_lock(&mdev->state_lock);
if (priv->port_up) {
mlx4_en_stop_port(dev);
+ //for (i = 0; i < priv->tx_ring_num; i++)
+ // netdev_tx_reset_queue(priv->tx_ring[i]->tx_queue);
if (mlx4_en_start_port(dev))
en_err(priv, "Failed restarting port %d\n", priv->port);
}
@@ -796,21 +1553,49 @@
mutex_unlock(&mdev->state_lock);
}
-
-static void
-mlx4_en_init(void *arg)
+static void mlx4_en_clear_stats(struct net_device *dev)
{
- struct mlx4_en_priv *priv;
- struct mlx4_en_dev *mdev;
- struct ifnet *dev;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
int i;
- priv = arg;
- dev = priv->dev;
- mdev = priv->mdev;
+ if (!mlx4_is_slave(mdev->dev))
+ if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1))
+ en_dbg(HW, priv, "Failed dumping statistics\n");
+
+ memset(&priv->pstats, 0, sizeof(priv->pstats));
+ memset(&priv->pkstats, 0, sizeof(priv->pkstats));
+ memset(&priv->port_stats, 0, sizeof(priv->port_stats));
+ memset(&priv->vport_stats, 0, sizeof(priv->vport_stats));
+
+ for (i = 0; i < priv->tx_ring_num; i++) {
+ priv->tx_ring[i]->bytes = 0;
+ priv->tx_ring[i]->packets = 0;
+ priv->tx_ring[i]->tx_csum = 0;
+ priv->tx_ring[i]->oversized_packets = 0;
+ }
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ priv->rx_ring[i]->bytes = 0;
+ priv->rx_ring[i]->packets = 0;
+ priv->rx_ring[i]->csum_ok = 0;
+ priv->rx_ring[i]->csum_none = 0;
+ }
+}
+
+static void mlx4_en_open(void* arg)
+{
+
+ struct mlx4_en_priv *priv;
+ struct mlx4_en_dev *mdev;
+ struct net_device *dev;
+ int err = 0;
+
+ priv = arg;
+ mdev = priv->mdev;
+ dev = priv->dev;
+
+
mutex_lock(&mdev->state_lock);
- if (dev->if_drv_flags & IFF_DRV_RUNNING)
- mlx4_en_stop_port(dev);
if (!mdev->device_up) {
en_err(priv, "Cannot open - device down/disabled\n");
@@ -817,27 +1602,16 @@
goto out;
}
- /* Reset HW statistics and performance counters */
- if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1))
- en_dbg(HW, priv, "Failed dumping statistics\n");
+ /* Reset HW statistics and SW counters */
+ mlx4_en_clear_stats(dev);
- memset(&priv->pstats, 0, sizeof(priv->pstats));
-
- for (i = 0; i < priv->tx_ring_num; i++) {
- priv->tx_ring[i].bytes = 0;
- priv->tx_ring[i].packets = 0;
- }
- for (i = 0; i < priv->rx_ring_num; i++) {
- priv->rx_ring[i].bytes = 0;
- priv->rx_ring[i].packets = 0;
- }
-
- mlx4_en_set_default_moderation(priv);
- if (mlx4_en_start_port(dev))
+ err = mlx4_en_start_port(dev);
+ if (err)
en_err(priv, "Failed starting port:%d\n", priv->port);
out:
mutex_unlock(&mdev->state_lock);
+ return;
}
void mlx4_en_free_resources(struct mlx4_en_priv *priv)
@@ -844,23 +1618,30 @@
{
int i;
+#ifdef CONFIG_RFS_ACCEL
+ if (priv->dev->rx_cpu_rmap) {
+ free_irq_cpu_rmap(priv->dev->rx_cpu_rmap);
+ priv->dev->rx_cpu_rmap = NULL;
+ }
+#endif
+
for (i = 0; i < priv->tx_ring_num; i++) {
- if (priv->tx_ring[i].tx_info)
+ if (priv->tx_ring && priv->tx_ring[i])
mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]);
- if (priv->tx_cq[i].buf)
+ if (priv->tx_cq && priv->tx_cq[i])
mlx4_en_destroy_cq(priv, &priv->tx_cq[i]);
}
for (i = 0; i < priv->rx_ring_num; i++) {
- if (priv->rx_ring[i].rx_info)
- mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i]);
- if (priv->rx_cq[i].buf)
+ if (priv->rx_ring[i])
+ mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i],
+ priv->prof->rx_ring_size, priv->stride);
+ if (priv->rx_cq[i])
mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
}
- /* Free the stats tree when we resize the rings. */
+
if (priv->sysctl)
sysctl_ctx_free(&priv->stat_ctx);
-
}
int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
@@ -867,44 +1648,71 @@
{
struct mlx4_en_port_profile *prof = priv->prof;
int i;
+ int node = 0;
- /* Create tx Rings */
- for (i = 0; i < priv->tx_ring_num; i++) {
- if (mlx4_en_create_cq(priv, &priv->tx_cq[i],
- prof->tx_ring_size, i, TX))
+ /* Create rx Rings */
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ if (mlx4_en_create_cq(priv, &priv->rx_cq[i],
+ prof->rx_ring_size, i, RX, node))
goto err;
- if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i],
- prof->tx_ring_size, TXBB_SIZE))
+ if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i],
+ prof->rx_ring_size, node))
goto err;
}
- /* Create rx Rings */
- for (i = 0; i < priv->rx_ring_num; i++) {
- if (mlx4_en_create_cq(priv, &priv->rx_cq[i],
- prof->rx_ring_size, i, RX))
+ /* Create tx Rings */
+ for (i = 0; i < priv->tx_ring_num; i++) {
+ if (mlx4_en_create_cq(priv, &priv->tx_cq[i],
+ prof->tx_ring_size, i, TX, node))
goto err;
- if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i],
- prof->rx_ring_size))
+ if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i],
+ prof->tx_ring_size, TXBB_SIZE, node, i))
goto err;
}
- /* Re-create stat sysctls in case the number of rings changed. */
+#ifdef CONFIG_RFS_ACCEL
+ priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->rx_ring_num);
+ if (!priv->dev->rx_cpu_rmap)
+ goto err;
+#endif
+ /* Re-create stat sysctls in case the number of rings changed. */
mlx4_en_sysctl_stat(priv);
-
- /* Populate Tx priority mappings */
- mlx4_en_set_prio_map(priv, priv->tx_prio_map,
- priv->tx_ring_num - MLX4_EN_NUM_HASH_RINGS);
-
return 0;
err:
en_err(priv, "Failed to allocate NIC resources\n");
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ if (priv->rx_ring[i])
+ mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i],
+ prof->rx_ring_size,
+ priv->stride);
+ if (priv->rx_cq[i])
+ mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
+ }
+ for (i = 0; i < priv->tx_ring_num; i++) {
+ if (priv->tx_ring[i])
+ mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]);
+ if (priv->tx_cq[i])
+ mlx4_en_destroy_cq(priv, &priv->tx_cq[i]);
+ }
+ priv->port_up = false;
return -ENOMEM;
}
+struct en_port_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct en_port *, struct en_port_attribute *, char *buf);
+ ssize_t (*store)(struct en_port *, struct en_port_attribute *, char *buf, size_t count);
+};
+#define PORT_ATTR_RO(_name) \
+struct en_port_attribute en_port_attr_##_name = __ATTR_RO(_name)
+
+#define EN_PORT_ATTR(_name, _mode, _show, _store) \
+struct en_port_attribute en_port_attr_##_name = __ATTR(_name, _mode, _show, _store)
+
void mlx4_en_destroy_netdev(struct net_device *dev)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -912,24 +1720,31 @@
en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port);
- if (priv->vlan_attach != NULL)
- EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
- if (priv->vlan_detach != NULL)
- EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
+ if (priv->vlan_attach != NULL)
+ EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
+ if (priv->vlan_detach != NULL)
+ EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
/* Unregister device - this will close the port if it was up */
- if (priv->registered)
+ if (priv->registered) {
+ mutex_lock(&mdev->state_lock);
ether_ifdetach(dev);
+ mutex_unlock(&mdev->state_lock);
+ }
if (priv->allocated)
mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE);
- if (priv->sysctl)
- sysctl_ctx_free(&priv->conf_ctx);
+ mutex_lock(&mdev->state_lock);
+ mlx4_en_stop_port(dev);
+ mutex_unlock(&mdev->state_lock);
+
cancel_delayed_work(&priv->stats_task);
+ cancel_delayed_work(&priv->service_task);
/* flush any pending task for this netdev */
flush_workqueue(mdev->workqueue);
+ callout_drain(&priv->watchdog_timer);
/* Detach the netdev so tasks would not attempt to access it */
mutex_lock(&mdev->state_lock);
@@ -936,11 +1751,19 @@
mdev->pndev[priv->port] = NULL;
mutex_unlock(&mdev->state_lock);
+
mlx4_en_free_resources(priv);
- mtx_destroy(&priv->stats_lock.m);
- mtx_destroy(&priv->vlan_lock.m);
- kfree(priv);
- if_free(dev);
+
+ /* freeing the sysctl conf cannot be called from within mlx4_en_free_resources */
+ if (priv->sysctl)
+ sysctl_ctx_free(&priv->conf_ctx);
+
+ kfree(priv->tx_ring);
+ kfree(priv->tx_cq);
+
+ kfree(priv);
+ if_free(dev);
+
}
static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
@@ -949,8 +1772,8 @@
struct mlx4_en_dev *mdev = priv->mdev;
int err = 0;
- en_dbg(DRV, priv, "Change MTU called - current:%ld new:%d\n",
- dev->if_mtu, new_mtu);
+ en_dbg(DRV, priv, "Change MTU called - current:%u new:%u\n",
+ (unsigned)dev->if_mtu, (unsigned)new_mtu);
if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) {
en_err(priv, "Bad MTU size:%d.\n", new_mtu);
@@ -961,15 +1784,14 @@
if (dev->if_drv_flags & IFF_DRV_RUNNING) {
if (!mdev->device_up) {
/* NIC is probably restarting - let watchdog task reset
- * the port */
+ * the port */
en_dbg(DRV, priv, "Change MTU called with card down!?\n");
} else {
mlx4_en_stop_port(dev);
- mlx4_en_set_default_moderation(priv);
err = mlx4_en_start_port(dev);
if (err) {
en_err(priv, "Failed restarting port:%d\n",
- priv->port);
+ priv->port);
queue_work(mdev->workqueue, &priv->watchdog_task);
}
}
@@ -986,8 +1808,6 @@
active = IFM_ETHER;
if (priv->last_link_state == MLX4_DEV_EVENT_PORT_DOWN)
return (active);
- if (mlx4_en_QUERY_PORT(priv->mdev, priv->port))
- return (active);
active |= IFM_FDX;
trans_type = priv->port_state.transciver;
/* XXX I don't know all of the transceiver values. */
@@ -1013,7 +1833,6 @@
return (active);
}
-
static void mlx4_en_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
{
struct mlx4_en_priv *priv;
@@ -1048,9 +1867,10 @@
case IFM_10G_SR:
case IFM_10G_CX4:
case IFM_1000_T:
- if (IFM_SUBTYPE(ifm->ifm_media) ==
- IFM_SUBTYPE(mlx4_en_calc_media(priv)) &&
- (ifm->ifm_media & IFM_FDX))
+ case IFM_40G_CR4:
+ if ((IFM_SUBTYPE(ifm->ifm_media)
+ == IFM_SUBTYPE(mlx4_en_calc_media(priv)))
+ && (ifm->ifm_media & IFM_FDX))
break;
/* Fallthrough */
default:
@@ -1087,6 +1907,7 @@
mdev = priv->mdev;
ifr = (struct ifreq *) data;
switch (command) {
+
case SIOCSIFMTU:
error = -mlx4_en_change_mtu(dev, ifr->ifr_mtu);
break;
@@ -1096,8 +1917,9 @@
mutex_lock(&mdev->state_lock);
mlx4_en_start_port(dev);
mutex_unlock(&mdev->state_lock);
- } else
- mlx4_en_set_multicast(dev);
+ } else {
+ mlx4_en_set_rx_mode(dev);
+ }
} else {
mutex_lock(&mdev->state_lock);
if (dev->if_drv_flags & IFF_DRV_RUNNING) {
@@ -1109,7 +1931,7 @@
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
- mlx4_en_set_multicast(dev);
+ mlx4_en_set_rx_mode(dev);
break;
case SIOCSIFMEDIA:
case SIOCGIFMEDIA:
@@ -1116,11 +1938,59 @@
error = ifmedia_ioctl(dev, ifr, &priv->media, command);
break;
case SIOCSIFCAP:
+ mutex_lock(&mdev->state_lock);
mask = ifr->ifr_reqcap ^ dev->if_capenable;
- if (mask & IFCAP_HWCSUM)
- dev->if_capenable ^= IFCAP_HWCSUM;
- if (mask & IFCAP_TSO4)
+ if (mask & IFCAP_TXCSUM) {
+ dev->if_capenable ^= IFCAP_TXCSUM;
+ dev->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
+
+ if (IFCAP_TSO4 & dev->if_capenable &&
+ !(IFCAP_TXCSUM & dev->if_capenable)) {
+ dev->if_capenable &= ~IFCAP_TSO4;
+ dev->if_hwassist &= ~CSUM_TSO;
+ if_printf(dev,
+ "tso4 disabled due to -txcsum.\n");
+ }
+ }
+ if (mask & IFCAP_TXCSUM_IPV6) {
+ dev->if_capenable ^= IFCAP_TXCSUM_IPV6;
+ dev->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
+
+ if (IFCAP_TSO6 & dev->if_capenable &&
+ !(IFCAP_TXCSUM_IPV6 & dev->if_capenable)) {
+ dev->if_capenable &= ~IFCAP_TSO6;
+ dev->if_hwassist &= ~CSUM_TSO;
+ if_printf(dev,
+ "tso6 disabled due to -txcsum6.\n");
+ }
+ }
+ if (mask & IFCAP_RXCSUM)
+ dev->if_capenable ^= IFCAP_RXCSUM;
+ if (mask & IFCAP_RXCSUM_IPV6)
+ dev->if_capenable ^= IFCAP_RXCSUM_IPV6;
+
+ if (mask & IFCAP_TSO4) {
+ if (!(IFCAP_TSO4 & dev->if_capenable) &&
+ !(IFCAP_TXCSUM & dev->if_capenable)) {
+ if_printf(dev, "enable txcsum first.\n");
+ error = EAGAIN;
+ goto out;
+ }
dev->if_capenable ^= IFCAP_TSO4;
+ }
+ if (mask & IFCAP_TSO6) {
+ if (!(IFCAP_TSO6 & dev->if_capenable) &&
+ !(IFCAP_TXCSUM_IPV6 & dev->if_capenable)) {
+ if_printf(dev, "enable txcsum6 first.\n");
+ error = EAGAIN;
+ goto out;
+ }
+ dev->if_capenable ^= IFCAP_TSO6;
+ }
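+ /* Keep the CSUM_TSO flag in if_hwassist consistent with the TSO4/TSO6 capability bits. */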
+ if (dev->if_capenable & (IFCAP_TSO4 | IFCAP_TSO6))
+ dev->if_hwassist |= CSUM_TSO;
+ else
+ dev->if_hwassist &= ~CSUM_TSO;
if (mask & IFCAP_LRO)
dev->if_capenable ^= IFCAP_LRO;
if (mask & IFCAP_VLAN_HWTAGGING)
@@ -1130,9 +2000,36 @@
if (mask & IFCAP_WOL_MAGIC)
dev->if_capenable ^= IFCAP_WOL_MAGIC;
if (dev->if_drv_flags & IFF_DRV_RUNNING)
- mlx4_en_init(priv);
+ mlx4_en_start_port(dev);
+out:
+ mutex_unlock(&mdev->state_lock);
VLAN_CAPABILITIES(dev);
break;
+#if __FreeBSD_version >= 1100036
+ case SIOCGI2C: {
+ struct ifi2creq i2c;
+
+ error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
+ if (error)
+ break;
+ if (i2c.len > sizeof(i2c.data)) {
+ error = EINVAL;
+ break;
+ }
+ /*
+ * Note that we ignore i2c.addr here. The driver hardcodes
+ * the address to 0x50, while standard expects it to be 0xA0.
+ */
+ error = mlx4_get_module_info(mdev->dev, priv->port,
+ i2c.offset, i2c.len, i2c.data);
+ if (error < 0) {
+ error = -error;
+ break;
+ }
+ error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
+ break;
+ }
+#endif
default:
error = ether_ioctl(dev, command, data);
break;
@@ -1141,231 +2038,605 @@
return (error);
}
-static int mlx4_en_set_ring_size(struct net_device *dev,
- int rx_size, int tx_size)
+
+int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
+ struct mlx4_en_port_profile *prof)
{
- struct mlx4_en_priv *priv = netdev_priv(dev);
- struct mlx4_en_dev *mdev = priv->mdev;
- int port_up = 0;
- int err = 0;
+ struct net_device *dev;
+ struct mlx4_en_priv *priv;
+ uint8_t dev_addr[ETHER_ADDR_LEN];
+ int err;
+ int i;
- rx_size = roundup_pow_of_two(rx_size);
- rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE);
- rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE);
- tx_size = roundup_pow_of_two(tx_size);
- tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE);
- tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE);
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ dev = priv->dev = if_alloc(IFT_ETHER);
+ if (dev == NULL) {
+ en_err(priv, "Net device allocation failed\n");
+ kfree(priv);
+ return -ENOMEM;
+ }
+ dev->if_softc = priv;
+ if_initname(dev, "mlxen", atomic_fetchadd_int(&mlx4_en_unit, 1));
+ dev->if_mtu = ETHERMTU;
+ dev->if_init = mlx4_en_open;
+ dev->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ dev->if_ioctl = mlx4_en_ioctl;
+ dev->if_transmit = mlx4_en_transmit;
+ dev->if_qflush = mlx4_en_qflush;
+ dev->if_snd.ifq_maxlen = prof->tx_ring_size;
- if (rx_size == (priv->port_up ?
- priv->rx_ring[0].actual_size : priv->rx_ring[0].size) &&
- tx_size == priv->tx_ring[0].size)
- return 0;
+ /*
+ * Initialize driver private data
+ */
+ priv->counter_index = 0xff;
+ spin_lock_init(&priv->stats_lock);
+ INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode);
+ INIT_WORK(&priv->watchdog_task, mlx4_en_restart);
+ INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate);
+ INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats);
+ INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task);
+ callout_init(&priv->watchdog_timer, 1);
+#ifdef CONFIG_RFS_ACCEL
+ INIT_LIST_HEAD(&priv->filters);
+ spin_lock_init(&priv->filters_lock);
+#endif
- mutex_lock(&mdev->state_lock);
- if (priv->port_up) {
- port_up = 1;
- mlx4_en_stop_port(dev);
+ priv->msg_enable = MLX4_EN_MSG_LEVEL;
+ priv->dev = dev;
+ priv->mdev = mdev;
+ priv->ddev = &mdev->pdev->dev;
+ priv->prof = prof;
+ priv->port = port;
+ priv->port_up = false;
+ priv->flags = prof->flags;
+
+ priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up;
+ priv->tx_ring_num = prof->tx_ring_num;
+ priv->tx_ring = kcalloc(MAX_TX_RINGS,
+ sizeof(struct mlx4_en_tx_ring *), GFP_KERNEL);
+ if (!priv->tx_ring) {
+ err = -ENOMEM;
+ goto out;
}
- mlx4_en_free_resources(priv);
- priv->prof->tx_ring_size = tx_size;
- priv->prof->rx_ring_size = rx_size;
+ priv->tx_cq = kcalloc(MAX_TX_RINGS, sizeof(struct mlx4_en_cq *),
+ GFP_KERNEL);
+ if (!priv->tx_cq) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ priv->rx_ring_num = prof->rx_ring_num;
+ priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
+ priv->mac_index = -1;
+ priv->last_ifq_jiffies = 0;
+ priv->if_counters_rx_errors = 0;
+ priv->if_counters_rx_no_buffer = 0;
+#ifdef CONFIG_MLX4_EN_DCB
+ if (!mlx4_is_slave(priv->mdev->dev)) {
+ priv->dcbx_cap = DCB_CAP_DCBX_HOST;
+ priv->flags |= MLX4_EN_FLAG_DCB_ENABLED;
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) {
+ dev->dcbnl_ops = &mlx4_en_dcbnl_ops;
+ } else {
+ en_info(priv, "QoS disabled - no HW support\n");
+ dev->dcbnl_ops = &mlx4_en_dcbnl_pfc_ops;
+ }
+ }
+#endif
+
+ for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i)
+ INIT_HLIST_HEAD(&priv->mac_hash[i]);
+
+ /* Query for default mac and max mtu */
+ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port];
+ priv->mac = mdev->dev->caps.def_mac[priv->port];
+ if (ILLEGAL_MAC(priv->mac)) {
+#if BITS_PER_LONG == 64
+ en_err(priv, "Port: %d, invalid mac burned: 0x%lx, quiting\n",
+ priv->port, priv->mac);
+#elif BITS_PER_LONG == 32
+ en_err(priv, "Port: %d, invalid mac burned: 0x%llx, quiting\n",
+ priv->port, priv->mac);
+#endif
+ err = -EINVAL;
+ goto out;
+ }
+
+ priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
+ DS_SIZE);
+
+ mlx4_en_sysctl_conf(priv);
+
err = mlx4_en_alloc_resources(priv);
+ if (err)
+ goto out;
+
+ /* Allocate page for receive rings */
+ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res,
+ MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE);
if (err) {
- en_err(priv, "Failed reallocating port resources\n");
+ en_err(priv, "Failed to allocate page for rx qps\n");
goto out;
}
- if (port_up) {
- err = mlx4_en_start_port(dev);
- if (err)
- en_err(priv, "Failed starting port\n");
+ priv->allocated = 1;
+
+ /*
+ * Set driver features
+ */
+ dev->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
+ dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
+ dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
+ dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
+ dev->if_capabilities |= IFCAP_LRO;
+
+ if (mdev->LSO_support)
+ dev->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTSO;
+
+ /* set TSO limits so that we don't have to drop TX packets */
+ dev->if_hw_tsomax = MLX4_EN_TX_MAX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* hdr */;
+ dev->if_hw_tsomaxsegcount = MLX4_EN_TX_MAX_MBUF_FRAGS - 1 /* hdr */;
+ dev->if_hw_tsomaxsegsize = MLX4_EN_TX_MAX_MBUF_SIZE;
+
+ dev->if_capenable = dev->if_capabilities;
+
+ dev->if_hwassist = 0;
+ if (dev->if_capenable & (IFCAP_TSO4 | IFCAP_TSO6))
+ dev->if_hwassist |= CSUM_TSO;
+ if (dev->if_capenable & IFCAP_TXCSUM)
+ dev->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
+ if (dev->if_capenable & IFCAP_TXCSUM_IPV6)
+ dev->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
+
+
+ /* Register for VLAN events */
+ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
+ mlx4_en_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
+ priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
+ mlx4_en_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
+
+ mdev->pndev[priv->port] = dev;
+
+ priv->last_link_state = MLX4_DEV_EVENT_PORT_DOWN;
+ mlx4_en_set_default_moderation(priv);
+
+ /* Set default MAC */
+ for (i = 0; i < ETHER_ADDR_LEN; i++)
+ dev_addr[ETHER_ADDR_LEN - 1 - i] = (u8) (priv->mac >> (8 * i));
+
+
+ ether_ifattach(dev, dev_addr);
+ if_link_state_change(dev, LINK_STATE_DOWN);
+ ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
+ mlx4_en_media_change, mlx4_en_media_status);
+ ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_1000_T, 0, NULL);
+ ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_SR, 0, NULL);
+ ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_CX4, 0, NULL);
+ ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_40G_CR4, 0, NULL);
+ ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
+ ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO);
+
+ en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num);
+ en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
+
+ priv->registered = 1;
+
+ en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num);
+ en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
+
+
+ priv->rx_mb_size = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN;
+ err = mlx4_SET_PORT_general(mdev->dev, priv->port,
+ priv->rx_mb_size,
+ prof->tx_pause, prof->tx_ppp,
+ prof->rx_pause, prof->rx_ppp);
+ if (err) {
+ en_err(priv, "Failed setting port general configurations "
+ "for port %d, with error %d\n", priv->port, err);
+ goto out;
}
+
+ /* Init port */
+ en_warn(priv, "Initializing port\n");
+ err = mlx4_INIT_PORT(mdev->dev, priv->port);
+ if (err) {
+ en_err(priv, "Failed Initializing port\n");
+ goto out;
+ }
+
+ queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
+
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)
+ queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY);
+
+ return 0;
+
out:
- mutex_unlock(&mdev->state_lock);
+ mlx4_en_destroy_netdev(dev);
return err;
}
-static int mlx4_en_set_rx_ring_size(SYSCTL_HANDLER_ARGS)
+static int mlx4_en_set_ring_size(struct net_device *dev,
+ int rx_size, int tx_size)
{
- struct mlx4_en_priv *priv;
- int size;
- int error;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int port_up = 0;
+ int err = 0;
- priv = arg1;
- size = priv->prof->rx_ring_size;
- error = sysctl_handle_int(oidp, &size, 0, req);
- if (error || !req->newptr)
- return (error);
- error = -mlx4_en_set_ring_size(priv->dev, size,
- priv->prof->tx_ring_size);
+ rx_size = roundup_pow_of_two(rx_size);
+ rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE);
+ rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE);
+ tx_size = roundup_pow_of_two(tx_size);
+ tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE);
+ tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE);
- return (error);
+ if (rx_size == (priv->port_up ?
+ priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size) &&
+ tx_size == priv->tx_ring[0]->size)
+ return 0;
+ mutex_lock(&mdev->state_lock);
+ if (priv->port_up) {
+ port_up = 1;
+ mlx4_en_stop_port(dev);
+ }
+ mlx4_en_free_resources(priv);
+ priv->prof->tx_ring_size = tx_size;
+ priv->prof->rx_ring_size = rx_size;
+ err = mlx4_en_alloc_resources(priv);
+ if (err) {
+ en_err(priv, "Failed reallocating port resources\n");
+ goto out;
+ }
+ if (port_up) {
+ err = mlx4_en_start_port(dev);
+ if (err)
+ en_err(priv, "Failed starting port\n");
+ }
+out:
+ mutex_unlock(&mdev->state_lock);
+ return err;
}
+static int mlx4_en_set_rx_ring_size(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx4_en_priv *priv;
+ int size;
+ int error;
+ priv = arg1;
+ size = priv->prof->rx_ring_size;
+ error = sysctl_handle_int(oidp, &size, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ error = -mlx4_en_set_ring_size(priv->dev, size,
+ priv->prof->tx_ring_size);
+ return (error);
+}
+
static int mlx4_en_set_tx_ring_size(SYSCTL_HANDLER_ARGS)
{
- struct mlx4_en_priv *priv;
- int size;
- int error;
+ struct mlx4_en_priv *priv;
+ int size;
+ int error;
- priv = arg1;
- size = priv->prof->tx_ring_size;
- error = sysctl_handle_int(oidp, &size, 0, req);
- if (error || !req->newptr)
- return (error);
- error = -mlx4_en_set_ring_size(priv->dev, priv->prof->rx_ring_size,
- size);
+ priv = arg1;
+ size = priv->prof->tx_ring_size;
+ error = sysctl_handle_int(oidp, &size, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ error = -mlx4_en_set_ring_size(priv->dev, priv->prof->rx_ring_size,
+ size);
- return (error);
+ return (error);
}
-static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS)
+static int mlx4_en_get_module_info(struct net_device *dev,
+ struct ethtool_modinfo *modinfo)
{
- struct mlx4_en_priv *priv;
- int ppp;
- int error;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int ret;
+ u8 data[4];
- priv = arg1;
- ppp = priv->prof->tx_ppp;
- error = sysctl_handle_int(oidp, &ppp, 0, req);
- if (error || !req->newptr)
- return (error);
- if (ppp > 0xff || ppp < 0)
- return (-EINVAL);
- priv->prof->tx_ppp = ppp;
- error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port,
- priv->rx_mb_size + ETHER_CRC_LEN,
- priv->prof->tx_pause,
- priv->prof->tx_ppp,
- priv->prof->rx_pause,
- priv->prof->rx_ppp);
+ /* Read first 2 bytes to get Module & REV ID */
+ ret = mlx4_get_module_info(mdev->dev, priv->port,
+ 0/*offset*/, 2/*size*/, data);
- return (error);
+ if (ret < 2) {
+ en_err(priv, "Failed to read eeprom module first two bytes, error: 0x%x\n", -ret);
+ return -EIO;
+ }
+
+ switch (data[0] /* identifier */) {
+ case MLX4_MODULE_ID_QSFP:
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ break;
+ case MLX4_MODULE_ID_QSFP_PLUS:
+ if (data[1] >= 0x3) { /* revision id */
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ }
+ break;
+ case MLX4_MODULE_ID_QSFP28:
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+ break;
+ case MLX4_MODULE_ID_SFP:
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ break;
+ default:
+ en_err(priv, "mlx4_en_get_module_info : Not recognized cable type\n");
+ return -EINVAL;
+ }
+
+ return 0;
}
-static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS)
+static int mlx4_en_get_module_eeprom(struct net_device *dev,
+ struct ethtool_eeprom *ee,
+ u8 *data)
{
- struct mlx4_en_priv *priv;
- struct mlx4_en_dev *mdev;
- int tx_ring_num;
- int ppp;
- int error;
- int port_up;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int offset = ee->offset;
+ int i = 0, ret;
- port_up = 0;
- priv = arg1;
- mdev = priv->mdev;
- ppp = priv->prof->rx_ppp;
- error = sysctl_handle_int(oidp, &ppp, 0, req);
+ if (ee->len == 0)
+ return -EINVAL;
+
+ memset(data, 0, ee->len);
+
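+ /* mlx4_get_module_info may return fewer bytes than requested, so loop until the full length has been read. */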
+ while (i < ee->len) {
+ en_dbg(DRV, priv,
+ "mlx4_get_module_info i(%d) offset(%d) len(%d)\n",
+ i, offset, ee->len - i);
+
+ ret = mlx4_get_module_info(mdev->dev, priv->port,
+ offset, ee->len - i, data + i);
+
+ if (!ret) /* Done reading */
+ return 0;
+
+ if (ret < 0) {
+ en_err(priv,
+ "mlx4_get_module_info i(%d) offset(%d) bytes_to_read(%d) - FAILED (0x%x)\n",
+ i, offset, ee->len - i, ret);
+ return -1;
+ }
+
+ i += ret;
+ offset += ret;
+ }
+ return 0;
+}
+
+static void mlx4_en_print_eeprom(u8 *data, __u32 len)
+{
+ int i;
+ int j = 0;
+ int row = 0;
+ const int NUM_OF_BYTES = 16;
+
+ printf("\nOffset\t\tValues\n");
+ printf("------\t\t------\n");
+ /* Stop at len so uninitialized trailing bytes are not printed. */
+ while (row < len) {
+ printf("0x%04x\t\t", row);
+ for (i = 0; i < NUM_OF_BYTES && row < len; i++) {
+ printf("%02x ", data[j]);
+ row++;
+ j++;
+ }
+ printf("\n");
+ }
+}
+
+/* Read cable EEPROM module information by first inspecting the first
+ * two bytes to get the length and then read the rest of the information.
+ * The information is printed to dmesg. */
+static int mlx4_en_read_eeprom(SYSCTL_HANDLER_ARGS)
+{
+
+ u8* data;
+ int error;
+ int result = 0;
+ struct mlx4_en_priv *priv;
+ struct net_device *dev;
+ struct ethtool_modinfo modinfo;
+ struct ethtool_eeprom ee;
+
+ error = sysctl_handle_int(oidp, &result, 0, req);
if (error || !req->newptr)
return (error);
- if (ppp > 0xff || ppp < 0)
- return (-EINVAL);
- /* See if we have to change the number of tx queues. */
- if (!ppp != !priv->prof->rx_ppp) {
- tx_ring_num = MLX4_EN_NUM_HASH_RINGS + 1 +
- (!!ppp) * MLX4_EN_NUM_PPP_RINGS;
- mutex_lock(&mdev->state_lock);
- if (priv->port_up) {
- port_up = 1;
- mlx4_en_stop_port(priv->dev);
+
+ if (result == 1) {
+ priv = arg1;
+ dev = priv->dev;
+ data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
+ error = mlx4_en_get_module_info(dev, &modinfo);
+ if (error) {
+ en_err(priv,
+ "mlx4_en_get_module_info returned with error - FAILED (0x%x)\n",
+ -error);
+ goto out;
}
- mlx4_en_free_resources(priv);
- priv->tx_ring_num = tx_ring_num;
- priv->prof->rx_ppp = ppp;
- error = -mlx4_en_alloc_resources(priv);
- if (error)
- en_err(priv, "Failed reallocating port resources\n");
- if (error == 0 && port_up) {
- error = -mlx4_en_start_port(priv->dev);
- if (error)
- en_err(priv, "Failed starting port\n");
+
+ ee.len = modinfo.eeprom_len;
+ ee.offset = 0;
+
+ error = mlx4_en_get_module_eeprom(dev, &ee, data);
+ if (error) {
+ en_err(priv,
+ "mlx4_en_get_module_eeprom returned with error - FAILED (0x%x)\n",
+ -error);
+ /* Continue printing partial information in case of an error */
}
- mutex_unlock(&mdev->state_lock);
- return (error);
+ /* EEPROM information will be printed in dmesg */
+ mlx4_en_print_eeprom(data, ee.len);
+out:
+ kfree(data);
}
- priv->prof->rx_ppp = ppp;
- error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port,
- priv->rx_mb_size + ETHER_CRC_LEN,
- priv->prof->tx_pause,
- priv->prof->tx_ppp,
- priv->prof->rx_pause,
- priv->prof->rx_ppp);
+ /* Return zero to prevent sysctl failure. */
+ return (0);
+}
- return (error);
+static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx4_en_priv *priv;
+ int ppp;
+ int error;
+
+ priv = arg1;
+ ppp = priv->prof->tx_ppp;
+ error = sysctl_handle_int(oidp, &ppp, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (ppp > 0xff || ppp < 0)
+ return (-EINVAL);
+ priv->prof->tx_ppp = ppp;
+ error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port,
+ priv->rx_mb_size + ETHER_CRC_LEN,
+ priv->prof->tx_pause,
+ priv->prof->tx_ppp,
+ priv->prof->rx_pause,
+ priv->prof->rx_ppp);
+
+ return (error);
}
+static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx4_en_priv *priv;
+ struct mlx4_en_dev *mdev;
+ int ppp;
+ int error;
+ int port_up;
+
+ port_up = 0;
+ priv = arg1;
+ mdev = priv->mdev;
+ ppp = priv->prof->rx_ppp;
+ error = sysctl_handle_int(oidp, &ppp, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (ppp > 0xff || ppp < 0)
+ return (-EINVAL);
+ /* See if we have to change the number of tx queues. */
+ if (!ppp != !priv->prof->rx_ppp) {
+ mutex_lock(&mdev->state_lock);
+ if (priv->port_up) {
+ port_up = 1;
+ mlx4_en_stop_port(priv->dev);
+ }
+ mlx4_en_free_resources(priv);
+ priv->prof->rx_ppp = ppp;
+ error = -mlx4_en_alloc_resources(priv);
+ if (error)
+ en_err(priv, "Failed reallocating port resources\n");
+ if (error == 0 && port_up) {
+ error = -mlx4_en_start_port(priv->dev);
+ if (error)
+ en_err(priv, "Failed starting port\n");
+ }
+ mutex_unlock(&mdev->state_lock);
+ return (error);
+
+ }
+ priv->prof->rx_ppp = ppp;
+ error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port,
+ priv->rx_mb_size + ETHER_CRC_LEN,
+ priv->prof->tx_pause,
+ priv->prof->tx_ppp,
+ priv->prof->rx_pause,
+ priv->prof->rx_ppp);
+
+ return (error);
+}
+
static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv)
{
- struct net_device *dev;
- struct sysctl_ctx_list *ctx;
- struct sysctl_oid *node;
- struct sysctl_oid_list *node_list;
- struct sysctl_oid *coal;
- struct sysctl_oid_list *coal_list;
+ struct net_device *dev;
+ struct sysctl_ctx_list *ctx;
+ struct sysctl_oid *node;
+ struct sysctl_oid_list *node_list;
+ struct sysctl_oid *coal;
+ struct sysctl_oid_list *coal_list;
+ const char *pnameunit;
- dev = priv->dev;
- ctx = &priv->conf_ctx;
+ dev = priv->dev;
+ ctx = &priv->conf_ctx;
+ pnameunit = device_get_nameunit(priv->mdev->pdev->dev.bsddev);
- sysctl_ctx_init(ctx);
- priv->sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
- OID_AUTO, dev->if_xname, CTLFLAG_RD, 0, "mlx4 10gig ethernet");
- node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO,
- "conf", CTLFLAG_RD, NULL, "Configuration");
- node_list = SYSCTL_CHILDREN(node);
+ sysctl_ctx_init(ctx);
+ priv->sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
+ OID_AUTO, dev->if_xname, CTLFLAG_RD, 0, "mlx4 10gig ethernet");
+ node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO,
+ "conf", CTLFLAG_RD, NULL, "Configuration");
+ node_list = SYSCTL_CHILDREN(node);
- SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "msg_enable",
- CTLFLAG_RW, &priv->msg_enable, 0,
- "Driver message enable bitfield");
- SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_rings",
- CTLTYPE_INT | CTLFLAG_RD, &priv->rx_ring_num, 0,
- "Number of receive rings");
- SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_rings",
- CTLTYPE_INT | CTLFLAG_RD, &priv->tx_ring_num, 0,
- "Number of transmit rings");
- SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_size",
+ SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "msg_enable",
+ CTLFLAG_RW, &priv->msg_enable, 0,
+ "Driver message enable bitfield");
+ SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_rings",
+ CTLFLAG_RD, &priv->rx_ring_num, 0,
+ "Number of receive rings");
+ SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_rings",
+ CTLFLAG_RD, &priv->tx_ring_num, 0,
+ "Number of transmit rings");
+ SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_size",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
+ mlx4_en_set_rx_ring_size, "I", "Receive ring size");
+ SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_size",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
+ mlx4_en_set_tx_ring_size, "I", "Transmit ring size");
+ SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_ppp",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
+ mlx4_en_set_tx_ppp, "I", "TX Per-priority pause");
+ SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_ppp",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
+ mlx4_en_set_rx_ppp, "I", "RX Per-priority pause");
+ SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "port_num",
+ CTLFLAG_RD, &priv->port, 0,
+ "Port Number");
+ SYSCTL_ADD_STRING(ctx, node_list, OID_AUTO, "device_name",
+ CTLFLAG_RD, __DECONST(void *, pnameunit), 0,
+ "PCI device name");
+
+ /* Add coalescer configuration. */
+ coal = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO,
+ "coalesce", CTLFLAG_RD, NULL, "Interrupt coalesce configuration");
+ coal_list = SYSCTL_CHILDREN(coal);
+ SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_low",
+ CTLFLAG_RW, &priv->pkt_rate_low, 0,
+ "Packets per-second for minimum delay");
+ SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_low",
+ CTLFLAG_RW, &priv->rx_usecs_low, 0,
+ "Minimum RX delay in micro-seconds");
+ SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_high",
+ CTLFLAG_RW, &priv->pkt_rate_high, 0,
+ "Packets per-second for maximum delay");
+ SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_high",
+ CTLFLAG_RW, &priv->rx_usecs_high, 0,
+ "Maximum RX delay in micro-seconds");
+ SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "sample_interval",
+ CTLFLAG_RW, &priv->sample_interval, 0,
+ "adaptive frequency in units of HZ ticks");
+ SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "adaptive_rx_coal",
+ CTLFLAG_RW, &priv->adaptive_rx_coal, 0,
+ "Enable adaptive rx coalescing");
+ /* EEPROM support */
+ SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "eeprom_info",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
- mlx4_en_set_rx_ring_size, "I", "Receive ring size");
- SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_size",
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
- mlx4_en_set_tx_ring_size, "I", "Transmit ring size");
- SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "ip_reasm",
- CTLFLAG_RW, &priv->ip_reasm, 0,
- "Allow reassembly of IP fragments.");
- SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_ppp",
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
- mlx4_en_set_tx_ppp, "I", "TX Per-priority pause");
- SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_ppp",
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
- mlx4_en_set_rx_ppp, "I", "RX Per-priority pause");
-
- /* Add coalescer configuration. */
- coal = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO,
- "coalesce", CTLFLAG_RD, NULL, "Interrupt coalesce configuration");
- coal_list = SYSCTL_CHILDREN(node);
- SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_low",
- CTLFLAG_RW, &priv->pkt_rate_low, 0,
- "Packets per-second for minimum delay");
- SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_low",
- CTLFLAG_RW, &priv->rx_usecs_low, 0,
- "Minimum RX delay in micro-seconds");
- SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_high",
- CTLFLAG_RW, &priv->pkt_rate_high, 0,
- "Packets per-second for maximum delay");
- SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_high",
- CTLFLAG_RW, &priv->rx_usecs_high, 0,
- "Maximum RX delay in micro-seconds");
- SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "sample_interval",
- CTLFLAG_RW, &priv->sample_interval, 0,
- "adaptive frequency in units of HZ ticks");
- SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "adaptive_rx_coal",
- CTLFLAG_RW, &priv->adaptive_rx_coal, 0,
- "Enable adaptive rx coalescing");
+ mlx4_en_read_eeprom, "I", "EEPROM information");
}
static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv)
{
- struct net_device *dev;
struct sysctl_ctx_list *ctx;
struct sysctl_oid *node;
struct sysctl_oid_list *node_list;
@@ -1376,8 +2647,6 @@
char namebuf[128];
int i;
- dev = priv->dev;
-
ctx = &priv->stat_ctx;
sysctl_ctx_init(ctx);
node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO,
@@ -1405,6 +2674,8 @@
&priv->port_stats.wake_queue, "Queue resumed after full");
SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_timeout", CTLFLAG_RD,
&priv->port_stats.tx_timeout, "Transmit timeouts");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_oversized_packets", CTLFLAG_RD,
+ &priv->port_stats.oversized_packets, "TX oversized packets, m_defrag failed");
SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_alloc_failed", CTLFLAG_RD,
&priv->port_stats.rx_alloc_failed, "RX failed to allocate mbuf");
SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_chksum_good", CTLFLAG_RD,
@@ -1416,43 +2687,111 @@
"TX checksum offloads");
/* Could strdup the names and add in a loop. This is simpler. */
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "broadcast", CTLFLAG_RD,
- &priv->pkstats.broadcast, "Broadcast packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio0", CTLFLAG_RD,
- &priv->pkstats.tx_prio[0], "TX Priority 0 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio1", CTLFLAG_RD,
- &priv->pkstats.tx_prio[1], "TX Priority 1 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio2", CTLFLAG_RD,
- &priv->pkstats.tx_prio[2], "TX Priority 2 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio3", CTLFLAG_RD,
- &priv->pkstats.tx_prio[3], "TX Priority 3 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio4", CTLFLAG_RD,
- &priv->pkstats.tx_prio[4], "TX Priority 4 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio5", CTLFLAG_RD,
- &priv->pkstats.tx_prio[5], "TX Priority 5 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio6", CTLFLAG_RD,
- &priv->pkstats.tx_prio[6], "TX Priority 6 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio7", CTLFLAG_RD,
- &priv->pkstats.tx_prio[7], "TX Priority 7 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio0", CTLFLAG_RD,
- &priv->pkstats.rx_prio[0], "RX Priority 0 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio1", CTLFLAG_RD,
- &priv->pkstats.rx_prio[1], "RX Priority 1 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio2", CTLFLAG_RD,
- &priv->pkstats.rx_prio[2], "RX Priority 2 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio3", CTLFLAG_RD,
- &priv->pkstats.rx_prio[3], "RX Priority 3 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio4", CTLFLAG_RD,
- &priv->pkstats.rx_prio[4], "RX Priority 4 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio5", CTLFLAG_RD,
- &priv->pkstats.rx_prio[5], "RX Priority 5 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio6", CTLFLAG_RD,
- &priv->pkstats.rx_prio[6], "RX Priority 6 packets");
- SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio7", CTLFLAG_RD,
- &priv->pkstats.rx_prio[7], "RX Priority 7 packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
+ &priv->pkstats.rx_bytes, "RX Bytes");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
+ &priv->pkstats.rx_packets, "RX packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_multicast_packets", CTLFLAG_RD,
+ &priv->pkstats.rx_multicast_packets, "RX Multicast Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_broadcast_packets", CTLFLAG_RD,
+ &priv->pkstats.rx_broadcast_packets, "RX Broadcast Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_errors", CTLFLAG_RD,
+ &priv->pkstats.rx_errors, "RX Errors");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_dropped", CTLFLAG_RD,
+ &priv->pkstats.rx_dropped, "RX Dropped");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_length_errors", CTLFLAG_RD,
+ &priv->pkstats.rx_length_errors, "RX Length Errors");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_over_errors", CTLFLAG_RD,
+ &priv->pkstats.rx_over_errors, "RX Over Errors");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_crc_errors", CTLFLAG_RD,
+ &priv->pkstats.rx_crc_errors, "RX CRC Errors");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_jabbers", CTLFLAG_RD,
+ &priv->pkstats.rx_jabbers, "RX Jabbers");
+
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_in_range_length_error", CTLFLAG_RD,
+ &priv->pkstats.rx_in_range_length_error, "RX In Range Length Error");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_out_range_length_error",
+ CTLFLAG_RD, &priv->pkstats.rx_out_range_length_error,
+ "RX Out Range Length Error");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_lt_64_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.rx_lt_64_bytes_packets, "RX Lt 64 Bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_127_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.rx_127_bytes_packets, "RX 127 bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_255_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.rx_255_bytes_packets, "RX 255 bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_511_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.rx_511_bytes_packets, "RX 511 bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1023_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.rx_1023_bytes_packets, "RX 1023 bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1518_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.rx_1518_bytes_packets, "RX 1518 bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1522_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.rx_1522_bytes_packets, "RX 1522 bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1548_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.rx_1548_bytes_packets, "RX 1548 bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_gt_1548_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.rx_gt_1548_bytes_packets,
+ "RX Greater Then 1548 bytes Packets");
+
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
+ &priv->pkstats.tx_packets, "TX packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
+ &priv->pkstats.tx_bytes, "TX Bytes");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_multicast_packets", CTLFLAG_RD,
+ &priv->pkstats.tx_multicast_packets, "TX Multicast Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_broadcast_packets", CTLFLAG_RD,
+ &priv->pkstats.tx_broadcast_packets, "TX Broadcast Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_errors", CTLFLAG_RD,
+ &priv->pkstats.tx_errors, "TX Errors");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_dropped", CTLFLAG_RD,
+ &priv->pkstats.tx_dropped, "TX Dropped");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_lt_64_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.tx_lt_64_bytes_packets, "TX Less Than 64 Bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_127_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.tx_127_bytes_packets, "TX 127 Bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_255_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.tx_255_bytes_packets, "TX 255 Bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_511_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.tx_511_bytes_packets, "TX 511 Bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1023_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.tx_1023_bytes_packets, "TX 1023 Bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1518_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.tx_1518_bytes_packets, "TX 1518 Bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1522_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.tx_1522_bytes_packets, "TX 1522 Bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1548_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.tx_1548_bytes_packets, "TX 1548 Bytes Packets");
+ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_gt_1548_bytes_packets", CTLFLAG_RD,
+ &priv->pkstats.tx_gt_1548_bytes_packets,
+ "TX Greater Then 1548 Bytes Packets");
+
+
+
for (i = 0; i < priv->tx_ring_num; i++) {
- tx_ring = &priv->tx_ring[i];
+ tx_ring = priv->tx_ring[i];
snprintf(namebuf, sizeof(namebuf), "tx_ring%d", i);
ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf,
CTLFLAG_RD, NULL, "TX Ring");
@@ -1461,12 +2800,10 @@
CTLFLAG_RD, &tx_ring->packets, "TX packets");
SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "bytes",
CTLFLAG_RD, &tx_ring->bytes, "TX bytes");
- SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "error",
- CTLFLAG_RD, &tx_ring->errors, "TX soft errors");
+ }
- }
for (i = 0; i < priv->rx_ring_num; i++) {
- rx_ring = &priv->rx_ring[i];
+ rx_ring = priv->rx_ring[i];
snprintf(namebuf, sizeof(namebuf), "rx_ring%d", i);
ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf,
CTLFLAG_RD, NULL, "RX Ring");
@@ -1477,149 +2814,5 @@
CTLFLAG_RD, &rx_ring->bytes, "RX bytes");
SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "error",
CTLFLAG_RD, &rx_ring->errors, "RX soft errors");
- SYSCTL_ADD_UINT(ctx, ring_list, OID_AUTO, "lro_queued",
- CTLFLAG_RD, &rx_ring->lro.lro_queued, 0, "LRO Queued");
- SYSCTL_ADD_UINT(ctx, ring_list, OID_AUTO, "lro_flushed",
- CTLFLAG_RD, &rx_ring->lro.lro_flushed, 0, "LRO Flushed");
}
}
-
-int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
- struct mlx4_en_port_profile *prof)
-{
- static volatile int mlx4_en_unit;
- struct net_device *dev;
- struct mlx4_en_priv *priv;
- uint8_t dev_addr[ETHER_ADDR_LEN];
- int err;
- int i;
-
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- dev = priv->dev = if_alloc(IFT_ETHER);
- if (dev == NULL) {
- mlx4_err(mdev, "Net device allocation failed\n");
- kfree(priv);
- return -ENOMEM;
- }
- dev->if_softc = priv;
- if_initname(dev, "mlxen", atomic_fetchadd_int(&mlx4_en_unit, 1));
- dev->if_mtu = ETHERMTU;
- dev->if_baudrate = 1000000000;
- dev->if_init = mlx4_en_init;
- dev->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- dev->if_ioctl = mlx4_en_ioctl;
- dev->if_transmit = mlx4_en_transmit;
- dev->if_qflush = mlx4_en_qflush;
- dev->if_snd.ifq_maxlen = prof->tx_ring_size;
-
- /*
- * Initialize driver private data
- */
- priv->dev = dev;
- priv->mdev = mdev;
- priv->prof = prof;
- priv->port = port;
- priv->port_up = false;
- priv->rx_csum = 1;
- priv->flags = prof->flags;
- priv->tx_ring_num = prof->tx_ring_num;
- priv->rx_ring_num = prof->rx_ring_num;
- priv->mac_index = -1;
- priv->msg_enable = MLX4_EN_MSG_LEVEL;
- priv->ip_reasm = priv->mdev->profile.ip_reasm;
- mtx_init(&priv->stats_lock.m, "mlx4 stats", NULL, MTX_DEF);
- mtx_init(&priv->vlan_lock.m, "mlx4 vlan", NULL, MTX_DEF);
- INIT_WORK(&priv->mcast_task, mlx4_en_do_set_multicast);
- INIT_WORK(&priv->watchdog_task, mlx4_en_restart);
- INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate);
- INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats);
- callout_init(&priv->watchdog_timer, 1);
-
- /* Query for default mac and max mtu */
- priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port];
- priv->mac = mdev->dev->caps.def_mac[priv->port];
-
- if (ILLEGAL_MAC(priv->mac)) {
- en_err(priv, "Port: %d, invalid mac burned: 0x%llx, quiting\n",
- priv->port, priv->mac);
- err = -EINVAL;
- goto out;
- }
-
- mlx4_en_sysctl_conf(priv);
-
- err = mlx4_en_alloc_resources(priv);
- if (err)
- goto out;
-
- /* Allocate page for receive rings */
- err = mlx4_alloc_hwq_res(mdev->dev, &priv->res,
- MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE);
- if (err) {
- en_err(priv, "Failed to allocate page for rx qps\n");
- goto out;
- }
- priv->allocated = 1;
-
- /*
- * Set driver features
- */
- dev->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
- dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
- dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
- dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
- if (mdev->LSO_support)
- dev->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
- if (mdev->profile.num_lro)
- dev->if_capabilities |= IFCAP_LRO;
- dev->if_capenable = dev->if_capabilities;
- /*
- * Setup wake-on-lan.
- */
- if (priv->mdev->dev->caps.wol) {
- u64 config;
- if (mlx4_wol_read(priv->mdev->dev, &config, priv->port) == 0) {
- if (config & MLX4_EN_WOL_MAGIC)
- dev->if_capabilities |= IFCAP_WOL_MAGIC;
- if (config & MLX4_EN_WOL_ENABLED)
- dev->if_capenable |= IFCAP_WOL_MAGIC;
- }
- }
-
- /* Register for VLAN events */
- priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
- mlx4_en_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
- priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
- mlx4_en_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
-
- mdev->pndev[priv->port] = dev;
-
- priv->last_link_state = MLX4_DEV_EVENT_PORT_DOWN;
- if_link_state_change(dev, LINK_STATE_DOWN);
-
- /* Set default MAC */
- for (i = 0; i < ETHER_ADDR_LEN; i++)
- dev_addr[ETHER_ADDR_LEN - 1 - i] = (u8) (priv->mac >> (8 * i));
-
- ether_ifattach(dev, dev_addr);
- ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
- mlx4_en_media_change, mlx4_en_media_status);
- ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_1000_T, 0, NULL);
- ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_SR, 0, NULL);
- ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_CX4, 0, NULL);
- ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
- ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO);
-
- en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num);
- en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
-
- priv->registered = 1;
- queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
-
- return 0;
-
-out:
- mlx4_en_destroy_netdev(dev);
- return err;
-}
-
Modified: trunk/sys/ofed/drivers/net/mlx4/en_port.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/en_port.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/en_port.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -31,27 +31,25 @@
*
*/
-
-#include "mlx4_en.h"
-
+#include <sys/types.h>
#include <linux/if_vlan.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/cmd.h>
+#include "en_port.h"
+#include "mlx4_en.h"
+#define EN_IFQ_MIN_INTERVAL 3000
-int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
- u64 mac, u64 clear, u8 mode)
-{
- return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
- MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B);
-}
-int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, u32 *vlans)
+int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv)
{
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_set_vlan_fltr_mbox *filter;
- int i, j;
+ int i;
+ int j;
+ int index = 0;
+ u32 entry;
int err = 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -59,78 +57,22 @@
return PTR_ERR(mailbox);
filter = mailbox->buf;
- memset(filter, 0, sizeof *filter);
- if (vlans)
- for (i = 0, j = VLAN_FLTR_SIZE - 1; i < VLAN_FLTR_SIZE;
- i++, j--)
- filter->entry[j] = cpu_to_be32(vlans[i]);
- err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_VLAN_FLTR,
- MLX4_CMD_TIME_CLASS_B);
+ memset(filter, 0, sizeof(*filter));
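+ /* Fill the table from the end: entry[VLAN_FLTR_SIZE - 1] holds VLAN IDs 0-31. */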
+ for (i = VLAN_FLTR_SIZE - 1; i >= 0; i--) {
+ entry = 0;
+ for (j = 0; j < 32; j++) {
+ if (test_bit(index, priv->active_vlans))
+ entry |= 1 << j;
+ index++;
+ }
+ filter->entry[i] = cpu_to_be32(entry);
+ }
+ err = mlx4_cmd(dev, mailbox->dma, priv->port, 0, MLX4_CMD_SET_VLAN_FLTR,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
-
-int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
- u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
-{
- struct mlx4_cmd_mailbox *mailbox;
- struct mlx4_set_port_general_context *context;
- int err;
- u32 in_mod;
-
- mailbox = mlx4_alloc_cmd_mailbox(dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
- context = mailbox->buf;
- memset(context, 0, sizeof *context);
-
- context->flags = SET_PORT_GEN_ALL_VALID;
- context->mtu = cpu_to_be16(mtu);
- context->pptx = (pptx * (!pfctx)) << 7;
- context->pfctx = pfctx;
- context->pprx = (pprx * (!pfcrx)) << 7;
- context->pfcrx = pfcrx;
-
- in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
- err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
-
- mlx4_free_cmd_mailbox(dev, mailbox);
- return err;
-}
-
-int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
- u8 promisc)
-{
- struct mlx4_cmd_mailbox *mailbox;
- struct mlx4_set_port_rqp_calc_context *context;
- int err;
- u32 in_mod;
-
- mailbox = mlx4_alloc_cmd_mailbox(dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
- context = mailbox->buf;
- memset(context, 0, sizeof *context);
-
- context->base_qpn = cpu_to_be32(base_qpn);
- context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_EN_SHIFT | base_qpn);
- context->mcast = cpu_to_be32((dev->caps.mc_promisc_mode <<
- SET_PORT_PROMISC_MODE_SHIFT) | base_qpn);
- context->intra_no_vlan = 0;
- context->no_vlan = MLX4_NO_VLAN_IDX;
- context->intra_vlan_miss = 0;
- context->vlan_miss = MLX4_VLAN_MISS_IDX;
-
- in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
- err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
-
- mlx4_free_cmd_mailbox(dev, mailbox);
- return err;
-}
-
int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port)
{
struct mlx4_en_query_port_context *qport_context;
@@ -144,7 +86,8 @@
return PTR_ERR(mailbox);
memset(mailbox->buf, 0, sizeof(*qport_context));
err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
- MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
if (err)
goto out;
qport_context = mailbox->buf;
@@ -160,16 +103,21 @@
case MLX4_EN_10G_SPEED_XFI:
state->link_speed = 10000;
break;
+ case MLX4_EN_20G_SPEED:
+ state->link_speed = 20000;
+ break;
case MLX4_EN_40G_SPEED:
state->link_speed = 40000;
break;
+ case MLX4_EN_56G_SPEED:
+ state->link_speed = 56000;
+ break;
default:
state->link_speed = -1;
break;
}
state->transciver = qport_context->transceiver;
- if (be32_to_cpu(qport_context->transceiver_code_hi) & 0x400)
- state->transciver = 0x80;
+ state->autoneg = !!(qport_context->autoneg & MLX4_EN_AUTONEG_MASK);
out:
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
@@ -176,75 +124,56 @@
return err;
}
-static int read_iboe_counters(struct mlx4_dev *dev, int index, u64 counters[])
+int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
{
- struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_en_stat_out_mbox *mlx4_en_stats;
+ struct mlx4_en_stat_out_flow_control_mbox *flowstats;
+ struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]);
+ struct mlx4_en_vport_stats *vport_stats = &priv->vport_stats;
+ struct mlx4_cmd_mailbox *mailbox = NULL;
+ struct mlx4_cmd_mailbox *mailbox_flow = NULL;
+ u64 in_mod = reset << 8 | port;
int err;
- int mode;
- struct mlx4_counters_ext *ext;
- struct mlx4_counters *reg;
+ int i;
+ int do_if_stat = 1;
+ unsigned long period = (unsigned long) (jiffies - priv->last_ifq_jiffies);
+ struct mlx4_en_vport_stats tmp_vport_stats;
+ struct net_device *dev;
- mailbox = mlx4_alloc_cmd_mailbox(dev);
- if (IS_ERR(mailbox))
- return -ENOMEM;
+ if (jiffies_to_msecs(period) < EN_IFQ_MIN_INTERVAL ||
+ priv->counter_index == 0xff)
+ do_if_stat = 0;
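+ /* Skip the interface stats update when called within EN_IFQ_MIN_INTERVAL ms of the last one or when no HW counter is assigned (0xff). */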
- err = mlx4_cmd_box(dev, 0, mailbox->dma, index, 0,
- MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C);
- if (err)
- goto out;
+ mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto mailbox_out;
+ }
- mode = be32_to_cpu(((struct mlx4_counters *)mailbox->buf)->counter_mode) & 0xf;
- switch (mode) {
- case 0:
- reg = mailbox->buf;
- counters[0] = be64_to_cpu(reg->rx_frames);
- counters[1] = be64_to_cpu(reg->tx_frames);
- counters[2] = be64_to_cpu(reg->rx_bytes);
- counters[3] = be64_to_cpu(reg->tx_bytes);
- break;
- case 1:
- ext = mailbox->buf;
- counters[0] = be64_to_cpu(ext->rx_uni_frames);
- counters[1] = be64_to_cpu(ext->tx_uni_frames);
- counters[2] = be64_to_cpu(ext->rx_uni_bytes);
- counters[3] = be64_to_cpu(ext->tx_uni_bytes);
- break;
- default:
- err = -EINVAL;
+ mailbox_flow = mlx4_alloc_cmd_mailbox(mdev->dev);
+ if (IS_ERR(mailbox_flow)) {
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ err = PTR_ERR(mailbox_flow);
+ goto mailbox_out;
}
-out:
- mlx4_free_cmd_mailbox(dev, mailbox);
- return err;
-}
+ /* 0xffs indicates invalid value */
+ memset(mailbox_flow->buf, 0xff, sizeof(*flowstats) *
+ MLX4_NUM_PRIORITIES);
-int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
-{
- struct mlx4_en_stat_out_mbox *mlx4_en_stats;
- struct net_device *dev;
- struct mlx4_en_priv *priv;
- struct mlx4_cmd_mailbox *mailbox;
- u64 in_mod = reset << 8 | port;
- unsigned long oerror;
- unsigned long ierror;
- int err;
- int i;
- int counter;
- u64 counters[4];
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) {
+ memset(mailbox_flow->buf, 0, sizeof(*flowstats));
+ err = mlx4_cmd_box(mdev->dev, 0, mailbox_flow->dma,
+ in_mod | 1<<12, 0, MLX4_CMD_DUMP_ETH_STATS,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
- dev = mdev->pndev[port];
- priv = netdev_priv(dev);
- memset(counters, 0, sizeof counters);
- counter = mlx4_get_iboe_counter(priv->mdev->dev, port);
- if (counter >= 0)
- err = read_iboe_counters(priv->mdev->dev, counter, counters);
+ if (err)
+ goto out;
+ }
- mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
- memset(mailbox->buf, 0, sizeof(*mlx4_en_stats));
err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0,
- MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
if (err)
goto out;
@@ -252,74 +181,395 @@
spin_lock(&priv->stats_lock);
- oerror = ierror = 0;
- dev->if_ipackets = counters[0];
- dev->if_ibytes = counters[2];
+ priv->port_stats.rx_chksum_good = 0;
+ priv->port_stats.rx_chksum_none = 0;
for (i = 0; i < priv->rx_ring_num; i++) {
- dev->if_ipackets += priv->rx_ring[i].packets;
- dev->if_ibytes += priv->rx_ring[i].bytes;
- ierror += priv->rx_ring[i].errors;
+ priv->port_stats.rx_chksum_good += priv->rx_ring[i]->csum_ok;
+ priv->port_stats.rx_chksum_none += priv->rx_ring[i]->csum_none;
}
- dev->if_opackets = counters[1];
- dev->if_obytes = counters[3];
- for (i = 0; i <= priv->tx_ring_num; i++) {
- dev->if_opackets += priv->tx_ring[i].packets;
- dev->if_obytes += priv->tx_ring[i].bytes;
- oerror += priv->tx_ring[i].errors;
+
+ priv->port_stats.tx_chksum_offload = 0;
+ priv->port_stats.queue_stopped = 0;
+ priv->port_stats.wake_queue = 0;
+ for (i = 0; i < priv->tx_ring_num; i++) {
+ priv->port_stats.tx_chksum_offload += priv->tx_ring[i]->tx_csum;
+ priv->port_stats.queue_stopped += priv->tx_ring[i]->queue_stopped;
+ priv->port_stats.wake_queue += priv->tx_ring[i]->wake_queue;
+ priv->port_stats.oversized_packets += priv->tx_ring[i]->oversized_packets;
}
+ /* RX Statistics */
+ priv->pkstats.rx_packets = be64_to_cpu(mlx4_en_stats->RTOT_prio_0) +
+ be64_to_cpu(mlx4_en_stats->RTOT_prio_1) +
+ be64_to_cpu(mlx4_en_stats->RTOT_prio_2) +
+ be64_to_cpu(mlx4_en_stats->RTOT_prio_3) +
+ be64_to_cpu(mlx4_en_stats->RTOT_prio_4) +
+ be64_to_cpu(mlx4_en_stats->RTOT_prio_5) +
+ be64_to_cpu(mlx4_en_stats->RTOT_prio_6) +
+ be64_to_cpu(mlx4_en_stats->RTOT_prio_7) +
+ be64_to_cpu(mlx4_en_stats->RTOT_novlan);
+ priv->pkstats.rx_bytes = be64_to_cpu(mlx4_en_stats->ROCT_prio_0) +
+ be64_to_cpu(mlx4_en_stats->ROCT_prio_1) +
+ be64_to_cpu(mlx4_en_stats->ROCT_prio_2) +
+ be64_to_cpu(mlx4_en_stats->ROCT_prio_3) +
+ be64_to_cpu(mlx4_en_stats->ROCT_prio_4) +
+ be64_to_cpu(mlx4_en_stats->ROCT_prio_5) +
+ be64_to_cpu(mlx4_en_stats->ROCT_prio_6) +
+ be64_to_cpu(mlx4_en_stats->ROCT_prio_7) +
+ be64_to_cpu(mlx4_en_stats->ROCT_novlan);
+ priv->pkstats.rx_multicast_packets = be64_to_cpu(mlx4_en_stats->MCAST_prio_0) +
+ be64_to_cpu(mlx4_en_stats->MCAST_prio_1) +
+ be64_to_cpu(mlx4_en_stats->MCAST_prio_2) +
+ be64_to_cpu(mlx4_en_stats->MCAST_prio_3) +
+ be64_to_cpu(mlx4_en_stats->MCAST_prio_4) +
+ be64_to_cpu(mlx4_en_stats->MCAST_prio_5) +
+ be64_to_cpu(mlx4_en_stats->MCAST_prio_6) +
+ be64_to_cpu(mlx4_en_stats->MCAST_prio_7) +
+ be64_to_cpu(mlx4_en_stats->MCAST_novlan);
+ priv->pkstats.rx_broadcast_packets = be64_to_cpu(mlx4_en_stats->RBCAST_prio_0) +
+ be64_to_cpu(mlx4_en_stats->RBCAST_prio_1) +
+ be64_to_cpu(mlx4_en_stats->RBCAST_prio_2) +
+ be64_to_cpu(mlx4_en_stats->RBCAST_prio_3) +
+ be64_to_cpu(mlx4_en_stats->RBCAST_prio_4) +
+ be64_to_cpu(mlx4_en_stats->RBCAST_prio_5) +
+ be64_to_cpu(mlx4_en_stats->RBCAST_prio_6) +
+ be64_to_cpu(mlx4_en_stats->RBCAST_prio_7) +
+ be64_to_cpu(mlx4_en_stats->RBCAST_novlan);
+ priv->pkstats.rx_errors = be64_to_cpu(mlx4_en_stats->PCS) +
+ be32_to_cpu(mlx4_en_stats->RJBBR) +
+ be32_to_cpu(mlx4_en_stats->RCRC) +
+ be32_to_cpu(mlx4_en_stats->RRUNT) +
+ be64_to_cpu(mlx4_en_stats->RInRangeLengthErr) +
+ be64_to_cpu(mlx4_en_stats->ROutRangeLengthErr) +
+ be32_to_cpu(mlx4_en_stats->RSHORT) +
+ be64_to_cpu(mlx4_en_stats->RGIANT_prio_0) +
+ be64_to_cpu(mlx4_en_stats->RGIANT_prio_1) +
+ be64_to_cpu(mlx4_en_stats->RGIANT_prio_2) +
+ be64_to_cpu(mlx4_en_stats->RGIANT_prio_3) +
+ be64_to_cpu(mlx4_en_stats->RGIANT_prio_4) +
+ be64_to_cpu(mlx4_en_stats->RGIANT_prio_5) +
+ be64_to_cpu(mlx4_en_stats->RGIANT_prio_6) +
+ be64_to_cpu(mlx4_en_stats->RGIANT_prio_7) +
+ be64_to_cpu(mlx4_en_stats->RGIANT_novlan);
+ priv->pkstats.rx_dropped = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+ priv->pkstats.rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength);
+ priv->pkstats.rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+ priv->pkstats.rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC);
+ priv->pkstats.rx_jabbers = be32_to_cpu(mlx4_en_stats->RJBBR);
+ priv->pkstats.rx_in_range_length_error = be64_to_cpu(mlx4_en_stats->RInRangeLengthErr);
+ priv->pkstats.rx_out_range_length_error = be64_to_cpu(mlx4_en_stats->ROutRangeLengthErr);
+ priv->pkstats.rx_lt_64_bytes_packets = be64_to_cpu(mlx4_en_stats->R64_prio_0) +
+ be64_to_cpu(mlx4_en_stats->R64_prio_1) +
+ be64_to_cpu(mlx4_en_stats->R64_prio_2) +
+ be64_to_cpu(mlx4_en_stats->R64_prio_3) +
+ be64_to_cpu(mlx4_en_stats->R64_prio_4) +
+ be64_to_cpu(mlx4_en_stats->R64_prio_5) +
+ be64_to_cpu(mlx4_en_stats->R64_prio_6) +
+ be64_to_cpu(mlx4_en_stats->R64_prio_7) +
+ be64_to_cpu(mlx4_en_stats->R64_novlan);
+ priv->pkstats.rx_127_bytes_packets = be64_to_cpu(mlx4_en_stats->R127_prio_0) +
+ be64_to_cpu(mlx4_en_stats->R127_prio_1) +
+ be64_to_cpu(mlx4_en_stats->R127_prio_2) +
+ be64_to_cpu(mlx4_en_stats->R127_prio_3) +
+ be64_to_cpu(mlx4_en_stats->R127_prio_4) +
+ be64_to_cpu(mlx4_en_stats->R127_prio_5) +
+ be64_to_cpu(mlx4_en_stats->R127_prio_6) +
+ be64_to_cpu(mlx4_en_stats->R127_prio_7) +
+ be64_to_cpu(mlx4_en_stats->R127_novlan);
+ priv->pkstats.rx_255_bytes_packets = be64_to_cpu(mlx4_en_stats->R255_prio_0) +
+ be64_to_cpu(mlx4_en_stats->R255_prio_1) +
+ be64_to_cpu(mlx4_en_stats->R255_prio_2) +
+ be64_to_cpu(mlx4_en_stats->R255_prio_3) +
+ be64_to_cpu(mlx4_en_stats->R255_prio_4) +
+ be64_to_cpu(mlx4_en_stats->R255_prio_5) +
+ be64_to_cpu(mlx4_en_stats->R255_prio_6) +
+ be64_to_cpu(mlx4_en_stats->R255_prio_7) +
+ be64_to_cpu(mlx4_en_stats->R255_novlan);
+ priv->pkstats.rx_511_bytes_packets = be64_to_cpu(mlx4_en_stats->R511_prio_0) +
+ be64_to_cpu(mlx4_en_stats->R511_prio_1) +
+ be64_to_cpu(mlx4_en_stats->R511_prio_2) +
+ be64_to_cpu(mlx4_en_stats->R511_prio_3) +
+ be64_to_cpu(mlx4_en_stats->R511_prio_4) +
+ be64_to_cpu(mlx4_en_stats->R511_prio_5) +
+ be64_to_cpu(mlx4_en_stats->R511_prio_6) +
+ be64_to_cpu(mlx4_en_stats->R511_prio_7) +
+ be64_to_cpu(mlx4_en_stats->R511_novlan);
+ priv->pkstats.rx_1023_bytes_packets = be64_to_cpu(mlx4_en_stats->R1023_prio_0) +
+ be64_to_cpu(mlx4_en_stats->R1023_prio_1) +
+ be64_to_cpu(mlx4_en_stats->R1023_prio_2) +
+ be64_to_cpu(mlx4_en_stats->R1023_prio_3) +
+ be64_to_cpu(mlx4_en_stats->R1023_prio_4) +
+ be64_to_cpu(mlx4_en_stats->R1023_prio_5) +
+ be64_to_cpu(mlx4_en_stats->R1023_prio_6) +
+ be64_to_cpu(mlx4_en_stats->R1023_prio_7) +
+ be64_to_cpu(mlx4_en_stats->R1023_novlan);
+ priv->pkstats.rx_1518_bytes_packets = be64_to_cpu(mlx4_en_stats->R1518_prio_0) +
+ be64_to_cpu(mlx4_en_stats->R1518_prio_1) +
+ be64_to_cpu(mlx4_en_stats->R1518_prio_2) +
+ be64_to_cpu(mlx4_en_stats->R1518_prio_3) +
+ be64_to_cpu(mlx4_en_stats->R1518_prio_4) +
+ be64_to_cpu(mlx4_en_stats->R1518_prio_5) +
+ be64_to_cpu(mlx4_en_stats->R1518_prio_6) +
+ be64_to_cpu(mlx4_en_stats->R1518_prio_7) +
+ be64_to_cpu(mlx4_en_stats->R1518_novlan);
+ priv->pkstats.rx_1522_bytes_packets = be64_to_cpu(mlx4_en_stats->R1522_prio_0) +
+ be64_to_cpu(mlx4_en_stats->R1522_prio_1) +
+ be64_to_cpu(mlx4_en_stats->R1522_prio_2) +
+ be64_to_cpu(mlx4_en_stats->R1522_prio_3) +
+ be64_to_cpu(mlx4_en_stats->R1522_prio_4) +
+ be64_to_cpu(mlx4_en_stats->R1522_prio_5) +
+ be64_to_cpu(mlx4_en_stats->R1522_prio_6) +
+ be64_to_cpu(mlx4_en_stats->R1522_prio_7) +
+ be64_to_cpu(mlx4_en_stats->R1522_novlan);
+ priv->pkstats.rx_1548_bytes_packets = be64_to_cpu(mlx4_en_stats->R1548_prio_0) +
+ be64_to_cpu(mlx4_en_stats->R1548_prio_1) +
+ be64_to_cpu(mlx4_en_stats->R1548_prio_2) +
+ be64_to_cpu(mlx4_en_stats->R1548_prio_3) +
+ be64_to_cpu(mlx4_en_stats->R1548_prio_4) +
+ be64_to_cpu(mlx4_en_stats->R1548_prio_5) +
+ be64_to_cpu(mlx4_en_stats->R1548_prio_6) +
+ be64_to_cpu(mlx4_en_stats->R1548_prio_7) +
+ be64_to_cpu(mlx4_en_stats->R1548_novlan);
+ priv->pkstats.rx_gt_1548_bytes_packets = be64_to_cpu(mlx4_en_stats->R2MTU_prio_0) +
+ be64_to_cpu(mlx4_en_stats->R2MTU_prio_1) +
+ be64_to_cpu(mlx4_en_stats->R2MTU_prio_2) +
+ be64_to_cpu(mlx4_en_stats->R2MTU_prio_3) +
+ be64_to_cpu(mlx4_en_stats->R2MTU_prio_4) +
+ be64_to_cpu(mlx4_en_stats->R2MTU_prio_5) +
+ be64_to_cpu(mlx4_en_stats->R2MTU_prio_6) +
+ be64_to_cpu(mlx4_en_stats->R2MTU_prio_7) +
+ be64_to_cpu(mlx4_en_stats->R2MTU_novlan);
- dev->if_ierrors = be32_to_cpu(mlx4_en_stats->RDROP) + ierror;
- dev->if_oerrors = be32_to_cpu(mlx4_en_stats->TDROP) + oerror;
- dev->if_imcasts = be64_to_cpu(mlx4_en_stats->MCAST_prio_0) +
- be64_to_cpu(mlx4_en_stats->MCAST_prio_1) +
- be64_to_cpu(mlx4_en_stats->MCAST_prio_2) +
- be64_to_cpu(mlx4_en_stats->MCAST_prio_3) +
- be64_to_cpu(mlx4_en_stats->MCAST_prio_4) +
- be64_to_cpu(mlx4_en_stats->MCAST_prio_5) +
- be64_to_cpu(mlx4_en_stats->MCAST_prio_6) +
- be64_to_cpu(mlx4_en_stats->MCAST_prio_7) +
- be64_to_cpu(mlx4_en_stats->MCAST_novlan);
- dev->if_omcasts = be64_to_cpu(mlx4_en_stats->TMCAST_prio_0) +
- be64_to_cpu(mlx4_en_stats->TMCAST_prio_1) +
- be64_to_cpu(mlx4_en_stats->TMCAST_prio_2) +
- be64_to_cpu(mlx4_en_stats->TMCAST_prio_3) +
- be64_to_cpu(mlx4_en_stats->TMCAST_prio_4) +
- be64_to_cpu(mlx4_en_stats->TMCAST_prio_5) +
- be64_to_cpu(mlx4_en_stats->TMCAST_prio_6) +
- be64_to_cpu(mlx4_en_stats->TMCAST_prio_7) +
- be64_to_cpu(mlx4_en_stats->TMCAST_novlan);
- dev->if_collisions = 0;
+ /* TX Statistics */
+ priv->pkstats.tx_packets = be64_to_cpu(mlx4_en_stats->TTOT_prio_0) +
+ be64_to_cpu(mlx4_en_stats->TTOT_prio_1) +
+ be64_to_cpu(mlx4_en_stats->TTOT_prio_2) +
+ be64_to_cpu(mlx4_en_stats->TTOT_prio_3) +
+ be64_to_cpu(mlx4_en_stats->TTOT_prio_4) +
+ be64_to_cpu(mlx4_en_stats->TTOT_prio_5) +
+ be64_to_cpu(mlx4_en_stats->TTOT_prio_6) +
+ be64_to_cpu(mlx4_en_stats->TTOT_prio_7) +
+ be64_to_cpu(mlx4_en_stats->TTOT_novlan);
+ priv->pkstats.tx_bytes = be64_to_cpu(mlx4_en_stats->TOCT_prio_0) +
+ be64_to_cpu(mlx4_en_stats->TOCT_prio_1) +
+ be64_to_cpu(mlx4_en_stats->TOCT_prio_2) +
+ be64_to_cpu(mlx4_en_stats->TOCT_prio_3) +
+ be64_to_cpu(mlx4_en_stats->TOCT_prio_4) +
+ be64_to_cpu(mlx4_en_stats->TOCT_prio_5) +
+ be64_to_cpu(mlx4_en_stats->TOCT_prio_6) +
+ be64_to_cpu(mlx4_en_stats->TOCT_prio_7) +
+ be64_to_cpu(mlx4_en_stats->TOCT_novlan);
+ priv->pkstats.tx_multicast_packets = be64_to_cpu(mlx4_en_stats->TMCAST_prio_0) +
+ be64_to_cpu(mlx4_en_stats->TMCAST_prio_1) +
+ be64_to_cpu(mlx4_en_stats->TMCAST_prio_2) +
+ be64_to_cpu(mlx4_en_stats->TMCAST_prio_3) +
+ be64_to_cpu(mlx4_en_stats->TMCAST_prio_4) +
+ be64_to_cpu(mlx4_en_stats->TMCAST_prio_5) +
+ be64_to_cpu(mlx4_en_stats->TMCAST_prio_6) +
+ be64_to_cpu(mlx4_en_stats->TMCAST_prio_7) +
+ be64_to_cpu(mlx4_en_stats->TMCAST_novlan);
+ priv->pkstats.tx_broadcast_packets = be64_to_cpu(mlx4_en_stats->TBCAST_prio_0) +
+ be64_to_cpu(mlx4_en_stats->TBCAST_prio_1) +
+ be64_to_cpu(mlx4_en_stats->TBCAST_prio_2) +
+ be64_to_cpu(mlx4_en_stats->TBCAST_prio_3) +
+ be64_to_cpu(mlx4_en_stats->TBCAST_prio_4) +
+ be64_to_cpu(mlx4_en_stats->TBCAST_prio_5) +
+ be64_to_cpu(mlx4_en_stats->TBCAST_prio_6) +
+ be64_to_cpu(mlx4_en_stats->TBCAST_prio_7) +
+ be64_to_cpu(mlx4_en_stats->TBCAST_novlan);
+ priv->pkstats.tx_errors = be64_to_cpu(mlx4_en_stats->TGIANT_prio_0) +
+ be64_to_cpu(mlx4_en_stats->TGIANT_prio_1) +
+ be64_to_cpu(mlx4_en_stats->TGIANT_prio_2) +
+ be64_to_cpu(mlx4_en_stats->TGIANT_prio_3) +
+ be64_to_cpu(mlx4_en_stats->TGIANT_prio_4) +
+ be64_to_cpu(mlx4_en_stats->TGIANT_prio_5) +
+ be64_to_cpu(mlx4_en_stats->TGIANT_prio_6) +
+ be64_to_cpu(mlx4_en_stats->TGIANT_prio_7) +
+ be64_to_cpu(mlx4_en_stats->TGIANT_novlan);
+ priv->pkstats.tx_dropped = be32_to_cpu(mlx4_en_stats->TDROP) -
+ priv->pkstats.tx_errors;
+ priv->pkstats.tx_lt_64_bytes_packets = be64_to_cpu(mlx4_en_stats->T64_prio_0) +
+ be64_to_cpu(mlx4_en_stats->T64_prio_1) +
+ be64_to_cpu(mlx4_en_stats->T64_prio_2) +
+ be64_to_cpu(mlx4_en_stats->T64_prio_3) +
+ be64_to_cpu(mlx4_en_stats->T64_prio_4) +
+ be64_to_cpu(mlx4_en_stats->T64_prio_5) +
+ be64_to_cpu(mlx4_en_stats->T64_prio_6) +
+ be64_to_cpu(mlx4_en_stats->T64_prio_7) +
+ be64_to_cpu(mlx4_en_stats->T64_novlan);
+ priv->pkstats.tx_127_bytes_packets = be64_to_cpu(mlx4_en_stats->T127_prio_0) +
+ be64_to_cpu(mlx4_en_stats->T127_prio_1) +
+ be64_to_cpu(mlx4_en_stats->T127_prio_2) +
+ be64_to_cpu(mlx4_en_stats->T127_prio_3) +
+ be64_to_cpu(mlx4_en_stats->T127_prio_4) +
+ be64_to_cpu(mlx4_en_stats->T127_prio_5) +
+ be64_to_cpu(mlx4_en_stats->T127_prio_6) +
+ be64_to_cpu(mlx4_en_stats->T127_prio_7) +
+ be64_to_cpu(mlx4_en_stats->T127_novlan);
+ priv->pkstats.tx_255_bytes_packets = be64_to_cpu(mlx4_en_stats->T255_prio_0) +
+ be64_to_cpu(mlx4_en_stats->T255_prio_1) +
+ be64_to_cpu(mlx4_en_stats->T255_prio_2) +
+ be64_to_cpu(mlx4_en_stats->T255_prio_3) +
+ be64_to_cpu(mlx4_en_stats->T255_prio_4) +
+ be64_to_cpu(mlx4_en_stats->T255_prio_5) +
+ be64_to_cpu(mlx4_en_stats->T255_prio_6) +
+ be64_to_cpu(mlx4_en_stats->T255_prio_7) +
+ be64_to_cpu(mlx4_en_stats->T255_novlan);
+ priv->pkstats.tx_511_bytes_packets = be64_to_cpu(mlx4_en_stats->T511_prio_0) +
+ be64_to_cpu(mlx4_en_stats->T511_prio_1) +
+ be64_to_cpu(mlx4_en_stats->T511_prio_2) +
+ be64_to_cpu(mlx4_en_stats->T511_prio_3) +
+ be64_to_cpu(mlx4_en_stats->T511_prio_4) +
+ be64_to_cpu(mlx4_en_stats->T511_prio_5) +
+ be64_to_cpu(mlx4_en_stats->T511_prio_6) +
+ be64_to_cpu(mlx4_en_stats->T511_prio_7) +
+ be64_to_cpu(mlx4_en_stats->T511_novlan);
+ priv->pkstats.tx_1023_bytes_packets = be64_to_cpu(mlx4_en_stats->T1023_prio_0) +
+ be64_to_cpu(mlx4_en_stats->T1023_prio_1) +
+ be64_to_cpu(mlx4_en_stats->T1023_prio_2) +
+ be64_to_cpu(mlx4_en_stats->T1023_prio_3) +
+ be64_to_cpu(mlx4_en_stats->T1023_prio_4) +
+ be64_to_cpu(mlx4_en_stats->T1023_prio_5) +
+ be64_to_cpu(mlx4_en_stats->T1023_prio_6) +
+ be64_to_cpu(mlx4_en_stats->T1023_prio_7) +
+ be64_to_cpu(mlx4_en_stats->T1023_novlan);
+ priv->pkstats.tx_1518_bytes_packets = be64_to_cpu(mlx4_en_stats->T1518_prio_0) +
+ be64_to_cpu(mlx4_en_stats->T1518_prio_1) +
+ be64_to_cpu(mlx4_en_stats->T1518_prio_2) +
+ be64_to_cpu(mlx4_en_stats->T1518_prio_3) +
+ be64_to_cpu(mlx4_en_stats->T1518_prio_4) +
+ be64_to_cpu(mlx4_en_stats->T1518_prio_5) +
+ be64_to_cpu(mlx4_en_stats->T1518_prio_6) +
+ be64_to_cpu(mlx4_en_stats->T1518_prio_7) +
+ be64_to_cpu(mlx4_en_stats->T1518_novlan);
+ priv->pkstats.tx_1522_bytes_packets = be64_to_cpu(mlx4_en_stats->T1522_prio_0) +
+ be64_to_cpu(mlx4_en_stats->T1522_prio_1) +
+ be64_to_cpu(mlx4_en_stats->T1522_prio_2) +
+ be64_to_cpu(mlx4_en_stats->T1522_prio_3) +
+ be64_to_cpu(mlx4_en_stats->T1522_prio_4) +
+ be64_to_cpu(mlx4_en_stats->T1522_prio_5) +
+ be64_to_cpu(mlx4_en_stats->T1522_prio_6) +
+ be64_to_cpu(mlx4_en_stats->T1522_prio_7) +
+ be64_to_cpu(mlx4_en_stats->T1522_novlan);
+ priv->pkstats.tx_1548_bytes_packets = be64_to_cpu(mlx4_en_stats->T1548_prio_0) +
+ be64_to_cpu(mlx4_en_stats->T1548_prio_1) +
+ be64_to_cpu(mlx4_en_stats->T1548_prio_2) +
+ be64_to_cpu(mlx4_en_stats->T1548_prio_3) +
+ be64_to_cpu(mlx4_en_stats->T1548_prio_4) +
+ be64_to_cpu(mlx4_en_stats->T1548_prio_5) +
+ be64_to_cpu(mlx4_en_stats->T1548_prio_6) +
+ be64_to_cpu(mlx4_en_stats->T1548_prio_7) +
+ be64_to_cpu(mlx4_en_stats->T1548_novlan);
+ priv->pkstats.tx_gt_1548_bytes_packets = be64_to_cpu(mlx4_en_stats->T2MTU_prio_0) +
+ be64_to_cpu(mlx4_en_stats->T2MTU_prio_1) +
+ be64_to_cpu(mlx4_en_stats->T2MTU_prio_2) +
+ be64_to_cpu(mlx4_en_stats->T2MTU_prio_3) +
+ be64_to_cpu(mlx4_en_stats->T2MTU_prio_4) +
+ be64_to_cpu(mlx4_en_stats->T2MTU_prio_5) +
+ be64_to_cpu(mlx4_en_stats->T2MTU_prio_6) +
+ be64_to_cpu(mlx4_en_stats->T2MTU_prio_7) +
+ be64_to_cpu(mlx4_en_stats->T2MTU_novlan);
- priv->pkstats.broadcast =
- be64_to_cpu(mlx4_en_stats->RBCAST_prio_0) +
- be64_to_cpu(mlx4_en_stats->RBCAST_prio_1) +
- be64_to_cpu(mlx4_en_stats->RBCAST_prio_2) +
- be64_to_cpu(mlx4_en_stats->RBCAST_prio_3) +
- be64_to_cpu(mlx4_en_stats->RBCAST_prio_4) +
- be64_to_cpu(mlx4_en_stats->RBCAST_prio_5) +
- be64_to_cpu(mlx4_en_stats->RBCAST_prio_6) +
- be64_to_cpu(mlx4_en_stats->RBCAST_prio_7) +
- be64_to_cpu(mlx4_en_stats->RBCAST_novlan);
- priv->pkstats.rx_prio[0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_0);
- priv->pkstats.rx_prio[1] = be64_to_cpu(mlx4_en_stats->RTOT_prio_1);
- priv->pkstats.rx_prio[2] = be64_to_cpu(mlx4_en_stats->RTOT_prio_2);
- priv->pkstats.rx_prio[3] = be64_to_cpu(mlx4_en_stats->RTOT_prio_3);
- priv->pkstats.rx_prio[4] = be64_to_cpu(mlx4_en_stats->RTOT_prio_4);
- priv->pkstats.rx_prio[5] = be64_to_cpu(mlx4_en_stats->RTOT_prio_5);
- priv->pkstats.rx_prio[6] = be64_to_cpu(mlx4_en_stats->RTOT_prio_6);
- priv->pkstats.rx_prio[7] = be64_to_cpu(mlx4_en_stats->RTOT_prio_7);
- priv->pkstats.tx_prio[0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_0);
- priv->pkstats.tx_prio[1] = be64_to_cpu(mlx4_en_stats->TTOT_prio_1);
- priv->pkstats.tx_prio[2] = be64_to_cpu(mlx4_en_stats->TTOT_prio_2);
- priv->pkstats.tx_prio[3] = be64_to_cpu(mlx4_en_stats->TTOT_prio_3);
- priv->pkstats.tx_prio[4] = be64_to_cpu(mlx4_en_stats->TTOT_prio_4);
- priv->pkstats.tx_prio[5] = be64_to_cpu(mlx4_en_stats->TTOT_prio_5);
- priv->pkstats.tx_prio[6] = be64_to_cpu(mlx4_en_stats->TTOT_prio_6);
- priv->pkstats.tx_prio[7] = be64_to_cpu(mlx4_en_stats->TTOT_prio_7);
+ priv->pkstats.rx_prio[0][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_0);
+ priv->pkstats.rx_prio[0][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_0);
+ priv->pkstats.rx_prio[1][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_1);
+ priv->pkstats.rx_prio[1][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_1);
+ priv->pkstats.rx_prio[2][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_2);
+ priv->pkstats.rx_prio[2][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_2);
+ priv->pkstats.rx_prio[3][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_3);
+ priv->pkstats.rx_prio[3][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_3);
+ priv->pkstats.rx_prio[4][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_4);
+ priv->pkstats.rx_prio[4][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_4);
+ priv->pkstats.rx_prio[5][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_5);
+ priv->pkstats.rx_prio[5][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_5);
+ priv->pkstats.rx_prio[6][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_6);
+ priv->pkstats.rx_prio[6][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_6);
+ priv->pkstats.rx_prio[7][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_7);
+ priv->pkstats.rx_prio[7][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_7);
+ priv->pkstats.rx_prio[8][0] = be64_to_cpu(mlx4_en_stats->RTOT_novlan);
+ priv->pkstats.rx_prio[8][1] = be64_to_cpu(mlx4_en_stats->ROCT_novlan);
+ priv->pkstats.tx_prio[0][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_0);
+ priv->pkstats.tx_prio[0][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_0);
+ priv->pkstats.tx_prio[1][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_1);
+ priv->pkstats.tx_prio[1][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_1);
+ priv->pkstats.tx_prio[2][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_2);
+ priv->pkstats.tx_prio[2][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_2);
+ priv->pkstats.tx_prio[3][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_3);
+ priv->pkstats.tx_prio[3][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_3);
+ priv->pkstats.tx_prio[4][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_4);
+ priv->pkstats.tx_prio[4][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_4);
+ priv->pkstats.tx_prio[5][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_5);
+ priv->pkstats.tx_prio[5][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_5);
+ priv->pkstats.tx_prio[6][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_6);
+ priv->pkstats.tx_prio[6][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_6);
+ priv->pkstats.tx_prio[7][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_7);
+ priv->pkstats.tx_prio[7][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_7);
+ priv->pkstats.tx_prio[8][0] = be64_to_cpu(mlx4_en_stats->TTOT_novlan);
+ priv->pkstats.tx_prio[8][1] = be64_to_cpu(mlx4_en_stats->TOCT_novlan);
+
+ flowstats = mailbox_flow->buf;
+
+ for (i = 0; i < MLX4_NUM_PRIORITIES; i++) {
+ priv->flowstats[i].rx_pause =
+ be64_to_cpu(flowstats[i].rx_pause);
+ priv->flowstats[i].rx_pause_duration =
+ be64_to_cpu(flowstats[i].rx_pause_duration);
+ priv->flowstats[i].rx_pause_transition =
+ be64_to_cpu(flowstats[i].rx_pause_transition);
+ priv->flowstats[i].tx_pause =
+ be64_to_cpu(flowstats[i].tx_pause);
+ priv->flowstats[i].tx_pause_duration =
+ be64_to_cpu(flowstats[i].tx_pause_duration);
+ priv->flowstats[i].tx_pause_transition =
+ be64_to_cpu(flowstats[i].tx_pause_transition);
+ }
+
+ memset(&tmp_vport_stats, 0, sizeof(tmp_vport_stats));
spin_unlock(&priv->stats_lock);
+ err = mlx4_get_vport_ethtool_stats(mdev->dev, port,
+ &tmp_vport_stats, reset);
+ spin_lock(&priv->stats_lock);
+ if (!err) {
+ /* ethtool stats format */
+ vport_stats->rx_unicast_packets = tmp_vport_stats.rx_unicast_packets;
+ vport_stats->rx_unicast_bytes = tmp_vport_stats.rx_unicast_bytes;
+ vport_stats->rx_multicast_packets = tmp_vport_stats.rx_multicast_packets;
+ vport_stats->rx_multicast_bytes = tmp_vport_stats.rx_multicast_bytes;
+ vport_stats->rx_broadcast_packets = tmp_vport_stats.rx_broadcast_packets;
+ vport_stats->rx_broadcast_bytes = tmp_vport_stats.rx_broadcast_bytes;
+ vport_stats->rx_dropped = tmp_vport_stats.rx_dropped;
+ vport_stats->rx_errors = tmp_vport_stats.rx_errors;
+ vport_stats->tx_unicast_packets = tmp_vport_stats.tx_unicast_packets;
+ vport_stats->tx_unicast_bytes = tmp_vport_stats.tx_unicast_bytes;
+ vport_stats->tx_multicast_packets = tmp_vport_stats.tx_multicast_packets;
+ vport_stats->tx_multicast_bytes = tmp_vport_stats.tx_multicast_bytes;
+ vport_stats->tx_broadcast_packets = tmp_vport_stats.tx_broadcast_packets;
+ vport_stats->tx_broadcast_bytes = tmp_vport_stats.tx_broadcast_bytes;
+ vport_stats->tx_errors = tmp_vport_stats.tx_errors;
+ }
+ if (!mlx4_is_mfunc(mdev->dev)) {
+ /* netdevice stats format */
+ dev = mdev->pndev[port];
+ dev->if_ipackets = priv->pkstats.rx_packets;
+ dev->if_opackets = priv->pkstats.tx_packets;
+ dev->if_ibytes = priv->pkstats.rx_bytes;
+ dev->if_obytes = priv->pkstats.tx_bytes;
+ dev->if_ierrors = priv->pkstats.rx_errors;
+ dev->if_iqdrops = priv->pkstats.rx_dropped;
+ dev->if_imcasts = priv->pkstats.rx_multicast_packets;
+ dev->if_omcasts = priv->pkstats.tx_multicast_packets;
+ dev->if_collisions = 0;
+ }
+
+ spin_unlock(&priv->stats_lock);
+
out:
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox_flow);
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+
+mailbox_out:
+ if (do_if_stat)
+ priv->last_ifq_jiffies = jiffies;
+
return err;
}
-
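Each nine-term sum above folds eight per-priority hardware counters plus the no-VLAN bucket into one host-endian total. A minimal sketch of that pattern, assuming the nine __be64 counters of a group sit contiguously (the real mailbox layout spells every field out individually, as the hunk shows):

	/*
	 * Sketch only: sum eight priority counters plus the no-VLAN
	 * counter, converting each from big-endian.
	 */
	static inline u64
	sum_prio_counters(const __be64 prio[8], const __be64 *novlan)
	{
		u64 sum = 0;
		int i;

		for (i = 0; i < 8; i++)
			sum += be64_to_cpu(prio[i]);
		return (sum + be64_to_cpu(*novlan));
	}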
Modified: trunk/sys/ofed/drivers/net/mlx4/en_port.h
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/en_port.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/en_port.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -36,43 +36,11 @@
#define SET_PORT_GEN_ALL_VALID 0x7
-#define SET_PORT_PROMISC_EN_SHIFT 31
-#define SET_PORT_PROMISC_MODE_SHIFT 30
+#define SET_PORT_PROMISC_SHIFT 31
+#define SET_PORT_MC_PROMISC_SHIFT 30
-enum {
- MLX4_CMD_SET_VLAN_FLTR = 0x47,
- MLX4_CMD_SET_MCAST_FLTR = 0x48,
- MLX4_CMD_DUMP_ETH_STATS = 0x49,
-};
+#define MLX4_EN_NUM_TC 8
-struct mlx4_set_port_general_context {
- u8 reserved[3];
- u8 flags;
- u16 reserved2;
- __be16 mtu;
- u8 pptx;
- u8 pfctx;
- u16 reserved3;
- u8 pprx;
- u8 pfcrx;
- u16 reserved4;
-};
-
-struct mlx4_set_port_rqp_calc_context {
- __be32 base_qpn;
- __be32 flags;
- u8 reserved[3];
- u8 mac_miss;
- u8 intra_no_vlan;
- u8 no_vlan;
- u8 intra_vlan_miss;
- u8 vlan_miss;
- u8 reserved2[3];
- u8 no_vlan_prio;
- __be32 promisc;
- __be32 mcast;
-};
-
#define VLAN_FLTR_SIZE 128
struct mlx4_set_vlan_fltr_mbox {
__be32 entry[VLAN_FLTR_SIZE];
@@ -86,10 +54,12 @@
};
enum {
+ MLX4_EN_10G_SPEED_XAUI = 0x00,
+ MLX4_EN_10G_SPEED_XFI = 0x01,
MLX4_EN_1G_SPEED = 0x02,
- MLX4_EN_10G_SPEED_XFI = 0x01,
- MLX4_EN_10G_SPEED_XAUI = 0x00,
+ MLX4_EN_20G_SPEED = 0x08,
MLX4_EN_40G_SPEED = 0x40,
+ MLX4_EN_56G_SPEED = 0x20,
MLX4_EN_OTHER_SPEED = 0x0f,
};
@@ -96,19 +66,15 @@
struct mlx4_en_query_port_context {
u8 link_up;
#define MLX4_EN_LINK_UP_MASK 0x80
- u8 reserved;
+ u8 autoneg;
+#define MLX4_EN_AUTONEG_MASK 0x80
__be16 mtu;
u8 reserved2;
u8 link_speed;
-#define MLX4_EN_SPEED_MASK 0x43
+#define MLX4_EN_SPEED_MASK 0x6b
u16 reserved3[5];
__be64 mac;
u8 transceiver;
- u8 reserved4[3];
- __be32 wavelenth;
- u32 reserved5;
- __be32 transceiver_code_hi;
- __be32 transceiver_code_low;
};
@@ -593,6 +559,5 @@
__be32 TDROP;
};
-enum mlx4_query_reply mlx4_en_query(void *endev_ptr, void *int_dev);
#endif
Property changes on: trunk/sys/ofed/drivers/net/mlx4/en_port.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
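The widened MLX4_EN_SPEED_MASK (0x6b = 0x01 | 0x02 | 0x08 | 0x20 | 0x40) covers the 20G and 56G encodings added above. A hedged sketch of decoding link_speed under that assumption (the helper name is illustrative, not part of the diff):

	static int
	mlx4_en_link_speed_mbps(u8 link_speed)
	{
		switch (link_speed & MLX4_EN_SPEED_MASK) {
		case MLX4_EN_10G_SPEED_XAUI:	/* 0x00 */
		case MLX4_EN_10G_SPEED_XFI:	/* 0x01 */
			return (10000);
		case MLX4_EN_1G_SPEED:		/* 0x02 */
			return (1000);
		case MLX4_EN_20G_SPEED:		/* 0x08 */
			return (20000);
		case MLX4_EN_56G_SPEED:		/* 0x20 */
			return (56000);
		case MLX4_EN_40G_SPEED:		/* 0x40 */
			return (40000);
		default:
			return (-1);	/* MLX4_EN_OTHER_SPEED */
		}
	}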
Modified: trunk/sys/ofed/drivers/net/mlx4/en_resources.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/en_resources.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/en_resources.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -31,24 +31,26 @@
*
*/
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mlx4/qp.h>
#include "mlx4_en.h"
+
void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
int is_tx, int rss, int qpn, int cqn,
- struct mlx4_qp_context *context)
+ int user_prio, struct mlx4_qp_context *context)
{
struct mlx4_en_dev *mdev = priv->mdev;
+ struct net_device *dev = priv->dev;
memset(context, 0, sizeof *context);
- context->flags = cpu_to_be32(7 << 16 | rss << 13);
+ context->flags = cpu_to_be32(7 << 16 | rss << MLX4_RSS_QPC_FLAG_OFFSET);
context->pd = cpu_to_be32(mdev->priv_pdn);
context->mtu_msgmax = 0xff;
- if (!is_tx && !rss) {
+ if (!is_tx && !rss)
context->rq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4);
- }
if (is_tx)
context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4);
else
@@ -57,10 +59,25 @@
context->local_qpn = cpu_to_be32(qpn);
context->pri_path.ackto = 1 & 0x07;
context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6;
- context->pri_path.counter_index = 0xff;
+ if (user_prio >= 0) {
+ context->pri_path.sched_queue |= user_prio << 3;
+ context->pri_path.feup = 1 << 6;
+ }
+ context->pri_path.counter_index = (u8)(priv->counter_index);
+ if (!rss &&
+ (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK) &&
+ context->pri_path.counter_index != 0xFF) {
+ /* disable multicast loopback to qp with same counter */
+ context->pri_path.fl |= MLX4_FL_ETH_SRC_CHECK_MC_LB;
+ context->pri_path.vlan_control |=
+ MLX4_VLAN_CTRL_ETH_SRC_CHECK_IF_COUNTER;
+ }
+
context->cqn_send = cpu_to_be32(cqn);
context->cqn_recv = cpu_to_be32(cqn);
context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2);
+ if (!(dev->if_capabilities & IFCAP_VLAN_HWCSUM))
+ context->param3 |= cpu_to_be32(1 << 30);
}
@@ -69,6 +86,8 @@
struct page **pages;
int i;
+ /*
+ * If nbufs == 1 there is no need to vmap. If buf->direct.buf is
+ * not NULL, the vmap was already done by mlx4_alloc_buff.
+ */
if (buf->direct.buf != NULL || buf->nbufs == 1)
return 0;
@@ -89,11 +108,10 @@
void mlx4_en_unmap_buffer(struct mlx4_buf *buf)
{
- if (buf->direct.buf != NULL || buf->nbufs == 1)
+ if (BITS_PER_LONG == 64 || buf->nbufs == 1)
return;
vunmap(buf->direct.buf);
- buf->direct.buf = NULL;
}
void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event)
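The new user_prio argument to mlx4_en_fill_qp_context lands in bits 3..5 of pri_path.sched_queue, next to the port selector in bit 6. The packing, isolated as a sketch (the driver does this inline above):

	static u8
	sched_queue_sketch(int port, int user_prio)
	{
		u8 q;

		q = 0x83 | ((u8)(port - 1) << 6);	/* base bits plus port */
		if (user_prio >= 0)
			q |= (u8)user_prio << 3;	/* user priority */
		return (q);
	}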
Modified: trunk/sys/ofed/drivers/net/mlx4/en_rx.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/en_rx.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/en_rx.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -30,65 +30,41 @@
* SOFTWARE.
*
*/
-
#include "opt_inet.h"
-#include "mlx4_en.h"
-
#include <linux/mlx4/cq.h>
+#include <linux/slab.h>
#include <linux/mlx4/qp.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/vmalloc.h>
+#include <linux/mlx4/driver.h>
+#ifdef CONFIG_NET_RX_BUSY_POLL
+#include <net/busy_poll.h>
+#endif
-#include <net/ethernet.h>
-#include <net/if_vlan_var.h>
-#include <sys/mbuf.h>
+#include "mlx4_en.h"
-enum {
- MIN_RX_ARM = 1024,
-};
-static int mlx4_en_alloc_buf(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_desc *rx_desc,
- struct mbuf **mb_list,
- int i)
-{
- struct mlx4_en_dev *mdev = priv->mdev;
- struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
- struct mbuf *mb;
- dma_addr_t dma;
-
- if (i == 0)
- mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, frag_info->frag_size);
- else
- mb = m_getjcl(M_NOWAIT, MT_DATA, 0, frag_info->frag_size);
- if (mb == NULL) {
- priv->port_stats.rx_alloc_failed++;
- return -ENOMEM;
- }
- dma = pci_map_single(mdev->pdev, mb->m_data, frag_info->frag_size,
- PCI_DMA_FROMDEVICE);
- rx_desc->data[i].addr = cpu_to_be64(dma);
- mb_list[i] = mb;
- return 0;
-}
-
static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_ring *ring, int index)
+ struct mlx4_en_rx_ring *ring,
+ int index)
{
- struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index;
+ struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *)
+ (ring->buf + (ring->stride * index));
int possible_frags;
int i;
/* Set size and memtype fields */
- for (i = 0; i < priv->num_frags; i++) {
- rx_desc->data[i].byte_count =
- cpu_to_be32(priv->frag_info[i].frag_size);
- rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
- }
+ rx_desc->data[0].byte_count = cpu_to_be32(priv->rx_mb_size - MLX4_NET_IP_ALIGN);
+ rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key);
- /* If the number of used fragments does not fill up the ring stride,
- * remaining (unused) fragments must be padded with null address/size
- * and a special memory key */
+ /*
+ * If the number of used fragments does not fill up the ring
+ * stride, remaining (unused) fragments must be padded with
+ * null address/size and a special memory key:
+ */
possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
- for (i = priv->num_frags; i < possible_frags; i++) {
+ for (i = 1; i < possible_frags; i++) {
rx_desc->data[i].byte_count = 0;
rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
rx_desc->data[i].addr = 0;
@@ -95,54 +71,121 @@
}
}
-static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_ring *ring, int index)
+static int
+mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring,
+ __be64 *pdma, struct mlx4_en_rx_mbuf *mb_list)
{
- struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
- struct mbuf **mb_list = ring->rx_info + (index << priv->log_rx_info);
- int i;
+ bus_dma_segment_t segs[1];
+ bus_dmamap_t map;
+ struct mbuf *mb;
+ int nsegs;
+ int err;
- for (i = 0; i < priv->num_frags; i++)
- if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, i))
- goto err;
+ /* try to allocate a new spare mbuf */
+ if (unlikely(ring->spare.mbuf == NULL)) {
+ mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
+ if (unlikely(mb == NULL))
+ return (-ENOMEM);
+ /* setup correct length */
+ mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size;
- return 0;
+ /* make sure IP header gets aligned */
+ m_adj(mb, MLX4_NET_IP_ALIGN);
-err:
- while (i--)
- m_free(mb_list[i]);
- return -ENOMEM;
+ /* load spare mbuf into BUSDMA */
+ err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, ring->spare.dma_map,
+ mb, segs, &nsegs, BUS_DMA_NOWAIT);
+ if (unlikely(err != 0)) {
+ m_freem(mb);
+ return (err);
+ }
+
+ /* store spare info */
+ ring->spare.mbuf = mb;
+ ring->spare.paddr_be = cpu_to_be64(segs[0].ds_addr);
+
+ bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map,
+ BUS_DMASYNC_PREREAD);
+ }
+
+ /* synchronize and unload the current mbuf, if any */
+ if (likely(mb_list->mbuf != NULL)) {
+ bus_dmamap_sync(ring->dma_tag, mb_list->dma_map,
+ BUS_DMASYNC_POSTREAD);
+ bus_dmamap_unload(ring->dma_tag, mb_list->dma_map);
+ }
+
+ mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
+ if (unlikely(mb == NULL))
+ goto use_spare;
+
+ /* setup correct length */
+ mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size;
+
+ /* make sure IP header gets aligned */
+ m_adj(mb, MLX4_NET_IP_ALIGN);
+
+ err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, mb_list->dma_map,
+ mb, segs, &nsegs, BUS_DMA_NOWAIT);
+ if (unlikely(err != 0)) {
+ m_freem(mb);
+ goto use_spare;
+ }
+
+ *pdma = cpu_to_be64(segs[0].ds_addr);
+ mb_list->mbuf = mb;
+
+ bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, BUS_DMASYNC_PREREAD);
+ return (0);
+
+use_spare:
+ /* swap DMA maps */
+ map = mb_list->dma_map;
+ mb_list->dma_map = ring->spare.dma_map;
+ ring->spare.dma_map = map;
+
+ /* swap MBUFs */
+ mb_list->mbuf = ring->spare.mbuf;
+ ring->spare.mbuf = NULL;
+
+ /* store physical address */
+ *pdma = ring->spare.paddr_be;
+ return (0);
}
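The allocator above keeps a simple invariant: an RX slot never ends up empty, because a pre-loaded spare mbuf (with its own DMA map) is swapped in whenever a fresh allocation or DMA load fails. A minimal model of that invariant, with hypothetical types standing in for the mbuf and BUSDMA machinery:

	struct slot { void *buf; };
	struct ring_model { struct slot spare; };

	static int
	refill_model(struct ring_model *r, struct slot *s, void *(*alloc)(void))
	{
		void *nb;

		if (r->spare.buf == NULL) {
			r->spare.buf = alloc();
			if (r->spare.buf == NULL)
				return (-1);	/* only failure: no spare left */
		}
		nb = alloc();
		if (nb != NULL) {
			s->buf = nb;		/* common case */
			return (0);
		}
		s->buf = r->spare.buf;		/* consume spare: never a hole */
		r->spare.buf = NULL;		/* replenished on the next call */
		return (0);
	}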
-static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
+static void
+mlx4_en_free_buf(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_mbuf *mb_list)
{
- *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
+ bus_dmamap_t map = mb_list->dma_map;
+ bus_dmamap_sync(ring->dma_tag, map, BUS_DMASYNC_POSTREAD);
+ bus_dmamap_unload(ring->dma_tag, map);
+ m_freem(mb_list->mbuf);
+ mb_list->mbuf = NULL; /* safety clearing */
}
-static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_ring *ring,
- int index)
+static int
+mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring, int index)
{
- struct mlx4_en_frag_info *frag_info;
- struct mlx4_en_dev *mdev = priv->mdev;
- struct mbuf **mb_list;
- struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride);
- dma_addr_t dma;
- int nr;
+ struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *)
+ (ring->buf + (index * ring->stride));
+ struct mlx4_en_rx_mbuf *mb_list = ring->mbuf + index;
- mb_list = ring->rx_info + (index << priv->log_rx_info);
- for (nr = 0; nr < priv->num_frags; nr++) {
- en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
- frag_info = &priv->frag_info[nr];
- dma = be64_to_cpu(rx_desc->data[nr].addr);
+ mb_list->mbuf = NULL;
- en_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma);
- pci_unmap_single(mdev->pdev, dma, frag_info->frag_size,
- PCI_DMA_FROMDEVICE);
- m_free(mb_list[nr]);
+ if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list)) {
+ priv->port_stats.rx_alloc_failed++;
+ return (-ENOMEM);
}
+ return (0);
}
+static inline void
+mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
+{
+ *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
+}
+
static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
{
struct mlx4_en_rx_ring *ring;
@@ -153,7 +196,7 @@
for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
- ring = &priv->rx_ring[ring_ind];
+ ring = priv->rx_ring[ring_ind];
err = mlx4_en_prepare_rx_desc(priv, ring,
ring->actual_size);
@@ -163,7 +206,8 @@
"enough rx buffers\n");
return -ENOMEM;
} else {
- new_size = rounddown_pow_of_two(ring->actual_size);
+ new_size =
+ rounddown_pow_of_two(ring->actual_size);
en_warn(priv, "Only %d buffers allocated "
"reducing ring size to %d\n",
ring->actual_size, new_size);
@@ -178,11 +222,12 @@
reduce_rings:
for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
- ring = &priv->rx_ring[ring_ind];
+ ring = priv->rx_ring[ring_ind];
while (ring->actual_size > new_size) {
ring->actual_size--;
ring->prod--;
- mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
+ mlx4_en_free_buf(ring,
+ ring->mbuf + ring->actual_size);
}
}
@@ -202,44 +247,107 @@
while (ring->cons != ring->prod) {
index = ring->cons & ring->size_mask;
en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
- mlx4_en_free_rx_desc(priv, ring, index);
+ mlx4_en_free_buf(ring, ring->mbuf + index);
++ring->cons;
}
}
+void mlx4_en_calc_rx_buf(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int eff_mtu = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN +
+ MLX4_NET_IP_ALIGN;
+ if (eff_mtu > MJUM16BYTES) {
+ en_err(priv, "MTU(%d) is too big\n", (int)dev->if_mtu);
+ eff_mtu = MJUM16BYTES;
+ } else if (eff_mtu > MJUM9BYTES) {
+ eff_mtu = MJUM16BYTES;
+ } else if (eff_mtu > MJUMPAGESIZE) {
+ eff_mtu = MJUM9BYTES;
+ } else if (eff_mtu > MCLBYTES) {
+ eff_mtu = MJUMPAGESIZE;
+ } else {
+ eff_mtu = MCLBYTES;
+ }
+
+ priv->rx_mb_size = eff_mtu;
+
+ en_dbg(DRV, priv, "Effective RX MTU: %d bytes\n", eff_mtu);
+}
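Worked example of the bucketing above, assuming 4 KB pages and MLX4_NET_IP_ALIGN == 2: an if_mtu of 9000 gives eff_mtu = 9000 + 14 (Ethernet header) + 4 (VLAN) + 4 (FCS) + 2 = 9024, which exceeds MJUMPAGESIZE (4096) but fits in MJUM9BYTES (9216), so the ring is filled from 9 KB jumbo clusters.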
+
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_ring *ring, u32 size)
+ struct mlx4_en_rx_ring **pring,
+ u32 size, int node)
{
struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_rx_ring *ring;
int err;
int tmp;
+ uint32_t x;
+ ring = kzalloc(sizeof(struct mlx4_en_rx_ring), GFP_KERNEL);
+ if (!ring) {
+ en_err(priv, "Failed to allocate RX ring structure\n");
+ return -ENOMEM;
+ }
+ /* Create DMA descriptor TAG */
+ if ((err = -bus_dma_tag_create(
+ bus_get_dma_tag(mdev->pdev->dev.bsddev),
+ 1, /* any alignment */
+ 0, /* no boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ MJUM16BYTES, /* maxsize */
+ 1, /* nsegments */
+ MJUM16BYTES, /* maxsegsize */
+ 0, /* flags */
+ NULL, NULL, /* lockfunc, lockfuncarg */
+ &ring->dma_tag))) {
+ en_err(priv, "Failed to create DMA tag\n");
+ goto err_ring;
+ }
+
ring->prod = 0;
ring->cons = 0;
ring->size = size;
ring->size_mask = size - 1;
- ring->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
- DS_SIZE * MLX4_EN_MAX_RX_FRAGS);
+ ring->stride = roundup_pow_of_two(
+ sizeof(struct mlx4_en_rx_desc) + DS_SIZE);
ring->log_stride = ffs(ring->stride) - 1;
ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
- tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
- sizeof(struct mbuf *));
+ tmp = size * sizeof(struct mlx4_en_rx_mbuf);
- ring->rx_info = kmalloc(tmp, GFP_KERNEL);
- if (!ring->rx_info) {
- en_err(priv, "Failed allocating rx_info ring\n");
- return -ENOMEM;
+ ring->mbuf = kzalloc(tmp, GFP_KERNEL);
+ if (ring->mbuf == NULL) {
+ err = -ENOMEM;
+ goto err_dma_tag;
+ }
+
+ err = -bus_dmamap_create(ring->dma_tag, 0, &ring->spare.dma_map);
+ if (err != 0)
+ goto err_info;
+
+ for (x = 0; x != size; x++) {
+ err = -bus_dmamap_create(ring->dma_tag, 0,
+ &ring->mbuf[x].dma_map);
+ if (err != 0) {
+ while (x--)
+ bus_dmamap_destroy(ring->dma_tag,
+ ring->mbuf[x].dma_map);
+ goto err_info;
+ }
}
- en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d stride:%d (%d)\n",
- ring->rx_info, tmp, ring->stride, ring->log_stride);
+ en_dbg(DRV, priv, "Allocated MBUF ring at addr:%p size:%d\n",
+ ring->mbuf, tmp);
err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
ring->buf_size, 2 * PAGE_SIZE);
if (err)
- goto err_ring;
+ goto err_dma_map;
err = mlx4_en_map_buffer(&ring->wqres.buf);
if (err) {
@@ -247,16 +355,24 @@
goto err_hwq;
}
ring->buf = ring->wqres.buf.direct.buf;
-
+ *pring = ring;
return 0;
- mlx4_en_unmap_buffer(&ring->wqres.buf);
err_hwq:
mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
+err_dma_map:
+ for (x = 0; x != size; x++) {
+ bus_dmamap_destroy(ring->dma_tag,
+ ring->mbuf[x].dma_map);
+ }
+ bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map);
+err_info:
+ vfree(ring->mbuf);
+err_dma_tag:
+ bus_dma_tag_destroy(ring->dma_tag);
err_ring:
- kfree(ring->rx_info);
- ring->rx_info = NULL;
- return err;
+ kfree(ring);
+ return (err);
}
int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
@@ -265,16 +381,18 @@
int i;
int ring_ind;
int err;
- int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
- DS_SIZE * priv->num_frags);
+ int stride = roundup_pow_of_two(
+ sizeof(struct mlx4_en_rx_desc) + DS_SIZE);
for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
- ring = &priv->rx_ring[ring_ind];
+ ring = priv->rx_ring[ring_ind];
ring->prod = 0;
ring->cons = 0;
ring->actual_size = 0;
- ring->cqn = priv->rx_cq[ring_ind].mcq.cqn;
+ ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn;
+ ring->rx_mb_size = priv->rx_mb_size;
+
ring->stride = stride;
if (ring->stride <= TXBB_SIZE)
ring->buf += TXBB_SIZE;
@@ -285,9 +403,10 @@
memset(ring->buf, 0, ring->buf_size);
mlx4_en_update_rx_prod_db(ring);
- /* Initailize all descriptors */
+ /* Initialize all descriptors */
for (i = 0; i < ring->size; i++)
mlx4_en_init_rx_desc(priv, ring, i);
+
#ifdef INET
/* Configure lro mngr */
if (priv->dev->if_capenable & IFCAP_LRO) {
@@ -298,36 +417,65 @@
}
#endif
}
+
+
err = mlx4_en_fill_rx_buffers(priv);
if (err)
goto err_buffers;
for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
- ring = &priv->rx_ring[ring_ind];
+ ring = priv->rx_ring[ring_ind];
ring->size_mask = ring->actual_size - 1;
mlx4_en_update_rx_prod_db(ring);
}
-
return 0;
err_buffers:
for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
- mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]);
+ mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]);
+ ring_ind = priv->rx_ring_num - 1;
+
+ while (ring_ind >= 0) {
+ ring = priv->rx_ring[ring_ind];
+ if (ring->stride <= TXBB_SIZE)
+ ring->buf -= TXBB_SIZE;
+ ring_ind--;
+ }
+
return err;
}
+
void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_ring *ring)
+ struct mlx4_en_rx_ring **pring,
+ u32 size, u16 stride)
{
struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_rx_ring *ring = *pring;
+ uint32_t x;
mlx4_en_unmap_buffer(&ring->wqres.buf);
- mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size + TXBB_SIZE);
- kfree(ring->rx_info);
- ring->rx_info = NULL;
+ mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
+ for (x = 0; x != size; x++)
+ bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map);
+ /* free spare mbuf, if any */
+ if (ring->spare.mbuf != NULL) {
+ bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map,
+ BUS_DMASYNC_POSTREAD);
+ bus_dmamap_unload(ring->dma_tag, ring->spare.dma_map);
+ m_freem(ring->spare.mbuf);
+ }
+ bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map);
+ vfree(ring->mbuf);
+ bus_dma_tag_destroy(ring->dma_tag);
+ kfree(ring);
+ *pring = NULL;
+#ifdef CONFIG_RFS_ACCEL
+ mlx4_en_cleanup_filters(priv, ring);
+#endif
}
void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
@@ -342,52 +490,20 @@
}
-/* Unmap a completed descriptor and free unused pages */
-static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_desc *rx_desc,
- struct mbuf **mb_list,
- int length)
+static void validate_loopback(struct mlx4_en_priv *priv, struct mbuf *mb)
{
- struct mlx4_en_dev *mdev = priv->mdev;
- struct mlx4_en_frag_info *frag_info;
- dma_addr_t dma;
- struct mbuf *mb;
- int nr;
+ int i;
+ int offset = ETHER_HDR_LEN;
- mb = mb_list[0];
- mb->m_pkthdr.len = length;
- /* Collect used fragments while replacing them in the HW descirptors */
- for (nr = 0; nr < priv->num_frags; nr++) {
- frag_info = &priv->frag_info[nr];
- if (length <= frag_info->frag_prefix_size)
- break;
- if (nr)
- mb->m_next = mb_list[nr];
- mb = mb_list[nr];
- mb->m_len = frag_info[nr].frag_size;
- dma = be64_to_cpu(rx_desc->data[nr].addr);
-
- /* Allocate a replacement page */
- if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, nr))
- goto fail;
-
- /* Unmap buffer */
- pci_unmap_single(mdev->pdev, dma, frag_info[nr].frag_size,
- PCI_DMA_FROMDEVICE);
+ for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) {
+ if (*(mb->m_data + offset) != (unsigned char) (i & 0xff))
+ goto out_loopback;
}
- /* Adjust size of last fragment to match actual length */
- mb->m_len = length - priv->frag_info[nr - 1].frag_prefix_size;
- mb->m_next = NULL;
- return 0;
+ /* Loopback found */
+ priv->loopback_ok = 1;
-fail:
- /* Drop all accumulated fragments (which have already been replaced in
- * the descriptor) of this packet; remaining fragments are reused... */
- while (nr > 0) {
- nr--;
- m_free(mb_list[nr]);
- }
- return -ENOMEM;
+out_loopback:
+ m_freem(mb);
}
@@ -397,60 +513,58 @@
/* Drop packet on bad receive or bad checksum */
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
MLX4_CQE_OPCODE_ERROR)) {
- en_err(priv, "CQE completed in error - vendor "
- "syndrom:%d syndrom:%d\n",
- ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
- ((struct mlx4_err_cqe *) cqe)->syndrome);
+ en_err(priv, "CQE completed in error - vendor syndrome:%d syndrome:%d\n",
+ ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome,
+ ((struct mlx4_err_cqe *)cqe)->syndrome);
return 1;
}
if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
- return 1;;
+ return 1;
}
return 0;
}
-static void validate_loopback(struct mlx4_en_priv *priv, struct mbuf *mb)
+static struct mbuf *
+mlx4_en_rx_mb(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring,
+ struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_mbuf *mb_list,
+ int length)
{
- int i;
- int offset = ETHER_HDR_LEN;
+ struct mbuf *mb;
- for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) {
- if (*(mb->m_data + offset) != (unsigned char) (i & 0xff))
- goto out_loopback;
- }
- /* Loopback found */
- priv->loopback_ok = 1;
+ /* get mbuf */
+ mb = mb_list->mbuf;
-out_loopback:
- m_freem(mb);
-}
+ /* collect used fragment while atomically replacing it */
+ if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list))
+ return (NULL);
-static struct mbuf *mlx4_en_rx_mb(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_desc *rx_desc,
- struct mbuf **mb_list,
- unsigned int length)
-{
- struct mbuf *mb;
+ /* range check hardware computed value */
+ if (unlikely(length > mb->m_len))
+ length = mb->m_len;
- mb = mb_list[0];
- /* Move relevant fragments to mb */
- if (unlikely(mlx4_en_complete_rx_desc(priv, rx_desc, mb_list, length)))
- return NULL;
-
- return mb;
+ /* update total packet length in packet header */
+ mb->m_len = mb->m_pkthdr.len = length;
+ return (mb);
}
-
+/* On CPU architectures with a 64-byte cache line, performance is better
+ * when the CQE size is 64 bytes. To enlarge the CQE size from 32 to 64
+ * bytes, 32 bytes of garbage (i.e. 0xcccccccc) were added at the start
+ * of each CQE (the real data is in the corresponding 32 bytes). The
+ * following calculation ensures that when factor == 1 we are aligned to
+ * 64 bytes and get the real CQE data. */
+#define CQE_FACTOR_INDEX(index, factor) ((index << factor) + factor)
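To see what the macro does, note that with 32-byte CQEs (factor == 0) it is the identity, while with 64-byte CQEs (factor == 1) entry i maps to buf[2 * i + 1], the real 32-byte half that follows the padding. A standalone check (illustrative only; this userland copy adds argument parentheses):

	#include <stdio.h>

	#define CQE_FACTOR_INDEX(index, factor) (((index) << (factor)) + (factor))

	int
	main(void)
	{
		int i;

		for (i = 0; i < 4; i++)
			printf("i=%d: factor 0 -> %d, factor 1 -> %d\n", i,
			    CQE_FACTOR_INDEX(i, 0), CQE_FACTOR_INDEX(i, 1));
		/* prints 0,1,2,3 for factor 0 and 1,3,5,7 for factor 1 */
		return (0);
	}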
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_cqe *cqe;
- struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring];
- struct mbuf **mb_list;
+ struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
+ struct mlx4_en_rx_mbuf *mb_list;
struct mlx4_en_rx_desc *rx_desc;
struct mbuf *mb;
+ struct mlx4_cq *mcq = &cq->mcq;
+ struct mlx4_cqe *buf = cq->buf;
#ifdef INET
struct lro_entry *queued;
#endif
@@ -457,37 +571,43 @@
int index;
unsigned int length;
int polled = 0;
+ u32 cons_index = mcq->cons_index;
+ u32 size_mask = ring->size_mask;
+ int size = cq->size;
+ int factor = priv->cqe_factor;
if (!priv->port_up)
return 0;
/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
- * descriptor offset can be deduced from the CQE index instead of
+ * descriptor offset can be deduced from the CQE index instead of
* reading 'cqe->index' */
- index = cq->mcq.cons_index & ring->size_mask;
- cqe = &cq->buf[index];
+ index = cons_index & size_mask;
+ cqe = &buf[CQE_FACTOR_INDEX(index, factor)];
/* Process all completed CQEs */
while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
- cq->mcq.cons_index & cq->size)) {
+ cons_index & size)) {
+ mb_list = ring->mbuf + index;
+ rx_desc = (struct mlx4_en_rx_desc *)
+ (ring->buf + (index << ring->log_stride));
- mb_list = ring->rx_info + (index << priv->log_rx_info);
- rx_desc = ring->buf + (index << ring->log_stride);
-
/*
* make sure we read the CQE after we read the ownership bit
*/
rmb();
- if (invalid_cqe(priv, cqe))
+ if (invalid_cqe(priv, cqe)) {
goto next;
-
+ }
/*
* Packet is OK - process it.
*/
length = be32_to_cpu(cqe->byte_cnt);
- mb = mlx4_en_rx_mb(priv, rx_desc, mb_list, length);
- if (!mb) {
+ length -= ring->fcs_del;
+
+ mb = mlx4_en_rx_mb(priv, ring, rx_desc, mb_list, length);
+ if (unlikely(!mb)) {
ring->errors++;
goto next;
}
@@ -495,12 +615,13 @@
ring->bytes += length;
ring->packets++;
- if (unlikely(priv->validate_loopback)) {
+ if (unlikely(priv->validate_loopback)) {
validate_loopback(priv, mb);
goto next;
}
- mb->m_pkthdr.flowid = cq->ring;
+ /* forward Toeplitz compatible hash value */
+ mb->m_pkthdr.flowid = be32_to_cpu(cqe->immed_rss_invalid);
mb->m_flags |= M_FLOWID;
mb->m_pkthdr.rcvif = dev;
if (be32_to_cpu(cqe->vlan_my_qpn) &
@@ -508,11 +629,12 @@
mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->sl_vid);
mb->m_flags |= M_VLANTAG;
}
- if (likely(priv->rx_csum) &&
+ if (likely(dev->if_capenable &
+ (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) &&
(cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
(cqe->checksum == cpu_to_be16(0xffff))) {
priv->port_stats.rx_chksum_good++;
- mb->m_pkthdr.csum_flags =
+ mb->m_pkthdr.csum_flags =
CSUM_IP_CHECKED | CSUM_IP_VALID |
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
mb->m_pkthdr.csum_data = htons(0xffff);
@@ -524,24 +646,18 @@
*/
#ifdef INET
if (mlx4_en_can_lro(cqe->status) &&
- (dev->if_capenable & IFCAP_LRO)) {
+ (dev->if_capenable & IFCAP_LRO)) {
if (ring->lro.lro_cnt != 0 &&
- tcp_lro_rx(&ring->lro, mb, 0) == 0)
+ tcp_lro_rx(&ring->lro, mb, 0) == 0)
goto next;
}
+
#endif
-
/* LRO not possible, complete processing here */
INC_PERF_COUNTER(priv->pstats.lro_misses);
} else {
mb->m_pkthdr.csum_flags = 0;
priv->port_stats.rx_chksum_none++;
-#ifdef INET
- if (priv->ip_reasm &&
- cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4) &&
- !mlx4_en_rx_frags(priv, ring, mb, cqe))
- goto next;
-#endif
}
/* Push it up the stack */
@@ -548,9 +664,9 @@
dev->if_input(dev, mb);
next:
- ++cq->mcq.cons_index;
- index = (cq->mcq.cons_index) & ring->size_mask;
- cqe = &cq->buf[index];
+ ++cons_index;
+ index = cons_index & size_mask;
+ cqe = &buf[CQE_FACTOR_INDEX(index, factor)];
if (++polled == budget)
goto out;
}
@@ -557,7 +673,6 @@
/* Flush all pending IP reassembly sessions */
out:
#ifdef INET
- mlx4_en_flush_frags(priv, ring);
while ((queued = SLIST_FIRST(&ring->lro.lro_active)) != NULL) {
SLIST_REMOVE_HEAD(&ring->lro.lro_active, next);
tcp_lro_flush(&ring->lro, queued);
@@ -564,99 +679,64 @@
}
#endif
AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
- mlx4_cq_set_ci(&cq->mcq);
+ mcq->cons_index = cons_index;
+ mlx4_cq_set_ci(mcq);
wmb(); /* ensure HW sees CQ consumer before we post new buffers */
- ring->cons = cq->mcq.cons_index;
+ ring->cons = mcq->cons_index;
ring->prod += polled; /* Polled descriptors were reallocated in place */
mlx4_en_update_rx_prod_db(ring);
return polled;
+
}
-
/* Rx CQ polling - called by NAPI */
static int mlx4_en_poll_rx_cq(struct mlx4_en_cq *cq, int budget)
{
- struct net_device *dev = cq->dev;
- int done;
+ struct net_device *dev = cq->dev;
+ int done;
- done = mlx4_en_process_rx_cq(dev, cq, budget);
- cq->tot_rx += done;
+ done = mlx4_en_process_rx_cq(dev, cq, budget);
+ cq->tot_rx += done;
- return done;
-}
+ return done;
-void mlx4_en_rx_que(void *context, int pending)
-{
- struct mlx4_en_cq *cq;
-
- cq = context;
- while (mlx4_en_poll_rx_cq(cq, MLX4_EN_MAX_RX_POLL)
- == MLX4_EN_MAX_RX_POLL);
- mlx4_en_arm_cq(cq->dev->if_softc, cq);
}
-
void mlx4_en_rx_irq(struct mlx4_cq *mcq)
{
struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
struct mlx4_en_priv *priv = netdev_priv(cq->dev);
- int done;
+ int done;
- done = mlx4_en_poll_rx_cq(cq, MLX4_EN_MAX_RX_POLL);
- if (done == MLX4_EN_MAX_RX_POLL)
+ /* Shoot one poll within the IRQ context; there is no NAPI in FreeBSD. */
+ done = mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET);
+ if (priv->port_up && (done == MLX4_EN_RX_BUDGET)) {
+ cq->curr_poll_rx_cpu_id = curcpu;
taskqueue_enqueue(cq->tq, &cq->cq_task);
- else
+ } else {
mlx4_en_arm_cq(priv, cq);
+ }
}
-
-#if MLX4_EN_MAX_RX_FRAGS == 3
-static int frag_sizes[] = {
- FRAG_SZ0,
- FRAG_SZ1,
- FRAG_SZ2,
-};
-#elif MLX4_EN_MAX_RX_FRAGS == 2
-static int frag_sizes[] = {
- FRAG_SZ0,
- FRAG_SZ1,
-};
-#else
-#error "Unknown MAX_RX_FRAGS"
-#endif
-
-void mlx4_en_calc_rx_buf(struct net_device *dev)
+void mlx4_en_rx_que(void *context, int pending)
{
- struct mlx4_en_priv *priv = netdev_priv(dev);
- int eff_mtu = dev->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETH_LLC_SNAP_SIZE;
- int buf_size = 0;
- int i, frag;
+ struct mlx4_en_cq *cq;
+ struct thread *td;
- for (i = 0, frag = 0; buf_size < eff_mtu; frag++, i++) {
- /*
- * Allocate small to large but only as much as is needed for
- * the tail.
- */
- while (i > 0 && eff_mtu - buf_size <= frag_sizes[i - 1])
- i--;
- priv->frag_info[frag].frag_size = frag_sizes[i];
- priv->frag_info[frag].frag_prefix_size = buf_size;
- buf_size += priv->frag_info[frag].frag_size;
- }
+ cq = context;
+ td = curthread;
- priv->num_frags = frag;
- priv->rx_mb_size = eff_mtu;
- priv->log_rx_info =
- ROUNDUP_LOG2(priv->num_frags * sizeof(struct mbuf *));
+ thread_lock(td);
+ sched_bind(td, cq->curr_poll_rx_cpu_id);
+ thread_unlock(td);
- en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
- "num_frags:%d):\n", eff_mtu, priv->num_frags);
- for (i = 0; i < priv->num_frags; i++) {
- en_dbg(DRV, priv, " frag:%d - size:%d prefix:%d\n", i,
- priv->frag_info[i].frag_size,
- priv->frag_info[i].frag_prefix_size)
- }
+ while (mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET)
+ == MLX4_EN_RX_BUDGET);
+ mlx4_en_arm_cq(cq->dev->if_softc, cq);
}
+
/* RSS related functions */
static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
@@ -683,9 +763,16 @@
memset(context, 0, sizeof *context);
mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0,
- qpn, ring->cqn, context);
+ qpn, ring->cqn, -1, context);
context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);
+ /* Cancel FCS removal if FW allows */
+ if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) {
+ context->param3 |= cpu_to_be32(1 << 29);
+ ring->fcs_del = ETH_FCS_LEN;
+ } else
+ ring->fcs_del = 0;
+
err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state);
if (err) {
mlx4_qp_remove(mdev->dev, qp);
@@ -697,6 +784,36 @@
return err;
}
+int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
+{
+ int err;
+ u32 qpn;
+
+ err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, 0);
+ if (err) {
+ en_err(priv, "Failed reserving drop qpn\n");
+ return err;
+ }
+ err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp);
+ if (err) {
+ en_err(priv, "Failed allocating drop qp\n");
+ mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
+ return err;
+ }
+
+ return 0;
+}
+
+void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv)
+{
+ u32 qpn;
+
+ qpn = priv->drop_qp.qpn;
+ mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp);
+ mlx4_qp_free(priv->mdev->dev, &priv->drop_qp);
+ mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
+}
+
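A hedged sketch of how the new drop QP pair is meant to be used (the steering-rule step is an assumption, not shown in this diff; the QP exists so drop rules have a valid destination):

	/* Illustrative lifecycle; error handling elided. */
	if (mlx4_en_create_drop_qp(priv) == 0) {
		/* ... attach steering rules targeting priv->drop_qp.qpn ... */
		mlx4_en_destroy_drop_qp(priv);
	}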
/* Allocate rx qp's and configure them according to rss map */
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
{
@@ -703,21 +820,22 @@
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_rss_map *rss_map = &priv->rss_map;
struct mlx4_qp_context context;
- struct mlx4_en_rss_context *rss_context;
+ struct mlx4_rss_context *rss_context;
+ int rss_rings;
void *ptr;
- u8 rss_mask;
- int i, qpn;
+ u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
+ MLX4_RSS_TCP_IPV6);
+ int i;
int err = 0;
int good_qps = 0;
+ static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC,
+ 0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD,
+ 0x593D56D9, 0xF3253C06, 0x2ADC1FFC};
- if (mdev->profile.udp_rss)
- rss_mask = 0x3f;
- else
- rss_mask = 0x14;
en_dbg(DRV, priv, "Configuring rss steering\n");
err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
- roundup_pow_of_two(priv->rx_ring_num),
- &rss_map->base_qpn);
+ priv->rx_ring_num,
+ &rss_map->base_qpn, 0);
if (err) {
en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
return err;
@@ -724,9 +842,9 @@
}
for (i = 0; i < priv->rx_ring_num; i++) {
- qpn = rss_map->base_qpn + i;
- err = mlx4_en_config_rss_qp(priv, qpn,
- &priv->rx_ring[i],
+ priv->rx_ring[i]->qpn = rss_map->base_qpn + i;
+ err = mlx4_en_config_rss_qp(priv, priv->rx_ring[i]->qpn,
+ priv->rx_ring[i],
&rss_map->state[i],
&rss_map->qps[i]);
if (err)
@@ -736,28 +854,34 @@
}
/* Configure RSS indirection qp */
- err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn);
- if (err) {
- en_err(priv, "Failed to reserve range for RSS "
- "indirection qp\n");
- goto rss_err;
- }
err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
if (err) {
en_err(priv, "Failed to allocate RSS indirection QP\n");
- goto reserve_err;
+ goto rss_err;
}
rss_map->indir_qp.event = mlx4_en_sqp_event;
mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
- priv->rx_ring[0].cqn, &context);
+ priv->rx_ring[0]->cqn, -1, &context);
- ptr = ((void *) &context) + 0x3c;
- rss_context = (struct mlx4_en_rss_context *) ptr;
- rss_context->base_qpn = cpu_to_be32(ilog2(priv->rx_ring_num) << 24 |
+ if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num)
+ rss_rings = priv->rx_ring_num;
+ else
+ rss_rings = priv->prof->rss_rings;
+
+ ptr = ((u8 *)&context) + offsetof(struct mlx4_qp_context, pri_path) +
+ MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
+ rss_context = ptr;
+ rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 |
(rss_map->base_qpn));
rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
+ if (priv->mdev->profile.udp_rss) {
+ rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6;
+ rss_context->base_qpn_udp = rss_context->default_qpn;
+ }
rss_context->flags = rss_mask;
- rss_context->base_qpn_udp = rss_context->default_qpn;
+ rss_context->hash_fn = MLX4_RSS_HASH_TOP;
+ for (i = 0; i < 10; i++)
+ rss_context->rss_key[i] = cpu_to_be32(rsskey[i]);
err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
&rss_map->indir_qp, &rss_map->indir_state);
@@ -771,8 +895,6 @@
MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
-reserve_err:
- mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1);
rss_err:
for (i = 0; i < good_qps; i++) {
mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
@@ -794,7 +916,6 @@
MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
- mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1);
for (i = 0; i < priv->rx_ring_num; i++) {
mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
@@ -804,3 +925,4 @@
}
mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
}
+
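One detail worth calling out from the RSS hunk above: the indirection context packs log2 of the ring count into the top byte of base_qpn, so the effective ring count is assumed to be a power of two (ilog2 floors otherwise). A sketch of the packing with a portable stand-in for ilog2:

	static inline u32
	ilog2_u32(u32 x)
	{
		u32 r = 0;

		while (x >>= 1)
			r++;
		return (r);
	}

	/* Mirrors "ilog2(rss_rings) << 24 | (rss_map->base_qpn)" above. */
	static u32
	rss_base_qpn_sketch(u32 base_qpn, u32 rss_rings)
	{
		return ((ilog2_u32(rss_rings) << 24) | base_qpn);
	}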
Modified: trunk/sys/ofed/drivers/net/mlx4/en_tx.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/en_tx.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/en_tx.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -31,16 +31,14 @@
*
*/
-#include "mlx4_en.h"
-
+#include <linux/page.h>
#include <linux/mlx4/cq.h>
+#include <linux/slab.h>
#include <linux/mlx4/qp.h>
+#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
+#include <linux/moduleparam.h>
-#include <net/ethernet.h>
-#include <net/if_vlan_var.h>
-#include <sys/mbuf.h>
-
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
@@ -50,64 +48,105 @@
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>
+#include "mlx4_en.h"
+#include "utils.h"
+
enum {
MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */
MAX_BF = 256,
+ MIN_PKT_LEN = 17,
};
-static int inline_thold = MAX_INLINE;
+static int inline_thold __read_mostly = MAX_INLINE;
-module_param_named(inline_thold, inline_thold, int, 0444);
-MODULE_PARM_DESC(inline_thold, "treshold for using inline data");
+module_param_named(inline_thold, inline_thold, uint, 0444);
+MODULE_PARM_DESC(inline_thold, "threshold for using inline data");
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
- struct mlx4_en_tx_ring *ring, u32 size,
- u16 stride)
+ struct mlx4_en_tx_ring **pring, u32 size,
+ u16 stride, int node, int queue_idx)
{
struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_tx_ring *ring;
+ uint32_t x;
int tmp;
int err;
+ ring = kzalloc_node(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL, node);
+ if (!ring) {
+ ring = kzalloc(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL);
+ if (!ring) {
+ en_err(priv, "Failed allocating TX ring\n");
+ return -ENOMEM;
+ }
+ }
+
+ /* Create DMA descriptor TAG */
+ if ((err = -bus_dma_tag_create(
+ bus_get_dma_tag(mdev->pdev->dev.bsddev),
+ 1, /* any alignment */
+ 0, /* no boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ MLX4_EN_TX_MAX_PAYLOAD_SIZE, /* maxsize */
+ MLX4_EN_TX_MAX_MBUF_FRAGS, /* nsegments */
+ MLX4_EN_TX_MAX_MBUF_SIZE, /* maxsegsize */
+ 0, /* flags */
+ NULL, NULL, /* lockfunc, lockfuncarg */
+ &ring->dma_tag)))
+ goto done;
+
ring->size = size;
ring->size_mask = size - 1;
ring->stride = stride;
-
- inline_thold = min(inline_thold, MAX_INLINE);
-
+ ring->inline_thold = MAX(MIN_PKT_LEN, MIN(inline_thold, MAX_INLINE));
mtx_init(&ring->tx_lock.m, "mlx4 tx", NULL, MTX_DEF);
mtx_init(&ring->comp_lock.m, "mlx4 comp", NULL, MTX_DEF);
/* Allocate the buf ring */
ring->br = buf_ring_alloc(MLX4_EN_DEF_TX_QUEUE_SIZE, M_DEVBUF,
- M_WAITOK, &ring->tx_lock.m);
+ M_WAITOK, &ring->tx_lock.m);
if (ring->br == NULL) {
en_err(priv, "Failed allocating tx_info ring\n");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_free_dma_tag;
}
tmp = size * sizeof(struct mlx4_en_tx_info);
- ring->tx_info = kmalloc(tmp, GFP_KERNEL);
+ ring->tx_info = kzalloc_node(tmp, GFP_KERNEL, node);
if (!ring->tx_info) {
- en_err(priv, "Failed allocating tx_info ring\n");
- err = -ENOMEM;
- goto err_tx;
+ ring->tx_info = kzalloc(tmp, GFP_KERNEL);
+ if (!ring->tx_info) {
+ err = -ENOMEM;
+ goto err_ring;
+ }
}
+
+ /* Create DMA descriptor MAPs */
+ for (x = 0; x != size; x++) {
+ err = -bus_dmamap_create(ring->dma_tag, 0,
+ &ring->tx_info[x].dma_map);
+ if (err != 0) {
+ while (x--) {
+ bus_dmamap_destroy(ring->dma_tag,
+ ring->tx_info[x].dma_map);
+ }
+ goto err_info;
+ }
+ }
+
en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
ring->tx_info, tmp);
- ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
- if (!ring->bounce_buf) {
- en_err(priv, "Failed allocating bounce buffer\n");
- err = -ENOMEM;
- goto err_tx;
- }
ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);
+ /* Allocate HW buffers on provided NUMA node */
err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
2 * PAGE_SIZE);
if (err) {
en_err(priv, "Failed allocating hwq resources\n");
- goto err_bounce;
+ goto err_dma_map;
}
err = mlx4_en_map_buffer(&ring->wqres.buf);
@@ -122,9 +161,10 @@
"buf_size:%d dma:%llx\n", ring, ring->buf, ring->size,
ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);
- err = mlx4_qp_reserve_range(mdev->dev, 1, 256, &ring->qpn);
+ err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn,
+ MLX4_RESERVE_BF_QP);
if (err) {
- en_err(priv, "Failed reserving qp for tx ring.\n");
+ en_err(priv, "failed reserving qp for TX ring\n");
goto err_map;
}
@@ -135,14 +175,19 @@
}
ring->qp.event = mlx4_en_sqp_event;
- err = mlx4_bf_alloc(mdev->dev, &ring->bf);
+ err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
if (err) {
+ en_dbg(DRV, priv, "working without blueflame (%d)", err);
ring->bf.uar = &mdev->priv_uar;
ring->bf.uar->map = mdev->uar_map;
ring->bf_enabled = false;
} else
ring->bf_enabled = true;
+ ring->queue_index = queue_idx;
+	if (queue_idx < priv->num_tx_rings_p_up)
+ CPU_SET(queue_idx, &ring->affinity_mask);
+ *pring = ring;
return 0;
err_reserve:
@@ -151,20 +196,26 @@
mlx4_en_unmap_buffer(&ring->wqres.buf);
err_hwq_res:
mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
-err_bounce:
- kfree(ring->bounce_buf);
- ring->bounce_buf = NULL;
-err_tx:
+err_dma_map:
+ for (x = 0; x != size; x++)
+ bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map);
+err_info:
+ vfree(ring->tx_info);
+err_ring:
buf_ring_free(ring->br, M_DEVBUF);
- kfree(ring->tx_info);
- ring->tx_info = NULL;
+err_free_dma_tag:
+ bus_dma_tag_destroy(ring->dma_tag);
+done:
+ kfree(ring);
return err;
}
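
The rewritten create path above unwinds with a goto ladder whose labels
run in reverse order of acquisition (DMA maps, tx_info, buf ring, DMA
tag, then the ring itself). A simplified userspace sketch of the same
pattern, with malloc() standing in for the real resources:

    #include <stdlib.h>

    static int create_ring(void)
    {
        void *tag, *ring, *info;
        int err;

        tag = malloc(16);            /* stands in for the DMA tag */
        if (tag == NULL)
            return -1;
        ring = malloc(16);           /* stands in for the buf ring */
        if (ring == NULL) {
            err = -1;
            goto err_free_tag;
        }
        info = malloc(16);           /* stands in for tx_info */
        if (info == NULL) {
            err = -1;
            goto err_ring;
        }
        return 0;                    /* success: caller owns everything */

    err_ring:
        free(ring);
    err_free_tag:
        free(tag);
        return err;
    }

    int main(void)
    {
        return create_ring() ? 1 : 0;
    }
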
void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
- struct mlx4_en_tx_ring *ring)
+ struct mlx4_en_tx_ring **pring)
{
struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_tx_ring *ring = *pring;
+ uint32_t x;
en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);
buf_ring_free(ring->br, M_DEVBUF);
@@ -172,20 +223,22 @@
mlx4_bf_free(mdev->dev, &ring->bf);
mlx4_qp_remove(mdev->dev, &ring->qp);
mlx4_qp_free(mdev->dev, &ring->qp);
- mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
+ mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
mlx4_en_unmap_buffer(&ring->wqres.buf);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
- kfree(ring->bounce_buf);
- ring->bounce_buf = NULL;
- kfree(ring->tx_info);
- ring->tx_info = NULL;
+ for (x = 0; x != ring->size; x++)
+ bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map);
+ vfree(ring->tx_info);
mtx_destroy(&ring->tx_lock.m);
mtx_destroy(&ring->comp_lock.m);
+ bus_dma_tag_destroy(ring->dma_tag);
+ kfree(ring);
+ *pring = NULL;
}
int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring,
- int cq)
+ int cq, int user_prio)
{
struct mlx4_en_dev *mdev = priv->mdev;
int err;
@@ -196,20 +249,18 @@
ring->last_nr_txbb = 1;
ring->poll_cnt = 0;
ring->blocked = 0;
- memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
memset(ring->buf, 0, ring->buf_size);
ring->qp_state = MLX4_QP_STATE_RST;
- ring->doorbell_qpn = swab32(ring->qp.qpn << 8);
+ ring->doorbell_qpn = ring->qp.qpn << 8;
mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
- ring->cqn, &ring->context);
+ ring->cqn, user_prio, &ring->context);
if (ring->bf_enabled)
ring->context.usr_page = cpu_to_be32(ring->bf.uar->index);
err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
&ring->qp, &ring->qp_state);
-
return err;
}
@@ -222,65 +273,65 @@
MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
}
+static volatile struct mlx4_wqe_data_seg *
+mlx4_en_store_inline_lso_data(volatile struct mlx4_wqe_data_seg *dseg,
+ struct mbuf *mb, int len, __be32 owner_bit)
+{
+ uint8_t *inl = __DEVOLATILE(uint8_t *, dseg);
-static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
- struct mlx4_en_tx_ring *ring,
- int index, u8 owner)
+ /* copy data into place */
+ m_copydata(mb, 0, len, inl + 4);
+ dseg += DIV_ROUND_UP(4 + len, DS_SIZE_ALIGNMENT);
+ return (dseg);
+}
+
+static void
+mlx4_en_store_inline_lso_header(volatile struct mlx4_wqe_data_seg *dseg,
+ int len, __be32 owner_bit)
{
- struct mlx4_en_dev *mdev = priv->mdev;
+}
+
+static void
+mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring, u32 index, u8 owner)
+{
struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
- struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
- struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
- struct mbuf *mb = tx_info->mb;
- void *end = ring->buf + ring->buf_size;
- int frags = tx_info->nr_segs;
- int i;
- __be32 *ptr = (__be32 *)tx_desc;
- __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
+ struct mlx4_en_tx_desc *tx_desc = (struct mlx4_en_tx_desc *)
+ (ring->buf + (index * TXBB_SIZE));
+ volatile __be32 *ptr = (__be32 *)tx_desc;
+ const __be32 stamp = cpu_to_be32(STAMP_VAL |
+ ((u32)owner << STAMP_SHIFT));
+ u32 i;
- /* Optimize the common case when there are no wraparounds */
- if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
- if (!tx_info->inl) {
- for (i = 0; i < frags; i++) {
- pci_unmap_single(mdev->pdev,
- (dma_addr_t) be64_to_cpu(data[i].addr),
- data[i].byte_count, PCI_DMA_TODEVICE);
- }
- }
- /* Stamp the freed descriptor */
- for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
- *ptr = stamp;
- ptr += STAMP_DWORDS;
- }
+ /* Stamp the freed descriptor */
+ for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
+ *ptr = stamp;
+ ptr += STAMP_DWORDS;
+ }
+}
- } else {
- if (!tx_info->inl) {
- for (i = 0; i < frags; i++) {
- /* Check for wraparound before unmapping */
- if ((void *) data >= end)
- data = (struct mlx4_wqe_data_seg *) ring->buf;
- pci_unmap_single(mdev->pdev,
- (dma_addr_t) be64_to_cpu(data->addr),
- data->byte_count, PCI_DMA_TODEVICE);
- ++data;
- }
- }
- /* Stamp the freed descriptor */
- for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
- *ptr = stamp;
- ptr += STAMP_DWORDS;
- if ((void *) ptr >= end) {
- ptr = ring->buf;
- stamp ^= cpu_to_be32(0x80000000);
- }
- }
+static u32
+mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring, u32 index)
+{
+ struct mlx4_en_tx_info *tx_info;
+ struct mbuf *mb;
- }
- m_freem(mb);
- return tx_info->nr_txbb;
+ tx_info = &ring->tx_info[index];
+ mb = tx_info->mb;
+
+ if (mb == NULL)
+ goto done;
+
+ bus_dmamap_sync(ring->dma_tag, tx_info->dma_map,
+ BUS_DMASYNC_POSTWRITE);
+ bus_dmamap_unload(ring->dma_tag, tx_info->dma_map);
+
+ m_freem(mb);
+done:
+ return (tx_info->nr_txbb);
}
-
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -292,14 +343,13 @@
ring->cons, ring->prod);
if ((u32) (ring->prod - ring->cons) > ring->size) {
- en_warn(priv, "Tx consumer passed producer!\n");
+ en_warn(priv, "Tx consumer passed producer!\n");
return 0;
}
while (ring->cons != ring->prod) {
ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring,
- ring->cons & ring->size_mask,
- !!(ring->cons & ring->size));
+ ring->cons & ring->size_mask);
ring->cons += ring->last_nr_txbb;
cnt++;
}
@@ -310,102 +360,96 @@
return cnt;
}
-void mlx4_en_set_prio_map(struct mlx4_en_priv *priv, u16 *prio_map, u32 ring_num)
+static bool
+mlx4_en_tx_ring_is_full(struct mlx4_en_tx_ring *ring)
{
- int block = 8 / ring_num;
- int extra = 8 - (block * ring_num);
- int num = 0;
- u16 ring = 1;
- int prio;
-
- if (ring_num == 1) {
- for (prio = 0; prio < 8; prio++)
- prio_map[prio] = 0;
- return;
- }
-
- for (prio = 0; prio < 8; prio++) {
- if (extra && (num == block + 1)) {
- ring++;
- num = 0;
- extra--;
- } else if (!extra && (num == block)) {
- ring++;
- num = 0;
- }
- prio_map[prio] = ring;
- en_dbg(DRV, priv, " prio:%d --> ring:%d\n", prio, ring);
- num++;
- }
+ int wqs;
+ wqs = ring->size - (ring->prod - ring->cons);
+ return (wqs < (HEADROOM + (2 * MLX4_EN_TX_WQE_MAX_WQEBBS)));
}
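
mlx4_en_tx_ring_is_full() relies on prod and cons being free-running
counters, so unsigned wraparound keeps (prod - cons) equal to the number
of in-flight WQEBBs. A standalone sketch with assumed headroom values:

    #include <stdint.h>
    #include <stdio.h>

    #define HEADROOM    4    /* assumed; mirrors the driver constant */
    #define MAX_WQEBBS  16   /* stands in for MLX4_EN_TX_WQE_MAX_WQEBBS */

    static int ring_is_full(uint32_t prod, uint32_t cons, uint32_t size)
    {
        /* wrap-safe count of free WQEBBs */
        uint32_t wqs = size - (prod - cons);

        return (wqs < (HEADROOM + (2 * MAX_WQEBBS)));
    }

    int main(void)
    {
        /* still correct when the 32-bit counters wrap around zero */
        printf("%d\n", ring_is_full(0x00000010u, 0xfffffff0u, 1024)); /* 0 */
        printf("%d\n", ring_is_full(0x000003f0u, 0x00000000u, 1024)); /* 1 */
        return 0;
    }
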
-static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
+static int mlx4_en_process_tx_cq(struct net_device *dev,
+ struct mlx4_en_cq *cq)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_cq *mcq = &cq->mcq;
- struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
- struct mlx4_cqe *cqe = cq->buf;
+ struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
+ struct mlx4_cqe *cqe;
u16 index;
- u16 new_index;
+ u16 new_index, ring_index, stamp_index;
u32 txbbs_skipped = 0;
- u32 cq_last_sav;
+ u32 txbbs_stamp = 0;
+ u32 cons_index = mcq->cons_index;
+ int size = cq->size;
+ u32 size_mask = ring->size_mask;
+ struct mlx4_cqe *buf = cq->buf;
+ int factor = priv->cqe_factor;
- /* index always points to the first TXBB of the last polled descriptor */
- index = ring->cons & ring->size_mask;
- new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
- if (index == new_index)
- return;
-
if (!priv->port_up)
- return;
+ return 0;
- /*
- * We use a two-stage loop:
- * - the first samples the HW-updated CQE
- * - the second frees TXBBs until the last sample
- * This lets us amortize CQE cache misses, while still polling the CQ
- * until is quiescent.
- */
- cq_last_sav = mcq->cons_index;
- do {
+ index = cons_index & size_mask;
+ cqe = &buf[(index << factor) + factor];
+ ring_index = ring->cons & size_mask;
+ stamp_index = ring_index;
+
+ /* Process all completed CQEs */
+ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
+ cons_index & size)) {
+ /*
+ * make sure we read the CQE after we read the
+ * ownership bit
+ */
+ rmb();
+
+ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
+ MLX4_CQE_OPCODE_ERROR)) {
+			en_err(priv, "CQE completed in error - vendor syndrome: 0x%x syndrome: 0x%x\n",
+ ((struct mlx4_err_cqe *)cqe)->
+ vendor_err_syndrome,
+ ((struct mlx4_err_cqe *)cqe)->syndrome);
+ }
+
+ /* Skip over last polled CQE */
+ new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
+
do {
- /* Skip over last polled CQE */
- index = (index + ring->last_nr_txbb) & ring->size_mask;
txbbs_skipped += ring->last_nr_txbb;
-
- /* Poll next CQE */
+ ring_index = (ring_index + ring->last_nr_txbb) & size_mask;
+ /* free next descriptor */
ring->last_nr_txbb = mlx4_en_free_tx_desc(
- priv, ring, index,
- !!((ring->cons + txbbs_skipped) &
- ring->size));
- ++mcq->cons_index;
+ priv, ring, ring_index);
+ mlx4_en_stamp_wqe(priv, ring, stamp_index,
+ !!((ring->cons + txbbs_stamp) &
+ ring->size));
+ stamp_index = ring_index;
+ txbbs_stamp = txbbs_skipped;
+ } while (ring_index != new_index);
- } while (index != new_index);
+ ++cons_index;
+ index = cons_index & size_mask;
+ cqe = &buf[(index << factor) + factor];
+ }
- new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
- } while (index != new_index);
- AVG_PERF_COUNTER(priv->pstats.tx_coal_avg,
- (u32) (mcq->cons_index - cq_last_sav));
/*
* To prevent CQ overflow we first update CQ consumer and only then
* the ring consumer.
*/
+ mcq->cons_index = cons_index;
mlx4_cq_set_ci(mcq);
wmb();
ring->cons += txbbs_skipped;
- /* Wakeup Tx queue if this ring stopped it */
- if (unlikely(ring->blocked)) {
- if ((u32) (ring->prod - ring->cons) <=
- ring->size - HEADROOM - MAX_DESC_TXBBS) {
- ring->blocked = 0;
- if (atomic_fetchadd_int(&priv->blocked, -1) == 1)
- atomic_clear_int(&dev->if_drv_flags,
- IFF_DRV_OACTIVE);
- priv->port_stats.wake_queue++;
- }
+ /* Wakeup Tx queue if it was stopped and ring is not full */
+ if (unlikely(ring->blocked) && !mlx4_en_tx_ring_is_full(ring)) {
+ ring->blocked = 0;
+ if (atomic_fetchadd_int(&priv->blocked, -1) == 1)
+			atomic_clear_int(&dev->if_drv_flags, IFF_DRV_OACTIVE);
+ ring->wake_queue++;
+ priv->port_stats.wake_queue++;
}
+ return (0);
}
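
The completion loop above polls CQEs by comparing the owner bit against
the parity of the consumer index; the bit effectively flips each time the
hardware laps the queue. A minimal sketch of that ownership test (the
mask value is assumed to match the driver's MLX4_CQE_OWNER_MASK):

    #include <stdint.h>
    #include <stdio.h>

    #define XNOR(a, b)      (!(a) == !(b))
    #define CQE_OWNER_MASK  0x80    /* assumed mask value */

    /* cq_size must be a power of two, as in the driver */
    static int cqe_is_sw_owned(uint8_t owner_sr_opcode,
        uint32_t cons_index, uint32_t cq_size)
    {
        return (XNOR(owner_sr_opcode & CQE_OWNER_MASK,
            cons_index & cq_size));
    }

    int main(void)
    {
        printf("%d\n", cqe_is_sw_owned(0x80, 1024, 1024)); /* 1: ours */
        printf("%d\n", cqe_is_sw_owned(0x00, 1024, 1024)); /* 0: HW's */
        return 0;
    }
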
void mlx4_en_tx_irq(struct mlx4_cq *mcq)
@@ -412,9 +456,9 @@
{
struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
struct mlx4_en_priv *priv = netdev_priv(cq->dev);
- struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
+ struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
- if (!spin_trylock(&ring->comp_lock))
+ if (priv->port_up == 0 || !spin_trylock(&ring->comp_lock))
return;
mlx4_en_process_tx_cq(cq->dev, cq);
mod_timer(&cq->timer, jiffies + 1);
@@ -421,16 +465,17 @@
spin_unlock(&ring->comp_lock);
}
-
void mlx4_en_poll_tx_cq(unsigned long data)
{
struct mlx4_en_cq *cq = (struct mlx4_en_cq *) data;
struct mlx4_en_priv *priv = netdev_priv(cq->dev);
- struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
+ struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
u32 inflight;
INC_PERF_COUNTER(priv->pstats.tx_poll);
+ if (priv->port_up == 0)
+ return;
if (!spin_trylock(&ring->comp_lock)) {
mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
return;
@@ -447,39 +492,14 @@
spin_unlock(&ring->comp_lock);
}
-static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
- struct mlx4_en_tx_ring *ring,
- u32 index,
- unsigned int desc_size)
+static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind)
{
- u32 copy = (ring->size - index) * TXBB_SIZE;
- int i;
+ struct mlx4_en_cq *cq = priv->tx_cq[tx_ind];
+ struct mlx4_en_tx_ring *ring = priv->tx_ring[tx_ind];
- for (i = desc_size - copy - 4; i >= 0; i -= 4) {
- if ((i & (TXBB_SIZE - 1)) == 0)
- wmb();
+ if (priv->port_up == 0)
+ return;
- *((u32 *) (ring->buf + i)) =
- *((u32 *) (ring->bounce_buf + copy + i));
- }
-
- for (i = copy - 4; i >= 4 ; i -= 4) {
- if ((i & (TXBB_SIZE - 1)) == 0)
- wmb();
-
- *((u32 *) (ring->buf + index * TXBB_SIZE + i)) =
- *((u32 *) (ring->bounce_buf + i));
- }
-
- /* Return real descriptor location */
- return ring->buf + index * TXBB_SIZE;
-}
-
-static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind)
-{
- struct mlx4_en_cq *cq = &priv->tx_cq[tx_ind];
- struct mlx4_en_tx_ring *ring = &priv->tx_ring[tx_ind];
-
/* If we don't have a pending timer, set one up to catch our recent
post in case the interface becomes idle */
if (!timer_pending(&cq->timer))
@@ -493,427 +513,478 @@
}
}
-static int is_inline(struct mbuf *mb)
+static u16
+mlx4_en_get_inline_hdr_size(struct mlx4_en_tx_ring *ring, struct mbuf *mb)
{
+ u16 retval;
- if (inline_thold && mb->m_pkthdr.len <= inline_thold &&
- (mb->m_pkthdr.csum_flags & CSUM_TSO) == 0)
- return 1;
+ /* only copy from first fragment, if possible */
+ retval = MIN(ring->inline_thold, mb->m_len);
- return 0;
+ /* check for too little data */
+ if (unlikely(retval < MIN_PKT_LEN))
+ retval = MIN(ring->inline_thold, mb->m_pkthdr.len);
+ return (retval);
}
-static int inline_size(struct mbuf *mb)
+static int
+mlx4_en_get_header_size(struct mbuf *mb)
{
- int len;
+ struct ether_vlan_header *eh;
+ struct tcphdr *th;
+ struct ip *ip;
+ int ip_hlen, tcp_hlen;
+ struct ip6_hdr *ip6;
+ uint16_t eth_type;
+ int eth_hdr_len;
- len = mb->m_pkthdr.len;
- if (len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg)
- <= MLX4_INLINE_ALIGN)
- return ALIGN(len + CTRL_SIZE +
- sizeof(struct mlx4_wqe_inline_seg), 16);
- else
- return ALIGN(len + CTRL_SIZE + 2 *
- sizeof(struct mlx4_wqe_inline_seg), 16);
-}
-
-static int get_head_size(struct mbuf *mb)
-{
- struct tcphdr *th;
- struct ip *ip;
- int ip_hlen, tcp_hlen;
- int len;
-
- len = ETHER_HDR_LEN;
- if (mb->m_len < len + sizeof(struct ip))
+ eh = mtod(mb, struct ether_vlan_header *);
+ if (mb->m_len < ETHER_HDR_LEN)
return (0);
- ip = (struct ip *)(mtod(mb, char *) + len);
- if (ip->ip_p != IPPROTO_TCP)
+ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ eth_type = ntohs(eh->evl_proto);
+ eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
+ } else {
+ eth_type = ntohs(eh->evl_encap_proto);
+ eth_hdr_len = ETHER_HDR_LEN;
+ }
+ if (mb->m_len < eth_hdr_len)
return (0);
- ip_hlen = ip->ip_hl << 2;
- len += ip_hlen;
- if (mb->m_len < len + sizeof(struct tcphdr))
+ switch (eth_type) {
+ case ETHERTYPE_IP:
+ ip = (struct ip *)(mb->m_data + eth_hdr_len);
+ if (mb->m_len < eth_hdr_len + sizeof(*ip))
+ return (0);
+ if (ip->ip_p != IPPROTO_TCP)
+ return (0);
+ ip_hlen = ip->ip_hl << 2;
+ eth_hdr_len += ip_hlen;
+ break;
+ case ETHERTYPE_IPV6:
+ ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len);
+ if (mb->m_len < eth_hdr_len + sizeof(*ip6))
+ return (0);
+ if (ip6->ip6_nxt != IPPROTO_TCP)
+ return (0);
+ eth_hdr_len += sizeof(*ip6);
+ break;
+ default:
return (0);
- th = (struct tcphdr *)(mtod(mb, char *) + len);
+ }
+ if (mb->m_len < eth_hdr_len + sizeof(*th))
+ return (0);
+ th = (struct tcphdr *)(mb->m_data + eth_hdr_len);
tcp_hlen = th->th_off << 2;
- len += tcp_hlen;
- if (mb->m_len < len)
+ eth_hdr_len += tcp_hlen;
+ if (mb->m_len < eth_hdr_len)
return (0);
- return (len);
+ return (eth_hdr_len);
}
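
mlx4_en_get_header_size() walks the Ethernet, optional VLAN, IP/IPv6 and
TCP headers; ip_hl and th_off count 32-bit words, hence the << 2. A tiny
standalone sketch of the word-to-byte arithmetic with sample field values:

    #include <stdio.h>

    int main(void)
    {
        unsigned eth_hdr_len = 14;  /* ETHER_HDR_LEN, untagged frame */
        unsigned ip_hl = 5;         /* IPv4 IHL field, 32-bit words */
        unsigned th_off = 8;        /* TCP data offset, 32-bit words */

        /* << 2 converts 32-bit words to bytes */
        eth_hdr_len += (ip_hl << 2) + (th_off << 2);
        printf("total header bytes = %u\n", eth_hdr_len); /* 66 */
        return 0;
    }
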
-static int get_real_size(struct mbuf *mb, struct net_device *dev, int *segsp,
- int *lso_header_size)
+static volatile struct mlx4_wqe_data_seg *
+mlx4_en_store_inline_data(volatile struct mlx4_wqe_data_seg *dseg,
+ struct mbuf *mb, int len, __be32 owner_bit)
{
- struct mbuf *m;
- int nr_segs;
+ uint8_t *inl = __DEVOLATILE(uint8_t *, dseg);
+ const int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - 4;
- nr_segs = 0;
- for (m = mb; m != NULL; m = m->m_next)
- if (m->m_len)
- nr_segs++;
-
- if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
- *lso_header_size = get_head_size(mb);
- if (*lso_header_size) {
- if (mb->m_len == *lso_header_size)
- nr_segs--;
- *segsp = nr_segs;
- return CTRL_SIZE + nr_segs * DS_SIZE +
- ALIGN(*lso_header_size + 4, DS_SIZE);
- }
- } else
- *lso_header_size = 0;
- *segsp = nr_segs;
- if (is_inline(mb))
- return inline_size(mb);
- return (CTRL_SIZE + nr_segs * DS_SIZE);
-}
-
-static struct mbuf *mb_copy(struct mbuf *mb, int *offp, char *data, int len)
-{
- int bytes;
- int off;
-
- off = *offp;
- while (len) {
- bytes = min(mb->m_len - off, len);
- if (bytes)
- memcpy(data, mb->m_data + off, bytes);
- len -= bytes;
- data += bytes;
- off += bytes;
- if (off == mb->m_len) {
- off = 0;
- mb = mb->m_next;
- }
+ if (unlikely(len < MIN_PKT_LEN)) {
+ m_copydata(mb, 0, len, inl + 4);
+ memset(inl + 4 + len, 0, MIN_PKT_LEN - len);
+ dseg += DIV_ROUND_UP(4 + MIN_PKT_LEN, DS_SIZE_ALIGNMENT);
+ } else if (len <= spc) {
+ m_copydata(mb, 0, len, inl + 4);
+ dseg += DIV_ROUND_UP(4 + len, DS_SIZE_ALIGNMENT);
+ } else {
+ m_copydata(mb, 0, spc, inl + 4);
+ m_copydata(mb, spc, len - spc, inl + 8 + spc);
+ dseg += DIV_ROUND_UP(8 + len, DS_SIZE_ALIGNMENT);
}
- *offp = off;
- return (mb);
+ return (dseg);
}
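
The inline-copy helper advances dseg by whole 16-byte chunks: one 4-byte
byte-count word when the data fits before the spc boundary, two once it
spills past it. A standalone sketch of that sizing, assuming the usual
constant values (MLX4_INLINE_ALIGN = 64, CTRL_SIZE = 16):

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))
    #define DS_ALIGN     16     /* stands in for DS_SIZE_ALIGNMENT */
    #define MIN_PKT_LEN  17
    #define SPC          (64 - 16 - 4)  /* assumed constant values */

    static int inline_ds_count(int len)
    {
        if (len < MIN_PKT_LEN)  /* short frame, padded up to minimum */
            return DIV_ROUND_UP(4 + MIN_PKT_LEN, DS_ALIGN);
        if (len <= SPC)         /* one byte-count word */
            return DIV_ROUND_UP(4 + len, DS_ALIGN);
        return DIV_ROUND_UP(8 + len, DS_ALIGN); /* two byte-count words */
    }

    int main(void)
    {
        printf("%d %d %d\n", inline_ds_count(10),
            inline_ds_count(40), inline_ds_count(100)); /* 2 3 7 */
        return 0;
    }
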
-static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct mbuf *mb,
- int real_size, u16 *vlan_tag, int tx_ind)
+static void
+mlx4_en_store_inline_header(volatile struct mlx4_wqe_data_seg *dseg,
+ int len, __be32 owner_bit)
{
- struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
- int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;
- int len;
- int off;
+ uint8_t *inl = __DEVOLATILE(uint8_t *, dseg);
+ const int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - 4;
- off = 0;
- len = mb->m_pkthdr.len;
- if (len <= spc) {
- inl->byte_count = cpu_to_be32(1 << 31 | len);
- mb_copy(mb, &off, (void *)(inl + 1), len);
+ if (unlikely(len < MIN_PKT_LEN)) {
+ *(volatile uint32_t *)inl =
+ SET_BYTE_COUNT((1 << 31) | MIN_PKT_LEN);
+ } else if (len <= spc) {
+ *(volatile uint32_t *)inl =
+ SET_BYTE_COUNT((1 << 31) | len);
} else {
- inl->byte_count = cpu_to_be32(1 << 31 | spc);
- mb = mb_copy(mb, &off, (void *)(inl + 1), spc);
- inl = (void *) (inl + 1) + spc;
- mb_copy(mb, &off, (void *)(inl + 1), len - spc);
+ *(volatile uint32_t *)(inl + 4 + spc) =
+ SET_BYTE_COUNT((1 << 31) | (len - spc));
wmb();
- inl->byte_count = cpu_to_be32(1 << 31 | (len - spc));
+ *(volatile uint32_t *)inl =
+ SET_BYTE_COUNT((1 << 31) | spc);
}
- tx_desc->ctrl.vlan_tag = cpu_to_be16(*vlan_tag);
- tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!(*vlan_tag);
- tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
}
+static unsigned long hashrandom;
+static void hashrandom_init(void *arg)
+{
+ hashrandom = random();
+}
+SYSINIT(hashrandom_init, SI_SUB_KLD, SI_ORDER_SECOND, &hashrandom_init, NULL);
+
u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
- struct mlx4_en_tx_hash_entry *entry;
- struct ether_header *eth;
- struct tcphdr *th;
- struct ip *iph;
- u32 hash_index;
- int tx_ind = 0;
- u16 vlan_tag = 0;
- int len;
+ u32 rings_p_up = priv->num_tx_rings_p_up;
+ u32 up = 0;
+ u32 queue_index;
+#if (MLX4_EN_NUM_UP > 1)
/* Obtain VLAN information if present */
if (mb->m_flags & M_VLANTAG) {
- vlan_tag = mb->m_pkthdr.ether_vtag;
- /* Set the Tx ring to use according to vlan priority */
- tx_ind = priv->tx_prio_map[vlan_tag >> 13];
- if (tx_ind)
- return tx_ind;
+ u32 vlan_tag = mb->m_pkthdr.ether_vtag;
+ up = (vlan_tag >> 13) % MLX4_EN_NUM_UP;
}
- if (mb->m_len <
- ETHER_HDR_LEN + sizeof(struct ip) + sizeof(struct tcphdr))
- return MLX4_EN_NUM_HASH_RINGS;
- eth = mtod(mb, struct ether_header *);
- /* Hashing is only done for TCP/IP or UDP/IP packets */
- if (be16_to_cpu(eth->ether_type) != ETHERTYPE_IP)
- return MLX4_EN_NUM_HASH_RINGS;
- len = ETHER_HDR_LEN;
- iph = (struct ip *)(mtod(mb, char *) + len);
- len += iph->ip_hl << 2;
- th = (struct tcphdr *)(mtod(mb, char *) + len);
- hash_index = be32_to_cpu(iph->ip_dst.s_addr) & MLX4_EN_TX_HASH_MASK;
- switch(iph->ip_p) {
- case IPPROTO_UDP:
- break;
- case IPPROTO_TCP:
- if (mb->m_len < len + sizeof(struct tcphdr))
- return MLX4_EN_NUM_HASH_RINGS;
- hash_index =
- (hash_index ^ be16_to_cpu(th->th_dport ^ th->th_sport)) &
- MLX4_EN_TX_HASH_MASK;
- break;
- default:
- return MLX4_EN_NUM_HASH_RINGS;
- }
+#endif
+ /* hash mbuf */
+ queue_index = mlx4_en_hashmbuf(MLX4_F_HASHL3 | MLX4_F_HASHL4, mb, hashrandom);
- entry = &priv->tx_hash[hash_index];
- if(unlikely(!entry->cnt)) {
- tx_ind = hash_index & (MLX4_EN_NUM_HASH_RINGS / 2 - 1);
- if (2 * entry->small_pkts > entry->big_pkts)
- tx_ind += MLX4_EN_NUM_HASH_RINGS / 2;
- entry->small_pkts = entry->big_pkts = 0;
- entry->ring = tx_ind;
- }
+ return ((queue_index % rings_p_up) + (up * rings_p_up));
+}
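
The new selector splits the TX rings into one group of rings_p_up rings
per user priority and indexes within the group by the flow hash. A
minimal sketch of that mapping (all values assumed for illustration):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t rings_p_up = 4;     /* assumed rings per user priority */
        uint32_t hash = 0x9e3779b9;  /* stands in for mlx4_en_hashmbuf() */
        uint32_t vlan_tag = 0x6005;  /* hypothetical tag, PCP = 3 */
        uint32_t up = (vlan_tag >> 13) % 8;  /* 8 = assumed MLX4_EN_NUM_UP */

        uint32_t queue = (hash % rings_p_up) + (up * rings_p_up);
        printf("up=%u queue=%u\n", up, queue); /* up=3 queue=13 */
        return 0;
    }
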
- entry->cnt++;
- if (mb->m_pkthdr.len > MLX4_EN_SMALL_PKT_SIZE)
- entry->big_pkts++;
- else
- entry->small_pkts++;
- return entry->ring;
+static void mlx4_bf_copy(void __iomem *dst, volatile unsigned long *src, unsigned bytecnt)
+{
+ __iowrite64_copy(dst, __DEVOLATILE(void *, src), bytecnt / 8);
}
-static void mlx4_bf_copy(unsigned long *dst, unsigned long *src, unsigned bytecnt)
+static u64 mlx4_en_mac_to_u64(u8 *addr)
{
- __iowrite64_copy(dst, src, bytecnt / 8);
+ u64 mac = 0;
+ int i;
+
+ for (i = 0; i < ETHER_ADDR_LEN; i++) {
+ mac <<= 8;
+ mac |= addr[i];
+ }
+ return mac;
}
-static int mlx4_en_xmit(struct net_device *dev, int tx_ind, struct mbuf **mbp)
+static int mlx4_en_xmit(struct mlx4_en_priv *priv, int tx_ind, struct mbuf **mbp)
{
- struct mlx4_en_priv *priv = netdev_priv(dev);
- struct mlx4_en_dev *mdev = priv->mdev;
- struct mlx4_en_tx_ring *ring;
- struct mlx4_en_cq *cq;
- struct mlx4_en_tx_desc *tx_desc;
- struct mlx4_wqe_data_seg *data;
+ enum {
+ DS_FACT = TXBB_SIZE / DS_SIZE_ALIGNMENT,
+ CTRL_FLAGS = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
+ MLX4_WQE_CTRL_SOLICITED),
+ };
+ bus_dma_segment_t segs[MLX4_EN_TX_MAX_MBUF_FRAGS];
+ volatile struct mlx4_wqe_data_seg *dseg;
+ volatile struct mlx4_wqe_data_seg *dseg_inline;
+ volatile struct mlx4_en_tx_desc *tx_desc;
+ struct mlx4_en_tx_ring *ring = priv->tx_ring[tx_ind];
+ struct ifnet *ifp = priv->dev;
struct mlx4_en_tx_info *tx_info;
+ struct mbuf *mb = *mbp;
struct mbuf *m;
- int nr_txbb;
+ __be32 owner_bit;
int nr_segs;
- int desc_size;
- int real_size;
- dma_addr_t dma;
- u32 index, bf_index;
- __be32 op_own;
- u16 vlan_tag = 0;
- int i;
- int lso_header_size;
- bool bounce = false;
- struct mbuf *mb;
- int defrag = 1;
+ int pad;
+ int err;
+ u32 bf_size;
+ u32 bf_prod;
+ u32 opcode;
+ u16 index;
+ u16 ds_cnt;
+ u16 ihs;
- ring = &priv->tx_ring[tx_ind];
- mb = *mbp;
- if (!priv->port_up)
+ if (unlikely(!priv->port_up)) {
+ err = EINVAL;
goto tx_drop;
-
-retry:
- real_size = get_real_size(mb, dev, &nr_segs, &lso_header_size);
- if (unlikely(!real_size))
- goto tx_drop;
-
- /* Allign descriptor to TXBB size */
- desc_size = ALIGN(real_size, TXBB_SIZE);
- nr_txbb = desc_size / TXBB_SIZE;
- if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
- if (defrag) {
- mb = m_defrag(*mbp, M_DONTWAIT);
- if (mb == NULL) {
- mb = *mbp;
- goto tx_drop;
- }
- *mbp = mb;
- defrag = 0;
- goto retry;
- }
- goto tx_drop;
}
- /* Check available TXBBs And 2K spare for prefetch */
- if (unlikely(((int)(ring->prod - ring->cons)) >
- ring->size - HEADROOM - MAX_DESC_TXBBS)) {
- /* every full Tx ring stops queue */
- if (ring->blocked == 0)
- atomic_add_int(&priv->blocked, 1);
- atomic_set_int(&dev->if_drv_flags, IFF_DRV_OACTIVE);
+ /* check if TX ring is full */
+ if (unlikely(mlx4_en_tx_ring_is_full(ring))) {
+ /* every full native Tx ring stops queue */
+ if (ring->blocked == 0)
+ atomic_add_int(&priv->blocked, 1);
+ /* Set HW-queue-is-full flag */
+ atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
+ priv->port_stats.queue_stopped++;
ring->blocked = 1;
priv->port_stats.queue_stopped++;
+ ring->queue_stopped++;
/* Use interrupts to find out when queue opened */
- cq = &priv->tx_cq[tx_ind];
- mlx4_en_arm_cq(priv, cq);
- return EBUSY;
- }
+ mlx4_en_arm_cq(priv, priv->tx_cq[tx_ind]);
+ return (ENOBUFS);
+ }
+ /* sanity check we are not wrapping around */
+ KASSERT(((~ring->prod) & ring->size_mask) >=
+ (MLX4_EN_TX_WQE_MAX_WQEBBS - 1), ("Wrapping around TX ring"));
+
/* Track current inflight packets for performance analysis */
AVG_PERF_COUNTER(priv->pstats.inflight_avg,
(u32) (ring->prod - ring->cons - 1));
- /* Packet is good - grab an index and transmit it */
+ /* Track current mbuf packet header length */
+ AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, mb->m_pkthdr.len);
+
+ /* Grab an index and try to transmit packet */
+ owner_bit = (ring->prod & ring->size) ?
+ cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0;
index = ring->prod & ring->size_mask;
- bf_index = ring->prod;
+ tx_desc = (volatile struct mlx4_en_tx_desc *)
+ (ring->buf + index * TXBB_SIZE);
+ tx_info = &ring->tx_info[index];
+ dseg = &tx_desc->data;
- /* See if we have enough space for whole descriptor TXBB for setting
- * SW ownership on next descriptor; if not, use a bounce buffer. */
- if (likely(index + nr_txbb <= ring->size))
- tx_desc = ring->buf + index * TXBB_SIZE;
- else {
- tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
- bounce = true;
- }
+ /* send a copy of the frame to the BPF listener, if any */
+ if (ifp != NULL && ifp->if_bpf != NULL)
+ ETHER_BPF_MTAP(ifp, mb);
- /* Prepare ctrl segement apart opcode+ownership, which depends on
- * whether LSO is used */
- if (mb->m_flags & M_VLANTAG)
- vlan_tag = mb->m_pkthdr.ether_vtag;
- tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
- tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!vlan_tag;
- tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
- tx_desc->ctrl.srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
- MLX4_WQE_CTRL_SOLICITED);
- if (mb->m_pkthdr.csum_flags & (CSUM_IP|CSUM_TCP|CSUM_UDP)) {
- tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
- MLX4_WQE_CTRL_TCP_UDP_CSUM);
+ /* get default flags */
+ tx_desc->ctrl.srcrb_flags = CTRL_FLAGS;
+
+ if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO))
+ tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM);
+
+ if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP |
+ CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
+ tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM);
+
+ /* do statistics */
+ if (likely(tx_desc->ctrl.srcrb_flags != CTRL_FLAGS)) {
priv->port_stats.tx_chksum_offload++;
+ ring->tx_csum++;
}
- if (unlikely(priv->validate_loopback)) {
- /* Copy dst mac address to wqe */
- struct ether_header *ethh;
- u64 mac;
- u32 mac_l, mac_h;
+ /* check for VLAN tag */
+ if (mb->m_flags & M_VLANTAG) {
+ tx_desc->ctrl.vlan_tag = cpu_to_be16(mb->m_pkthdr.ether_vtag);
+ tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN;
+ } else {
+ tx_desc->ctrl.vlan_tag = 0;
+ tx_desc->ctrl.ins_vlan = 0;
+ }
- ethh = mtod(mb, struct ether_header *);
- mac = mlx4_en_mac_to_u64(ethh->ether_dhost);
- if (mac) {
- mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16);
- mac_l = (u32) (mac & 0xffffffff);
- tx_desc->ctrl.srcrb_flags |= cpu_to_be32(mac_h);
- tx_desc->ctrl.imm = cpu_to_be32(mac_l);
+ /* clear immediate field */
+ tx_desc->ctrl.imm = 0;
+
+ /* Handle LSO (TSO) packets */
+ if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
+ u32 payload_len;
+ u32 mss = mb->m_pkthdr.tso_segsz;
+ u32 num_pkts;
+
+ opcode = cpu_to_be32(MLX4_OPCODE_LSO | MLX4_WQE_CTRL_RR) |
+ owner_bit;
+ ihs = mlx4_en_get_header_size(mb);
+ if (unlikely(ihs > MAX_INLINE)) {
+ ring->oversized_packets++;
+ err = EINVAL;
+ goto tx_drop;
}
+ tx_desc->lso.mss_hdr_size = cpu_to_be32((mss << 16) | ihs);
+ payload_len = mb->m_pkthdr.len - ihs;
+ if (unlikely(payload_len == 0))
+ num_pkts = 1;
+ else
+ num_pkts = DIV_ROUND_UP(payload_len, mss);
+ ring->bytes += payload_len + (num_pkts * ihs);
+ ring->packets += num_pkts;
+ priv->port_stats.tso_packets++;
+ /* store pointer to inline header */
+ dseg_inline = dseg;
+ /* copy data inline */
+ dseg = mlx4_en_store_inline_lso_data(dseg,
+ mb, ihs, owner_bit);
+ } else {
+ opcode = cpu_to_be32(MLX4_OPCODE_SEND) |
+ owner_bit;
+ ihs = mlx4_en_get_inline_hdr_size(ring, mb);
+	ring->bytes += max_t(unsigned int,
+ mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
+ ring->packets++;
+ /* store pointer to inline header */
+ dseg_inline = dseg;
+ /* copy data inline */
+ dseg = mlx4_en_store_inline_data(dseg,
+ mb, ihs, owner_bit);
}
+ m_adj(mb, ihs);
- /* Handle LSO (TSO) packets */
- if (lso_header_size) {
- int segsz;
+ /* trim off empty mbufs */
+ while (mb->m_len == 0) {
+ mb = m_free(mb);
+ /* check if all data has been inlined */
+ if (mb == NULL) {
+ nr_segs = 0;
+ goto skip_dma;
+ }
+ }
- /* Mark opcode as LSO */
- op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) |
- ((ring->prod & ring->size) ?
- cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
+ err = bus_dmamap_load_mbuf_sg(ring->dma_tag, tx_info->dma_map,
+ mb, segs, &nr_segs, BUS_DMA_NOWAIT);
+ if (unlikely(err == EFBIG)) {
+ /* Too many mbuf fragments */
+ m = m_defrag(mb, M_NOWAIT);
+ if (m == NULL) {
+ ring->oversized_packets++;
+ goto tx_drop;
+ }
+ mb = m;
+ /* Try again */
+ err = bus_dmamap_load_mbuf_sg(ring->dma_tag, tx_info->dma_map,
+ mb, segs, &nr_segs, BUS_DMA_NOWAIT);
+ }
+ /* catch errors */
+ if (unlikely(err != 0)) {
+ ring->oversized_packets++;
+ goto tx_drop;
+ }
+ /* make sure all mbuf data is written to RAM */
+ bus_dmamap_sync(ring->dma_tag, tx_info->dma_map,
+ BUS_DMASYNC_PREWRITE);
- /* Fill in the LSO prefix */
- tx_desc->lso.mss_hdr_size = cpu_to_be32(
- mb->m_pkthdr.tso_segsz << 16 | lso_header_size);
+skip_dma:
+ /* compute number of DS needed */
+ ds_cnt = (dseg - ((volatile struct mlx4_wqe_data_seg *)tx_desc)) + nr_segs;
- /* Copy headers;
- * note that we already verified that it is linear */
- memcpy(tx_desc->lso.header, mb->m_data, lso_header_size);
- data = ((void *) &tx_desc->lso +
- ALIGN(lso_header_size + 4, DS_SIZE));
+ /*
+ * Check if the next request can wrap around and fill the end
+ * of the current request with zero immediate data:
+ */
+ pad = DIV_ROUND_UP(ds_cnt, DS_FACT);
+ pad = (~(ring->prod + pad)) & ring->size_mask;
- priv->port_stats.tso_packets++;
- segsz = mb->m_pkthdr.tso_segsz;
- i = ((mb->m_pkthdr.len - lso_header_size) / segsz) +
- !!((mb->m_pkthdr.len - lso_header_size) % segsz);
- ring->bytes += mb->m_pkthdr.len + (i - 1) * lso_header_size;
- ring->packets += i;
- mb->m_data += lso_header_size;
- mb->m_len -= lso_header_size;
+ if (unlikely(pad < (MLX4_EN_TX_WQE_MAX_WQEBBS - 1))) {
+ /*
+ * Compute the least number of DS blocks we need to
+ * pad in order to achieve a TX ring wraparound:
+ */
+ pad = (DS_FACT * (pad + 1));
} else {
- /* Normal (Non LSO) packet */
- op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
- ((ring->prod & ring->size) ?
- cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
- data = &tx_desc->data;
- ring->bytes += max(mb->m_pkthdr.len,
- (unsigned int)ETHER_MIN_LEN - ETHER_CRC_LEN);
- ring->packets++;
-
+ /*
+ * The hardware will automatically jump to the next
+ * TXBB. No need for padding.
+ */
+ pad = 0;
}
- AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, mb->m_pkthdr.len);
- /* Save mb in tx_info ring */
- tx_info = &ring->tx_info[index];
+ /* compute total number of DS blocks */
+ ds_cnt += pad;
+ /*
+ * When modifying this code, please ensure that the following
+ * computation is always less than or equal to 0x3F:
+ *
+ * ((MLX4_EN_TX_WQE_MAX_WQEBBS - 1) * DS_FACT) +
+ * (MLX4_EN_TX_WQE_MAX_WQEBBS * DS_FACT)
+ *
+ * Else the "ds_cnt" variable can become too big.
+ */
+ tx_desc->ctrl.fence_size = (ds_cnt & 0x3f);
+
+ /* store pointer to mbuf */
tx_info->mb = mb;
- tx_info->nr_txbb = nr_txbb;
- tx_info->nr_segs = nr_segs;
- /* valid only for non inline segments */
- tx_info->data_offset = (void *) data - (void *) tx_desc;
+ tx_info->nr_txbb = DIV_ROUND_UP(ds_cnt, DS_FACT);
+ bf_size = ds_cnt * DS_SIZE_ALIGNMENT;
+ bf_prod = ring->prod;
- if (!is_inline(mb)) {
- for (i = 0, m = mb; i < nr_segs; i++, m = m->m_next) {
- if (m->m_len == 0) {
- i--;
- continue;
- }
- dma = pci_map_single(mdev->dev->pdev, m->m_data,
- m->m_len, PCI_DMA_TODEVICE);
- data->addr = cpu_to_be64(dma);
- data->lkey = cpu_to_be32(mdev->mr.key);
+ /* compute end of "dseg" array */
+ dseg += nr_segs + pad;
+
+ /* pad using zero immediate dseg */
+ while (pad--) {
+ dseg--;
+ dseg->addr = 0;
+ dseg->lkey = 0;
+ wmb();
+ dseg->byte_count = SET_BYTE_COUNT((1 << 31)|0);
+ }
+
+ /* fill segment list */
+ while (nr_segs--) {
+ if (unlikely(segs[nr_segs].ds_len == 0)) {
+ dseg--;
+ dseg->addr = 0;
+ dseg->lkey = 0;
wmb();
- data->byte_count = cpu_to_be32(m->m_len);
- data++;
+ dseg->byte_count = SET_BYTE_COUNT((1 << 31)|0);
+ } else {
+ dseg--;
+ dseg->addr = cpu_to_be64((uint64_t)segs[nr_segs].ds_addr);
+ dseg->lkey = cpu_to_be32(priv->mdev->mr.key);
+ wmb();
+ dseg->byte_count = SET_BYTE_COUNT((uint32_t)segs[nr_segs].ds_len);
}
- if (lso_header_size) {
- mb->m_data -= lso_header_size;
- mb->m_len += lso_header_size;
- }
- tx_info->inl = 0;
- } else {
- build_inline_wqe(tx_desc, mb, real_size, &vlan_tag, tx_ind);
- tx_info->inl = 1;
}
- ring->prod += nr_txbb;
+ wmb();
- /* If we used a bounce buffer then copy descriptor back into place */
- if (bounce)
- tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);
+ /* write owner bits in reverse order */
+ if ((opcode & cpu_to_be32(0x1F)) == cpu_to_be32(MLX4_OPCODE_LSO))
+ mlx4_en_store_inline_lso_header(dseg_inline, ihs, owner_bit);
+ else
+ mlx4_en_store_inline_header(dseg_inline, ihs, owner_bit);
- if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tag) {
- *(u32 *) (&tx_desc->ctrl.vlan_tag) |= ring->doorbell_qpn;
- op_own |= htonl((bf_index & 0xffff) << 8);
- /* Ensure new descirptor hits memory
- * before setting ownership of this descriptor to HW */
- wmb();
- tx_desc->ctrl.owner_opcode = op_own;
+ if (unlikely(priv->validate_loopback)) {
+ /* Copy dst mac address to wqe */
+ struct ether_header *ethh;
+ u64 mac;
+ u32 mac_l, mac_h;
- wmb();
+ ethh = mtod(mb, struct ether_header *);
+ mac = mlx4_en_mac_to_u64(ethh->ether_dhost);
+ if (mac) {
+ mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16);
+ mac_l = (u32) (mac & 0xffffffff);
+ tx_desc->ctrl.srcrb_flags |= cpu_to_be32(mac_h);
+ tx_desc->ctrl.imm = cpu_to_be32(mac_l);
+ }
+ }
- mlx4_bf_copy(ring->bf.reg + ring->bf.offset, (unsigned long *) &tx_desc->ctrl,
- desc_size);
+ /* update producer counter */
+ ring->prod += tx_info->nr_txbb;
+ if (ring->bf_enabled && bf_size <= MAX_BF &&
+ (tx_desc->ctrl.ins_vlan != MLX4_WQE_CTRL_INS_VLAN)) {
+
+ /* store doorbell number */
+ *(volatile __be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn);
+
+ /* or in producer number for this WQE */
+ opcode |= cpu_to_be32((bf_prod & 0xffff) << 8);
+
+ /*
+ * Ensure the new descriptor hits memory before
+ * setting ownership of this descriptor to HW:
+ */
wmb();
-
+ tx_desc->ctrl.owner_opcode = opcode;
+ wmb();
+ mlx4_bf_copy(((u8 *)ring->bf.reg) + ring->bf.offset,
+ (volatile unsigned long *) &tx_desc->ctrl, bf_size);
+ wmb();
ring->bf.offset ^= ring->bf.buf_size;
} else {
- /* Ensure new descirptor hits memory
- * before setting ownership of this descriptor to HW */
+ /*
+ * Ensure the new descriptor hits memory before
+ * setting ownership of this descriptor to HW:
+ */
wmb();
- tx_desc->ctrl.owner_opcode = op_own;
+ tx_desc->ctrl.owner_opcode = opcode;
wmb();
- writel(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL);
+ writel(cpu_to_be32(ring->doorbell_qpn),
+ ((u8 *)ring->bf.uar->map) + MLX4_SEND_DOORBELL);
}
- return 0;
-
+ return (0);
tx_drop:
*mbp = NULL;
m_freem(mb);
- ring->errors++;
- return EINVAL;
+ return (err);
}
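
The padding logic above uses (~(prod + n)) & size_mask, which equals
size_mask - ((prod + n) & size_mask), i.e. the number of TXBB slots left
before the ring wraps, minus one. A standalone sketch of that arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t size = 1024, size_mask = size - 1;
        uint32_t prod = 1020;   /* hypothetical producer counter */
        uint32_t txbbs = 2;     /* TXBBs the current WQE occupies */

        /* TXBB slots left before wraparound, minus one */
        uint32_t left_m1 = (~(prod + txbbs)) & size_mask;
        printf("slots to end - 1 = %u\n", left_m1); /* 1 */
        return 0;
    }
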
-
static int
mlx4_en_transmit_locked(struct ifnet *dev, int tx_ind, struct mbuf *m)
{
@@ -922,38 +993,39 @@
struct mbuf *next;
int enqueued, err = 0;
- ring = &priv->tx_ring[tx_ind];
+ ring = priv->tx_ring[tx_ind];
if ((dev->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING || priv->port_up == 0) {
if (m != NULL)
err = drbr_enqueue(dev, ring->br, m);
- return (err);
+ return (err);
}
enqueued = 0;
- if (m == NULL) {
- next = drbr_dequeue(dev, ring->br);
- } else if (drbr_needs_enqueue(dev, ring->br)) {
- if ((err = drbr_enqueue(dev, ring->br, m)) != 0)
- return (err);
- next = drbr_dequeue(dev, ring->br);
- } else
- next = m;
+ if (m != NULL)
+ /*
+ * If we can't insert mbuf into drbr, try to xmit anyway.
+	 * We keep the error we got so we can return it after the xmit.
+ */
+ err = drbr_enqueue(dev, ring->br, m);
/* Process the queue */
- while (next != NULL) {
- if ((err = mlx4_en_xmit(dev, tx_ind, &next)) != 0) {
- if (next != NULL)
- err = drbr_enqueue(dev, ring->br, next);
+ while ((next = drbr_peek(dev, ring->br)) != NULL) {
+ if (mlx4_en_xmit(priv, tx_ind, &next) != 0) {
+ if (next == NULL) {
+ drbr_advance(dev, ring->br);
+ } else {
+ drbr_putback(dev, ring->br, next);
+ }
break;
}
+ drbr_advance(dev, ring->br);
enqueued++;
- drbr_stats_update(dev, next->m_pkthdr.len, next->m_flags);
- /* Send a copy of the frame to the BPF listener */
- ETHER_BPF_MTAP(dev, next);
+ dev->if_obytes += next->m_pkthdr.len;
+ if (next->m_flags & M_MCAST)
+ dev->if_omcasts++;
if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
- next = drbr_dequeue(dev, ring->br);
}
if (enqueued > 0)
@@ -970,13 +1042,14 @@
struct net_device *dev;
struct mlx4_en_cq *cq;
int tx_ind;
-
cq = context;
dev = cq->dev;
priv = dev->if_softc;
tx_ind = cq->ring;
- ring = &priv->tx_ring[tx_ind];
- if (dev->if_drv_flags & IFF_DRV_RUNNING) {
+ ring = priv->tx_ring[tx_ind];
+
+ if (priv->port_up != 0 &&
+ (dev->if_drv_flags & IFF_DRV_RUNNING) != 0) {
mlx4_en_xmit_poll(priv, tx_ind);
spin_lock(&ring->tx_lock);
if (!drbr_empty(dev, ring->br))
@@ -991,16 +1064,22 @@
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_en_tx_ring *ring;
struct mlx4_en_cq *cq;
- int i = 0, err = 0;
+ int i, err = 0;
- /* Which queue to use */
- if ((m->m_flags & (M_FLOWID | M_VLANTAG)) == M_FLOWID)
- i = m->m_pkthdr.flowid % (MLX4_EN_NUM_HASH_RINGS - 1);
- else
+ if (priv->port_up == 0) {
+ m_freem(m);
+ return (ENETDOWN);
+ }
+
+ /* Compute which queue to use */
+ if (m->m_flags & M_FLOWID) {
+ i = (m->m_pkthdr.flowid % 128) % priv->tx_ring_num;
+ }
+ else {
i = mlx4_en_select_queue(dev, m);
+ }
- ring = &priv->tx_ring[i];
-
+ ring = priv->tx_ring[i];
if (spin_trylock(&ring->tx_lock)) {
err = mlx4_en_transmit_locked(dev, i, m);
spin_unlock(&ring->tx_lock);
@@ -1008,7 +1087,7 @@
mlx4_en_xmit_poll(priv, i);
} else {
err = drbr_enqueue(dev, ring->br, m);
- cq = &priv->tx_cq[i];
+ cq = priv->tx_cq[i];
taskqueue_enqueue(cq->tq, &cq->cq_task);
}
@@ -1022,10 +1101,14 @@
mlx4_en_qflush(struct ifnet *dev)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
- struct mlx4_en_tx_ring *ring = priv->tx_ring;
+ struct mlx4_en_tx_ring *ring;
struct mbuf *m;
- for (int i = 0; i < priv->tx_ring_num; i++, ring++) {
+ if (priv->port_up == 0)
+ return;
+
+ for (int i = 0; i < priv->tx_ring_num; i++) {
+ ring = priv->tx_ring[i];
spin_lock(&ring->tx_lock);
while ((m = buf_ring_dequeue_sc(ring->br)) != NULL)
m_freem(m);
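
The transmit loop now drains the buf ring with the peek/advance/putback
discipline instead of dequeue/re-enqueue, so a frame the hardware refuses
keeps its place at the head of the queue. A userspace model of that loop
(the array queue and xmit() are stand-ins for drbr_* and mlx4_en_xmit()):

    #include <stdio.h>

    static const char *q[] = { "pkt0", "pkt1", "pkt2" };
    static int head, tail = 3;

    static const char *peek(void) { return head < tail ? q[head] : NULL; }
    static void advance(void) { head++; }  /* commit: drop from queue */
    static void putback(void) { }          /* head unchanged: retry later */

    static int xmit(const char *p) { return p[3] == '1'; } /* fail "pkt1" */

    int main(void)
    {
        const char *next;

        while ((next = peek()) != NULL) {
            if (xmit(next)) {   /* nonzero: ring full, DMA error, ... */
                putback();      /* frame stays at the head */
                break;
            }
            advance();          /* frame handed to hardware */
            printf("sent %s\n", next);
        }
        return 0;
    }
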
Modified: trunk/sys/ofed/drivers/net/mlx4/eq.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/eq.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/eq.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,8 +31,9 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/module.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
@@ -42,35 +43,15 @@
#include "fw.h"
enum {
+ MLX4_IRQNAME_SIZE = 32
+};
+
+enum {
MLX4_NUM_ASYNC_EQE = 0x100,
MLX4_NUM_SPARE_EQE = 0x80,
MLX4_EQ_ENTRY_SIZE = 0x20
};
-/*
- * Must be packed because start is 64 bits but only aligned to 32 bits.
- */
-struct mlx4_eq_context {
- __be32 flags;
- u16 reserved1[3];
- __be16 page_offset;
- u8 log_eq_size;
- u8 reserved2[4];
- u8 eq_period;
- u8 reserved3;
- u8 eq_max_count;
- u8 reserved4[3];
- u8 intr;
- u8 log_page_size;
- u8 reserved5[2];
- u8 mtt_base_addr_h;
- __be32 mtt_base_addr_l;
- u32 reserved6[2];
- __be32 consumer_index;
- __be32 producer_index;
- u32 reserved7[4];
-};
-
#define MLX4_EQ_STATUS_OK ( 0 << 28)
#define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28)
#define MLX4_EQ_OWNER_SW ( 0 << 24)
@@ -95,47 +76,23 @@
(1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \
(1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
(1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \
- (1ull << MLX4_EVENT_TYPE_CMD))
+ (1ull << MLX4_EVENT_TYPE_CMD) | \
+ (1ull << MLX4_EVENT_TYPE_OP_REQUIRED) | \
+ (1ull << MLX4_EVENT_TYPE_COMM_CHANNEL) | \
+ (1ull << MLX4_EVENT_TYPE_FLR_EVENT) | \
+ (1ull << MLX4_EVENT_TYPE_FATAL_WARNING))
-struct mlx4_eqe {
- u8 reserved1;
- u8 type;
- u8 reserved2;
- u8 subtype;
- union {
- u32 raw[6];
- struct {
- __be32 cqn;
- } __attribute__((packed)) comp;
- struct {
- u16 reserved1;
- __be16 token;
- u32 reserved2;
- u8 reserved3[3];
- u8 status;
- __be64 out_param;
- } __attribute__((packed)) cmd;
- struct {
- __be32 qpn;
- } __attribute__((packed)) qp;
- struct {
- __be32 srqn;
- } __attribute__((packed)) srq;
- struct {
- __be32 cqn;
- u32 reserved1;
- u8 reserved2[3];
- u8 syndrome;
- } __attribute__((packed)) cq_err;
- struct {
- u32 reserved1[2];
- __be32 port;
- } __attribute__((packed)) port_change;
- } event;
- u8 reserved3[3];
- u8 owner;
-} __attribute__((packed));
+static u64 get_async_ev_mask(struct mlx4_dev *dev)
+{
+ u64 async_ev_mask = MLX4_ASYNC_EVENT_MASK;
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
+ async_ev_mask |= (1ull << MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT);
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT)
+ async_ev_mask |= (1ull << MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT);
+ return async_ev_mask;
+}
+
static void eq_set_ci(struct mlx4_eq *eq, int req_not)
{
__raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) |
@@ -145,27 +102,359 @@
mb();
}
-static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry)
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
{
- unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE;
- return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
+ /* (entry & (eq->nent - 1)) gives us a cyclic array */
+ unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
+ /* CX3 is capable of extending the EQE from 32 to 64 bytes.
+ * When this feature is enabled, the first (in the lower addresses)
+ * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
+ * contain the legacy EQE information.
+ */
+ return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
}
-static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
{
- struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index);
+ struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
}
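
With 64-byte EQEs (eqe_factor = 1) the stride doubles and the legacy
32-byte payload sits in the upper half of each entry, which is why
get_eqe() adds MLX4_EQ_ENTRY_SIZE to the in-page offset. A standalone
sketch of the offset math (4 KB pages assumed):

    #include <stdio.h>

    #define EQ_ENTRY_SIZE  0x20
    #define PG_SIZE        4096  /* assumed page size */

    static void eqe_offset(unsigned entry, unsigned nent, unsigned factor)
    {
        unsigned long off = (entry & (nent - 1)) *
            (EQ_ENTRY_SIZE << factor);
        unsigned long in_page =
            (off + (factor ? EQ_ENTRY_SIZE : 0)) % PG_SIZE;

        printf("entry %u: page %lu, offset 0x%lx\n",
            entry, off / PG_SIZE, in_page);
    }

    int main(void)
    {
        eqe_offset(130, 256, 0); /* 32-byte EQEs */
        eqe_offset(130, 256, 1); /* 64-byte EQEs: payload in top half */
        return 0;
    }
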
+static struct mlx4_eqe *next_slave_event_eqe(struct mlx4_slave_event_eq *slave_eq)
+{
+ struct mlx4_eqe *eqe =
+ &slave_eq->event_eqe[slave_eq->cons & (SLAVE_EVENT_EQ_SIZE - 1)];
+ return (!!(eqe->owner & 0x80) ^
+ !!(slave_eq->cons & SLAVE_EVENT_EQ_SIZE)) ?
+ eqe : NULL;
+}
+
+void mlx4_gen_slave_eqe(struct work_struct *work)
+{
+ struct mlx4_mfunc_master_ctx *master =
+ container_of(work, struct mlx4_mfunc_master_ctx,
+ slave_event_work);
+ struct mlx4_mfunc *mfunc =
+ container_of(master, struct mlx4_mfunc, master);
+ struct mlx4_priv *priv = container_of(mfunc, struct mlx4_priv, mfunc);
+ struct mlx4_dev *dev = &priv->dev;
+ struct mlx4_slave_event_eq *slave_eq = &mfunc->master.slave_eq;
+ struct mlx4_eqe *eqe;
+ u8 slave;
+ int i;
+
+ for (eqe = next_slave_event_eqe(slave_eq); eqe;
+ eqe = next_slave_event_eqe(slave_eq)) {
+ slave = eqe->slave_id;
+
+ /* All active slaves need to receive the event */
+ if (slave == ALL_SLAVES) {
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (mlx4_GEN_EQE(dev, i, eqe))
+ mlx4_warn(dev, "Failed to generate "
+ "event for slave %d\n", i);
+ }
+ } else {
+ if (mlx4_GEN_EQE(dev, slave, eqe))
+ mlx4_warn(dev, "Failed to generate event "
+ "for slave %d\n", slave);
+ }
+ ++slave_eq->cons;
+ }
+}
+
+
+static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq;
+ struct mlx4_eqe *s_eqe;
+ unsigned long flags;
+
+ spin_lock_irqsave(&slave_eq->event_lock, flags);
+ s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)];
+ if ((!!(s_eqe->owner & 0x80)) ^
+ (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) {
+ mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. "
+ "No free EQE on slave events queue\n", slave);
+ spin_unlock_irqrestore(&slave_eq->event_lock, flags);
+ return;
+ }
+
+ memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
+ s_eqe->slave_id = slave;
+	/* ensure all information is written before setting the ownership bit */
+ wmb();
+ s_eqe->owner = !!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE) ? 0x0 : 0x80;
+ ++slave_eq->prod;
+
+ queue_work(priv->mfunc.master.comm_wq,
+ &priv->mfunc.master.slave_event_work);
+ spin_unlock_irqrestore(&slave_eq->event_lock, flags);
+}
+
+static void mlx4_slave_event(struct mlx4_dev *dev, int slave,
+ struct mlx4_eqe *eqe)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (slave < 0 || slave >= dev->num_slaves ||
+ slave == dev->caps.function)
+ return;
+
+ if (!priv->mfunc.master.slave_state[slave].active)
+ return;
+
+ slave_event(dev, slave, eqe);
+}
+
+int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port)
+{
+ struct mlx4_eqe eqe;
+
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave];
+
+ if (!s_slave->active)
+ return 0;
+
+ memset(&eqe, 0, sizeof eqe);
+
+ eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
+ eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE;
+ eqe.event.port_mgmt_change.port = port;
+
+ return mlx4_GEN_EQE(dev, slave, &eqe);
+}
+EXPORT_SYMBOL(mlx4_gen_pkey_eqe);
+
+int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port)
+{
+ struct mlx4_eqe eqe;
+
+	/* don't send if we don't have that slave */
+ if (dev->num_vfs < slave)
+ return 0;
+ memset(&eqe, 0, sizeof eqe);
+
+ eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
+ eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO;
+ eqe.event.port_mgmt_change.port = port;
+
+ return mlx4_GEN_EQE(dev, slave, &eqe);
+}
+EXPORT_SYMBOL(mlx4_gen_guid_change_eqe);
+
+int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port,
+ u8 port_subtype_change)
+{
+ struct mlx4_eqe eqe;
+
+	/* don't send if we don't have that slave */
+ if (dev->num_vfs < slave)
+ return 0;
+ memset(&eqe, 0, sizeof eqe);
+
+ eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE;
+ eqe.subtype = port_subtype_change;
+ eqe.event.port_change.port = cpu_to_be32(port << 28);
+
+ mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__,
+ port_subtype_change, slave, port);
+ return mlx4_GEN_EQE(dev, slave, &eqe);
+}
+EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe);
+
+enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
+ if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) {
+ pr_err("%s: Error: asking for slave:%d, port:%d\n",
+ __func__, slave, port);
+ return SLAVE_PORT_DOWN;
+ }
+ return s_state[slave].port_state[port];
+}
+EXPORT_SYMBOL(mlx4_get_slave_port_state);
+
+static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port,
+ enum slave_port_state state)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
+
+ if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) {
+ pr_err("%s: Error: asking for slave:%d, port:%d\n",
+ __func__, slave, port);
+ return -1;
+ }
+ s_state[slave].port_state[port] = state;
+
+ return 0;
+}
+
+static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event)
+{
+ int i;
+ enum slave_port_gen_event gen_event;
+
+ for (i = 0; i < dev->num_slaves; i++)
+ set_and_calc_slave_port_state(dev, i, port, event, &gen_event);
+}
+/**************************************************************************
+	The function gets as input the new event for that port,
+	and changes the slave's port state according to the previous state.
+ The events are:
+ MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
+ MLX4_PORT_STATE_DEV_EVENT_PORT_UP
+ MLX4_PORT_STATE_IB_EVENT_GID_VALID
+ MLX4_PORT_STATE_IB_EVENT_GID_INVALID
+***************************************************************************/
+int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave,
+ u8 port, int event,
+ enum slave_port_gen_event *gen_event)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *ctx = NULL;
+ unsigned long flags;
+ int ret = -1;
+ enum slave_port_state cur_state =
+ mlx4_get_slave_port_state(dev, slave, port);
+
+ *gen_event = SLAVE_PORT_GEN_EVENT_NONE;
+
+ if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) {
+ pr_err("%s: Error: asking for slave:%d, port:%d\n",
+ __func__, slave, port);
+ return ret;
+ }
+
+ ctx = &priv->mfunc.master.slave_state[slave];
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ switch (cur_state) {
+ case SLAVE_PORT_DOWN:
+ if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event)
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PENDING_UP);
+ break;
+ case SLAVE_PENDING_UP:
+ if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event)
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PORT_DOWN);
+ else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) {
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PORT_UP);
+ *gen_event = SLAVE_PORT_GEN_EVENT_UP;
+ }
+ break;
+ case SLAVE_PORT_UP:
+ if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) {
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PORT_DOWN);
+ *gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
+ } else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID ==
+ event) {
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PENDING_UP);
+ *gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
+ }
+ break;
+ default:
+ pr_err("%s: BUG!!! UNKNOWN state: "
+ "slave:%d, port:%d\n", __func__, slave, port);
+ goto out;
+ }
+ ret = mlx4_get_slave_port_state(dev, slave, port);
+
+out:
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ return ret;
+}
+
+EXPORT_SYMBOL(set_and_calc_slave_port_state);
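
set_and_calc_slave_port_state() is a small three-state machine driven by
device and GID events. A standalone sketch of the same transitions (state
and event names abbreviated; illustrative, not the driver's types):

    #include <stdio.h>

    enum state { PORT_DOWN, PENDING_UP, PORT_UP };
    enum event { DEV_DOWN, DEV_UP, GID_VALID, GID_INVALID };

    static enum state next_state(enum state cur, enum event ev)
    {
        switch (cur) {
        case PORT_DOWN:
            return ev == DEV_UP ? PENDING_UP : cur;
        case PENDING_UP:
            if (ev == DEV_DOWN)
                return PORT_DOWN;
            return ev == GID_VALID ? PORT_UP : cur;
        case PORT_UP:
            if (ev == DEV_DOWN)
                return PORT_DOWN;
            return ev == GID_INVALID ? PENDING_UP : cur;
        }
        return cur;
    }

    int main(void)
    {
        enum state s = PORT_DOWN;

        s = next_state(s, DEV_UP);     /* -> PENDING_UP */
        s = next_state(s, GID_VALID);  /* -> PORT_UP */
        printf("final state = %d\n", s); /* 2 */
        return 0;
    }
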
+
+int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr, u16 sm_lid, u8 sm_sl)
+{
+ struct mlx4_eqe eqe;
+
+ memset(&eqe, 0, sizeof eqe);
+
+ eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
+ eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO;
+ eqe.event.port_mgmt_change.port = port;
+ eqe.event.port_mgmt_change.params.port_info.changed_attr =
+ cpu_to_be32((u32) attr);
+ if (attr & MSTR_SM_CHANGE_MASK) {
+ eqe.event.port_mgmt_change.params.port_info.mstr_sm_lid =
+ cpu_to_be16(sm_lid);
+ eqe.event.port_mgmt_change.params.port_info.mstr_sm_sl =
+ sm_sl;
+ }
+
+ slave_event(dev, ALL_SLAVES, &eqe);
+ return 0;
+}
+EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev);
+
+void mlx4_master_handle_slave_flr(struct work_struct *work)
+{
+ struct mlx4_mfunc_master_ctx *master =
+ container_of(work, struct mlx4_mfunc_master_ctx,
+ slave_flr_event_work);
+ struct mlx4_mfunc *mfunc =
+ container_of(master, struct mlx4_mfunc, master);
+ struct mlx4_priv *priv =
+ container_of(mfunc, struct mlx4_priv, mfunc);
+ struct mlx4_dev *dev = &priv->dev;
+ struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
+ int i;
+ int err;
+ unsigned long flags;
+
+ mlx4_dbg(dev, "mlx4_handle_slave_flr\n");
+
+ for (i = 0 ; i < dev->num_slaves; i++) {
+
+ if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) {
+ mlx4_dbg(dev, "mlx4_handle_slave_flr: "
+ "clean slave: %d\n", i);
+
+ mlx4_delete_all_resources_for_slave(dev, i);
+			/* return the slave to running mode */
+ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+ slave_state[i].last_cmd = MLX4_COMM_CMD_RESET;
+ slave_state[i].is_slave_going_down = 0;
+ spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+			/* notify the FW: */
+ err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ mlx4_warn(dev, "Failed to notify FW on "
+ "FLR done (slave:%d)\n", i);
+ }
+ }
+}
+
static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
{
+ struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_eqe *eqe;
int cqn;
int eqes_found = 0;
int set_ci = 0;
int port;
+ int slave = 0;
+ int ret;
+ u32 flr_slave;
+ u8 update_slave_state;
+ int i;
+ enum slave_port_gen_event gen_event;
+ unsigned long flags;
+ struct mlx4_vport_state *s_info;
- while ((eqe = next_eqe_sw(eq))) {
+ while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
/*
* Make sure we read EQ entry contents after we've
* checked the ownership bit.
@@ -186,14 +475,67 @@
case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
- mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
- eqe->type);
+ mlx4_dbg(dev, "event %d arrived\n", eqe->type);
+ if (mlx4_is_master(dev)) {
+ /* forward only to slave owning the QP */
+ ret = mlx4_get_slave_from_resource_id(dev,
+ RES_QP,
+ be32_to_cpu(eqe->event.qp.qpn)
+ & 0xffffff, &slave);
+ if (ret && ret != -ENOENT) {
+ mlx4_dbg(dev, "QP event %02x(%02x) on "
+ "EQ %d at index %u: could "
+ "not get slave id (%d)\n",
+ eqe->type, eqe->subtype,
+ eq->eqn, eq->cons_index, ret);
+ break;
+ }
+
+ if (!ret && slave != dev->caps.function) {
+ mlx4_slave_event(dev, slave, eqe);
+ break;
+ }
+
+ }
+ mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) &
+ 0xffffff, eqe->type);
break;
case MLX4_EVENT_TYPE_SRQ_LIMIT:
+ mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n",
+ __func__);
+ /* fall through */
case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
- mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
- eqe->type);
+ if (mlx4_is_master(dev)) {
+ /* forward only to slave owning the SRQ */
+ ret = mlx4_get_slave_from_resource_id(dev,
+ RES_SRQ,
+ be32_to_cpu(eqe->event.srq.srqn)
+ & 0xffffff,
+ &slave);
+ if (ret && ret != -ENOENT) {
+ mlx4_warn(dev, "SRQ event %02x(%02x) "
+ "on EQ %d at index %u: could"
+ " not get slave id (%d)\n",
+ eqe->type, eqe->subtype,
+ eq->eqn, eq->cons_index, ret);
+ break;
+ }
+ mlx4_dbg(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n",
+ __func__, slave,
+ be32_to_cpu(eqe->event.srq.srqn),
+ eqe->type, eqe->subtype);
+
+ if (!ret && slave != dev->caps.function) {
+ mlx4_dbg(dev, "%s: sending event %02x(%02x) to slave:%d\n",
+ __func__, eqe->type,
+ eqe->subtype, slave);
+ mlx4_slave_event(dev, slave, eqe);
+ break;
+ }
+ }
+ mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) &
+ 0xffffff, eqe->type);
break;
case MLX4_EVENT_TYPE_CMD:
@@ -209,10 +551,50 @@
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN,
port);
mlx4_priv(dev)->sense.do_sense_port[port] = 1;
+ if (!mlx4_is_master(dev))
+ break;
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN"
+ " to slave: %d, port:%d\n",
+ __func__, i, port);
+ s_info = &priv->mfunc.master.vf_oper[i].vport[port].state;
+ if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state)
+ mlx4_slave_event(dev, i, eqe);
+ } else { /* IB port */
+ set_and_calc_slave_port_state(dev, i, port,
+ MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
+ &gen_event);
+ /* we may be in pending state, in which case do not send a port_down event */
+ if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) {
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ mlx4_slave_event(dev, i, eqe);
+ }
+ }
+ }
} else {
- mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP,
- port);
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port);
+
mlx4_priv(dev)->sense.do_sense_port[port] = 0;
+
+ if (!mlx4_is_master(dev))
+ break;
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ s_info = &priv->mfunc.master.vf_oper[i].vport[port].state;
+ if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state)
+ mlx4_slave_event(dev, i, eqe);
+ }
+ else /* IB port */
+ /* port-up event will be sent to a slave when the
+ * slave's alias-guid is set. This is done in alias_GUID.c
+ */
+ set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP);
}
break;
@@ -221,7 +603,28 @@
eqe->event.cq_err.syndrome == 1 ?
"overrun" : "access violation",
be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
- mlx4_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn),
+ if (mlx4_is_master(dev)) {
+ ret = mlx4_get_slave_from_resource_id(dev,
+ RES_CQ,
+ be32_to_cpu(eqe->event.cq_err.cqn)
+ & 0xffffff, &slave);
+ if (ret && ret != -ENOENT) {
+ mlx4_dbg(dev, "CQ event %02x(%02x) on "
+ "EQ %d at index %u: could "
+ "not get slave id (%d)\n",
+ eqe->type, eqe->subtype,
+ eq->eqn, eq->cons_index, ret);
+ break;
+ }
+
+ if (!ret && slave != dev->caps.function) {
+ mlx4_slave_event(dev, slave, eqe);
+ break;
+ }
+ }
+ mlx4_cq_event(dev,
+ be32_to_cpu(eqe->event.cq_err.cqn)
+ & 0xffffff,
eqe->type);
break;
@@ -229,11 +632,127 @@
mlx4_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
break;
+ case MLX4_EVENT_TYPE_OP_REQUIRED:
+ atomic_inc(&priv->opreq_count);
+ /* FW commands can't be executed from interrupt context,
+ * so do the work in a deferred task */
+ queue_work(mlx4_wq, &priv->opreq_task);
+ break;
+
+ case MLX4_EVENT_TYPE_COMM_CHANNEL:
+ if (!mlx4_is_master(dev)) {
+ mlx4_warn(dev, "Received comm channel event "
+ "for non master device\n");
+ break;
+ }
+
+ memcpy(&priv->mfunc.master.comm_arm_bit_vector,
+ eqe->event.comm_channel_arm.bit_vec,
+ sizeof eqe->event.comm_channel_arm.bit_vec);
+
+ if (!queue_work(priv->mfunc.master.comm_wq,
+ &priv->mfunc.master.comm_work))
+ mlx4_warn(dev, "Failed to queue comm channel work\n");
+
+ if (!queue_work(priv->mfunc.master.comm_wq,
+ &priv->mfunc.master.arm_comm_work))
+ mlx4_warn(dev, "Failed to queue arm comm channel work\n");
+ break;
+
+ case MLX4_EVENT_TYPE_FLR_EVENT:
+ flr_slave = be32_to_cpu(eqe->event.flr_event.slave_id);
+ if (!mlx4_is_master(dev)) {
+ mlx4_warn(dev, "Non-master function received"
+ "FLR event\n");
+ break;
+ }
+
+ mlx4_dbg(dev, "FLR event for slave: %d\n", flr_slave);
+
+ if (flr_slave >= dev->num_slaves) {
+ mlx4_warn(dev,
+ "Got FLR for unknown function: %d\n",
+ flr_slave);
+ update_slave_state = 0;
+ } else
+ update_slave_state = 1;
+
+ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+ if (update_slave_state) {
+ priv->mfunc.master.slave_state[flr_slave].active = false;
+ priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR;
+ priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
+ }
+ spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+ queue_work(priv->mfunc.master.comm_wq,
+ &priv->mfunc.master.slave_flr_event_work);
+ break;
+
+ case MLX4_EVENT_TYPE_FATAL_WARNING:
+ if (eqe->subtype == MLX4_FATAL_WARNING_SUBTYPE_WARMING) {
+ if (mlx4_is_master(dev))
+ for (i = 0; i < dev->num_slaves; i++) {
+ mlx4_dbg(dev, "%s: Sending "
+ "MLX4_FATAL_WARNING_SUBTYPE_WARMING"
+ " to slave: %d\n", __func__, i);
+ if (i == dev->caps.function)
+ continue;
+ mlx4_slave_event(dev, i, eqe);
+ }
+ mlx4_err(dev, "Temperature Threshold was reached! "
+ "Threshold: %d celsius degrees; "
+ "Current Temperature: %d\n",
+ be16_to_cpu(eqe->event.warming.warning_threshold),
+ be16_to_cpu(eqe->event.warming.current_temperature));
+ } else
+ mlx4_warn(dev, "Unhandled event FATAL WARNING (%02x), "
+ "subtype %02x on EQ %d at index %u. owner=%x, "
+ "nent=0x%x, slave=%x, ownership=%s\n",
+ eqe->type, eqe->subtype, eq->eqn,
+ eq->cons_index, eqe->owner, eq->nent,
+ eqe->slave_id,
+ !!(eqe->owner & 0x80) ^
+ !!(eq->cons_index & eq->nent) ? "HW" : "SW");
+
+ break;
+
+ case MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT:
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_MGMT_CHANGE,
+ (unsigned long) eqe);
+ break;
+
+ case MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT:
+ switch (eqe->subtype) {
+ case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE:
+ mlx4_warn(dev, "Bad cable detected on port %u\n",
+ eqe->event.bad_cable.port);
+ break;
+ case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE:
+ mlx4_warn(dev, "Unsupported cable detected\n");
+ break;
+ default:
+ mlx4_dbg(dev, "Unhandled recoverable error event "
+ "detected: %02x(%02x) on EQ %d at index %u. "
+ "owner=%x, nent=0x%x, ownership=%s\n",
+ eqe->type, eqe->subtype, eq->eqn,
+ eq->cons_index, eqe->owner, eq->nent,
+ !!(eqe->owner & 0x80) ^
+ !!(eq->cons_index & eq->nent) ? "HW" : "SW");
+ break;
+ }
+ break;
+
case MLX4_EVENT_TYPE_EEC_CATAS_ERROR:
case MLX4_EVENT_TYPE_ECC_DETECT:
default:
- mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at index %u\n",
- eqe->type, eqe->subtype, eq->eqn, eq->cons_index);
+ mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at "
+ "index %u. owner=%x, nent=0x%x, slave=%x, "
+ "ownership=%s\n",
+ eqe->type, eqe->subtype, eq->eqn,
+ eq->cons_index, eqe->owner, eq->nent,
+ eqe->slave_id,
+ !!(eqe->owner & 0x80) ^
+ !!(eq->cons_index & eq->nent) ? "HW" : "SW");
break;
};
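
The "ownership=%s" field in the warnings above reflects the EQE ownership protocol: the owner bit's meaning flips on every lap around the EQ ring, so hardware ownership is the XOR of the owner bit and the pass parity of the consumer index. As a standalone sketch (an illustration distilled from the expressions above, not code from this patch):

static inline const char *eqe_ownership(struct mlx4_eqe *eqe, struct mlx4_eq *eq)
{
	/* eq->nent is a power of two, so (cons_index & nent) is the pass parity */
	return (!!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent)) ?
		"HW" : "SW";
}
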
@@ -285,25 +804,55 @@
return IRQ_HANDLED;
}
+int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_event_eq_info *event_eq =
+ priv->mfunc.master.slave_state[slave].event_eq;
+ u32 in_modifier = vhcr->in_modifier;
+ u32 eqn = in_modifier & 0x3FF;
+ u64 in_param = vhcr->in_param;
+ int err = 0;
+ int i;
+
+ if (slave == dev->caps.function)
+ err = mlx4_cmd(dev, in_param, (in_modifier & 0x80000000) | eqn,
+ 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+ if (!err)
+ for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i)
+ if (in_param & (1LL << i))
+ event_eq[i].eqn = in_modifier >> 31 ? -1 : eqn;
+
+ return err;
+}
+
static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap,
int eq_num)
{
return mlx4_cmd(dev, event_mask, (unmap << 31) | eq_num,
- 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B);
+ 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
}
static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int eq_num)
{
- return mlx4_cmd(dev, mailbox->dma, eq_num, 0, MLX4_CMD_SW2HW_EQ,
- MLX4_CMD_TIME_CLASS_A);
+ return mlx4_cmd(dev, mailbox->dma, eq_num, 0,
+ MLX4_CMD_SW2HW_EQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
}
static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int eq_num)
{
- return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num, 0, MLX4_CMD_HW2SW_EQ,
- MLX4_CMD_TIME_CLASS_A);
+ return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num,
+ 0, MLX4_CMD_HW2SW_EQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
}
static int mlx4_num_eq_uar(struct mlx4_dev *dev)
@@ -313,8 +862,8 @@
* we need to map, take the difference of highest index and
* the lowest index we'll use and add 1.
*/
- return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 -
- dev->caps.reserved_eqs / 4 + 1;
+ return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs +
+ dev->caps.comp_pool)/4 - dev->caps.reserved_eqs/4 + 1;
}
static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
@@ -339,6 +888,18 @@
return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4);
}
+static void mlx4_unmap_uar(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i;
+
+ for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
+ if (priv->eq_table.uar_map[i]) {
+ iounmap(priv->eq_table.uar_map[i]);
+ priv->eq_table.uar_map[i] = NULL;
+ }
+}
+
static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
u8 intr, struct mlx4_eq *eq)
{
@@ -354,7 +915,8 @@
eq->dev = dev;
eq->nent = roundup_pow_of_two(max(nent, 2));
- npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE;
+ /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
+ npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
eq->page_list = kmalloc(npages * sizeof *eq->page_list,
GFP_KERNEL);
@@ -431,7 +993,7 @@
mlx4_mtt_cleanup(dev, &eq->mtt);
err_out_free_eq:
- mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn);
+ mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
err_out_free_pages:
for (i = 0; i < npages; ++i)
@@ -456,8 +1018,9 @@
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
int err;
- int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
int i;
+ /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
+ int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
@@ -471,21 +1034,21 @@
mlx4_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
for (i = 0; i < sizeof (struct mlx4_eq_context) / 4; ++i) {
if (i % 4 == 0)
- printk("[%02x] ", i * 4);
- printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
+ pr_cont("[%02x] ", i * 4);
+ pr_cont(" %08x", be32_to_cpup(mailbox->buf + i * 4));
if ((i + 1) % 4 == 0)
- printk("\n");
+ pr_cont("\n");
}
}
mlx4_mtt_cleanup(dev, &eq->mtt);
for (i = 0; i < npages; ++i)
- pci_free_consistent(dev->pdev, PAGE_SIZE,
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
eq->page_list[i].buf,
eq->page_list[i].map);
kfree(eq->page_list);
- mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn);
+ mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
mlx4_free_cmd_mailbox(dev, mailbox);
}
@@ -492,10 +1055,12 @@
static void mlx4_free_irqs(struct mlx4_dev *dev)
{
struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table;
- int i;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, vec;
if (eq_table->have_irq)
free_irq(dev->pdev->irq, dev);
+
for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
if (eq_table->eq[i].have_irq) {
free_irq(eq_table->eq[i].irq, eq_table->eq + i);
@@ -502,6 +1067,20 @@
eq_table->eq[i].have_irq = 0;
}
+ for (i = 0; i < dev->caps.comp_pool; i++) {
+ /*
+ * Free the assigned IRQs; all bits should be 0,
+ * but validate anyway.
+ */
+ if (priv->msix_ctl.pool_bm & 1ULL << i) {
+ /* no locking needed here */
+ vec = dev->caps.num_comp_vectors + 1 + i;
+ free_irq(priv->eq_table.eq[vec].irq,
+ &priv->eq_table.eq[vec]);
+ }
+ }
+
kfree(eq_table->irq_names);
}
@@ -549,8 +1128,9 @@
int err;
int i;
- priv->eq_table.uar_map = kcalloc(sizeof *priv->eq_table.uar_map,
- mlx4_num_eq_uar(dev), GFP_KERNEL);
+ priv->eq_table.uar_map = kcalloc(mlx4_num_eq_uar(dev),
+ sizeof *priv->eq_table.uar_map,
+ GFP_KERNEL);
if (!priv->eq_table.uar_map) {
err = -ENOMEM;
goto err_out_free;
@@ -564,23 +1144,30 @@
for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
priv->eq_table.uar_map[i] = NULL;
- err = mlx4_map_clr_int(dev);
- if (err)
- goto err_out_bitmap;
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_map_clr_int(dev);
+ if (err)
+ goto err_out_bitmap;
- priv->eq_table.clr_mask =
- swab32(1 << (priv->eq_table.inta_pin & 31));
- priv->eq_table.clr_int = priv->clr_base +
- (priv->eq_table.inta_pin < 32 ? 4 : 0);
+ priv->eq_table.clr_mask =
+ swab32(1 << (priv->eq_table.inta_pin & 31));
+ priv->eq_table.clr_int = priv->clr_base +
+ (priv->eq_table.inta_pin < 32 ? 4 : 0);
+ }
- priv->eq_table.irq_names = kmalloc(16 * dev->caps.num_comp_vectors, GFP_KERNEL);
+ priv->eq_table.irq_names =
+ kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1 +
+ dev->caps.comp_pool),
+ GFP_KERNEL);
if (!priv->eq_table.irq_names) {
err = -ENOMEM;
- goto err_out_bitmap;
+ goto err_out_clr_int;
}
for (i = 0; i < dev->caps.num_comp_vectors; ++i) {
- err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
+ err = mlx4_create_eq(dev, dev->caps.num_cqs -
+ dev->caps.reserved_cqs +
+ MLX4_NUM_SPARE_EQE,
(dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
&priv->eq_table.eq[i]);
if (err) {
@@ -595,18 +1182,42 @@
if (err)
goto err_out_comp;
+ /* if the additional completion vector pool size is 0, this loop will not run */
+ for (i = dev->caps.num_comp_vectors + 1;
+ i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) {
+
+ err = mlx4_create_eq(dev, dev->caps.num_cqs -
+ dev->caps.reserved_cqs +
+ MLX4_NUM_SPARE_EQE,
+ (dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
+ &priv->eq_table.eq[i]);
+ if (err) {
+ --i;
+ goto err_out_unmap;
+ }
+ }
+
+
if (dev->flags & MLX4_FLAG_MSI_X) {
- static const char async_eq_name[] = DRV_NAME "(async)";
const char *eq_name;
for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
if (i < dev->caps.num_comp_vectors) {
- snprintf(priv->eq_table.irq_names + i * 16, 16,
- "eth-mlx4-%d", i);
- eq_name = priv->eq_table.irq_names + i * 16;
- } else
- eq_name = async_eq_name;
+ snprintf(priv->eq_table.irq_names +
+ i * MLX4_IRQNAME_SIZE,
+ MLX4_IRQNAME_SIZE,
+ "mlx4-comp-%d at pci:%s", i,
+ pci_name(dev->pdev));
+ } else {
+ snprintf(priv->eq_table.irq_names +
+ i * MLX4_IRQNAME_SIZE,
+ MLX4_IRQNAME_SIZE,
+ "mlx4-async at pci:%s",
+ pci_name(dev->pdev));
+ }
+ eq_name = priv->eq_table.irq_names +
+ i * MLX4_IRQNAME_SIZE;
err = request_irq(priv->eq_table.eq[i].irq,
mlx4_msi_x_interrupt, 0, eq_name,
priv->eq_table.eq + i);
@@ -616,8 +1227,12 @@
priv->eq_table.eq[i].have_irq = 1;
}
} else {
+ snprintf(priv->eq_table.irq_names,
+ MLX4_IRQNAME_SIZE,
+ DRV_NAME "@pci:%s",
+ pci_name(dev->pdev));
err = request_irq(dev->pdev->irq, mlx4_interrupt,
- IRQF_SHARED, DRV_NAME, dev);
+ IRQF_SHARED, priv->eq_table.irq_names, dev);
if (err)
goto err_out_async;
@@ -624,7 +1239,7 @@
priv->eq_table.have_irq = 1;
}
- err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
+ err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
if (err)
mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
@@ -646,10 +1261,14 @@
mlx4_free_eq(dev, &priv->eq_table.eq[i]);
--i;
}
- mlx4_unmap_clr_int(dev);
mlx4_free_irqs(dev);
+err_out_clr_int:
+ if (!mlx4_is_slave(dev))
+ mlx4_unmap_clr_int(dev);
+
err_out_bitmap:
+ mlx4_unmap_uar(dev);
mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
err_out_free:
@@ -663,20 +1282,18 @@
struct mlx4_priv *priv = mlx4_priv(dev);
int i;
- mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1,
+ mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 1,
priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
mlx4_free_irqs(dev);
- for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
+ for (i = 0; i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i)
mlx4_free_eq(dev, &priv->eq_table.eq[i]);
- mlx4_unmap_clr_int(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_unmap_clr_int(dev);
- for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
- if (priv->eq_table.uar_map[i])
- iounmap(priv->eq_table.uar_map[i]);
-
+ mlx4_unmap_uar(dev);
mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
kfree(priv->eq_table.uar_map);
@@ -694,7 +1311,7 @@
err = mlx4_NOP(dev);
/* When not in MSI_X, there is only one irq to check */
- if (!(dev->flags & MLX4_FLAG_MSI_X))
+ if (!(dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(dev))
return err;
/* A loop over all completion vectors, for each vector we will check
@@ -706,7 +1323,7 @@
mlx4_cmd_use_polling(dev);
/* Map the new eq to handle all asynchronous events */
- err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
+ err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
priv->eq_table.eq[i].eqn);
if (err) {
mlx4_warn(dev, "Failed mapping eq for interrupt test\n");
@@ -720,8 +1337,70 @@
}
/* Return to default */
- mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
+ mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
return err;
}
EXPORT_SYMBOL(mlx4_test_interrupts);
+
+int mlx4_assign_eq(struct mlx4_dev *dev, char *name, int *vector)
+{
+
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int vec = 0, err = 0, i;
+
+ mutex_lock(&priv->msix_ctl.pool_lock);
+ for (i = 0; !vec && i < dev->caps.comp_pool; i++) {
+ if (~priv->msix_ctl.pool_bm & 1ULL << i) {
+ priv->msix_ctl.pool_bm |= 1ULL << i;
+ vec = dev->caps.num_comp_vectors + 1 + i;
+ snprintf(priv->eq_table.irq_names +
+ vec * MLX4_IRQNAME_SIZE,
+ MLX4_IRQNAME_SIZE, "%s", name);
+ err = request_irq(priv->eq_table.eq[vec].irq,
+ mlx4_msi_x_interrupt, 0,
+ priv->eq_table.irq_names + vec * MLX4_IRQNAME_SIZE,
+ priv->eq_table.eq + vec);
+ if (err) {
+ /* zero out the bit by flipping it */
+ priv->msix_ctl.pool_bm ^= 1ULL << i;
+ vec = 0;
+ continue;
+ /* we don't want to break here */
+ }
+ eq_set_ci(&priv->eq_table.eq[vec], 1);
+ }
+ }
+ mutex_unlock(&priv->msix_ctl.pool_lock);
+
+ if (vec) {
+ *vector = vec;
+ } else {
+ *vector = 0;
+ err = (i == dev->caps.comp_pool) ? -ENOSPC : err;
+ }
+ return err;
+}
+EXPORT_SYMBOL(mlx4_assign_eq);
+
+void mlx4_release_eq(struct mlx4_dev *dev, int vec)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ /* bitmap index */
+ int i = vec - dev->caps.num_comp_vectors - 1;
+
+ if (likely(i >= 0)) {
+ /* sanity check, making sure we're not trying to free
+ * IRQs belonging to a legacy EQ */
+ mutex_lock(&priv->msix_ctl.pool_lock);
+ if (priv->msix_ctl.pool_bm & 1ULL << i) {
+ free_irq(priv->eq_table.eq[vec].irq,
+ &priv->eq_table.eq[vec]);
+ priv->msix_ctl.pool_bm &= ~(1ULL << i);
+ }
+ mutex_unlock(&priv->msix_ctl.pool_lock);
+ }
+}
+EXPORT_SYMBOL(mlx4_release_eq);
+
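
mlx4_assign_eq/mlx4_release_eq expose the extra completion-vector pool added above to consumers such as the EN driver. A hypothetical consumer sketch (the names are made up; the convention of treating vector 0 as "use a legacy shared vector" follows the error path above):

static int example_grab_vector(struct mlx4_dev *dev, int ring)
{
	char name[MLX4_IRQNAME_SIZE];
	int vector = 0;

	snprintf(name, sizeof(name), "example-rx-%d", ring);
	if (mlx4_assign_eq(dev, name, &vector))
		vector = 0;	/* pool exhausted or request_irq failed */
	return vector;		/* 0: fall back to a legacy vector */
}

static void example_drop_vector(struct mlx4_dev *dev, int vector)
{
	if (vector)
		mlx4_release_eq(dev, vector);
}
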
Modified: trunk/sys/ofed/drivers/net/mlx4/fw.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/fw.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/fw.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -32,7 +32,10 @@
* SOFTWARE.
*/
+#include <linux/etherdevice.h>
#include <linux/mlx4/cmd.h>
+#include <linux/module.h>
+#include <linux/cache.h>
#include "fw.h"
#include "icm.h"
@@ -46,14 +49,10 @@
extern void __buggy_use_of_MLX4_GET(void);
extern void __buggy_use_of_MLX4_PUT(void);
-static int enable_qos;
+static bool enable_qos;
module_param(enable_qos, bool, 0444);
MODULE_PARM_DESC(enable_qos, "Enable Quality of Service support in the HCA (default: off)");
-static int mlx4_pre_t11_mode = 0;
-module_param_named(enable_pre_t11_mode, mlx4_pre_t11_mode, int, 0644);
-MODULE_PARM_DESC(enable_pre_t11_mode, "For FCoXX, enable pre-t11 mode if non-zero (default: 0)");
-
#define MLX4_GET(dest, source, offset) \
do { \
void *__p = (char *) (source) + (offset); \
@@ -93,6 +92,7 @@
[ 9] = "Q_Key violation counter",
[10] = "VMM",
[12] = "DPDP",
+ [15] = "Big LSO headers",
[16] = "MW support",
[17] = "APM support",
[18] = "Atomic ops support",
@@ -102,8 +102,18 @@
[24] = "Demand paging support",
[25] = "Router support",
[30] = "IBoE support",
- [48] = "Basic counters support",
- [49] = "Extended counters support",
+ [32] = "Unicast loopback support",
+ [34] = "FCS header control",
+ [38] = "Wake On LAN support",
+ [40] = "UDP RSS support",
+ [41] = "Unicast VEP steering support",
+ [42] = "Multicast VEP steering support",
+ [44] = "Cross-channel (sync_qp) operations support",
+ [48] = "Counters support",
+ [59] = "Port management change event support",
+ [60] = "eSwitch support",
+ [61] = "64 byte EQE support",
+ [62] = "64 byte CQE support",
};
int i;
@@ -113,6 +123,32 @@
mlx4_dbg(dev, " %s\n", fname[i]);
}
+static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
+{
+ static const char * const fname[] = {
+ [0] = "RSS support",
+ [1] = "RSS Toeplitz Hash Function support",
+ [2] = "RSS XOR Hash Function support",
+ [3] = "Device manage flow steering support",
+ [4] = "FSM (MAC unti-spoofing) support",
+ [5] = "VST (control vlan insertion/stripping) support",
+ [6] = "Dynamic QP updates support",
+ [7] = "Loopback source checks support",
+ [8] = "Device managed flow steering IPoIB support",
+ [9] = "ETS configuration support",
+ [10] = "ETH backplane autoneg report",
+ [11] = "Ethernet Flow control statistics support",
+ [12] = "Recoverable error events support",
+ [13] = "Time stamping support",
+ [14] = "Report driver version to FW support"
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(fname); ++i)
+ if (fname[i] && (flags & (1LL << i)))
+ mlx4_dbg(dev, " %s\n", fname[i]);
+}
+
int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg)
{
struct mlx4_cmd_mailbox *mailbox;
@@ -135,25 +171,317 @@
MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET);
err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
+int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u8 field, port;
+ u32 size;
+ int err = 0;
+
+#define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0
+#define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1
+#define QUERY_FUNC_CAP_PF_BHVR_OFFSET 0x4
+#define QUERY_FUNC_CAP_FMR_OFFSET 0x8
+#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP 0x10
+#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP 0x14
+#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP 0x18
+#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP 0x20
+#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP 0x24
+#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP 0x28
+#define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c
+#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30
+
+#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x50
+#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x54
+#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET 0x58
+#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET 0x60
+#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x64
+#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x68
+
+#define QUERY_FUNC_CAP_FMR_FLAG 0x80
+#define QUERY_FUNC_CAP_FLAG_RDMA 0x40
+#define QUERY_FUNC_CAP_FLAG_ETH 0x80
+#define QUERY_FUNC_CAP_FLAG_QUOTAS 0x10
+
+/* when opcode modifier = 1 */
+#define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3
+#define QUERY_FUNC_CAP_FLAGS0_OFFSET 0x8
+#define QUERY_FUNC_CAP_FLAGS1_OFFSET 0xc
+#define QUERY_FUNC_CAP_COUNTER_INDEX_OFFSET 0xd
+
+#define QUERY_FUNC_CAP_QP0_TUNNEL 0x10
+#define QUERY_FUNC_CAP_QP0_PROXY 0x14
+#define QUERY_FUNC_CAP_QP1_TUNNEL 0x18
+#define QUERY_FUNC_CAP_QP1_PROXY 0x1c
+
+#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40
+#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80
+#define QUERY_FUNC_CAP_PROPS_DEF_COUNTER 0x20
+
+#define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80
+
+ if (vhcr->op_modifier == 1) {
+ port = vhcr->in_modifier; /* phys-port = logical-port */
+ MLX4_PUT(outbox->buf, port, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
+
+ field = 0;
+ /* ensure that phy_wqe_gid bit is not set */
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET);
+
+ /* ensure force vlan and force mac bits are not set
+ * and that default counter bit is set
+ */
+ field = QUERY_FUNC_CAP_PROPS_DEF_COUNTER; /* def counter */
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET);
+
+ /* there is always either a legal default counter or the sink counter */
+ field = mlx4_get_default_counter_index(dev, slave, vhcr->in_modifier);
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_COUNTER_INDEX_OFFSET);
+
+ /* size is now the QP number */
+ size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL);
+
+ size += 2;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL);
+
+ size = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY);
+
+ size += 2;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY);
+
+ } else if (vhcr->op_modifier == 0) {
+ /* enable rdma and ethernet interfaces, and new quota locations */
+ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA |
+ QUERY_FUNC_CAP_FLAG_QUOTAS);
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET);
+
+ field = dev->caps.num_ports;
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
+
+ size = dev->caps.function_caps; /* set PF behaviours */
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
+
+ field = 0; /* protected FMR support not yet available */
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
+ size = dev->caps.num_qps;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
+ size = dev->caps.num_srqs;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
+ size = dev->caps.num_cqs;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP);
+
+ size = dev->caps.num_eqs;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
+
+ size = dev->caps.reserved_eqs;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
+ size = dev->caps.num_mpts;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
+ size = dev->caps.num_mtts;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP);
+
+ size = dev->caps.num_mgms + dev->caps.num_amgms;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP);
+
+ } else
+ err = -EINVAL;
+
+ return err;
+}
+
+int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
+ struct mlx4_func_cap *func_cap)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ u8 field, op_modifier;
+ u32 size;
+ int err = 0, quotas = 0;
+
+ op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier,
+ MLX4_CMD_QUERY_FUNC_CAP,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ goto out;
+
+ outbox = mailbox->buf;
+
+ if (!op_modifier) {
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET);
+ if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) {
+ mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n");
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+ func_cap->flags = field;
+ quotas = !!(func_cap->flags & QUERY_FUNC_CAP_FLAG_QUOTAS);
+
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
+ func_cap->num_ports = field;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
+ func_cap->pf_context_behaviour = size;
+
+ if (quotas) {
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
+ func_cap->qp_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
+ func_cap->srq_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
+ func_cap->cq_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
+ func_cap->mpt_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
+ func_cap->mtt_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
+ func_cap->mcg_quota = size & 0xFFFFFF;
+
+ } else {
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP);
+ func_cap->qp_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP);
+ func_cap->srq_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP);
+ func_cap->cq_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP);
+ func_cap->mpt_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP);
+ func_cap->mtt_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP);
+ func_cap->mcg_quota = size & 0xFFFFFF;
+ }
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
+ func_cap->max_eq = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+ func_cap->reserved_eq = size & 0xFFFFFF;
+
+ goto out;
+ }
+
+ /* logical port query */
+ if (gen_or_port > dev->caps.num_ports) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) {
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET);
+ if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) {
+ mlx4_err(dev, "VLAN is enforced on this port\n");
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+
+ if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) {
+ mlx4_err(dev, "Force mac is enabled on this port\n");
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+ } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) {
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET);
+ if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) {
+ mlx4_err(dev, "phy_wqe_gid is "
+ "enforced on this ib port\n");
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+ }
+
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
+ func_cap->physical_port = field;
+ if (func_cap->physical_port != gen_or_port) {
+ err = -ENOSYS;
+ goto out;
+ }
+
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET);
+ if (field & QUERY_FUNC_CAP_PROPS_DEF_COUNTER) {
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_COUNTER_INDEX_OFFSET);
+ func_cap->def_counter_index = field;
+ } else {
+ func_cap->def_counter_index = MLX4_SINK_COUNTER_INDEX;
+ }
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL);
+ func_cap->qp0_tunnel_qpn = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY);
+ func_cap->qp0_proxy_qpn = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL);
+ func_cap->qp1_tunnel_qpn = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY);
+ func_cap->qp1_proxy_qpn = size & 0xFFFFFF;
+
+ /* All other resources are allocated by the master, but we still report
+ * 'num' and 'reserved' capabilities as follows:
+ * - num remains the maximum resource index
+ * - 'num - reserved' is the total available objects of a resource, but
+ * resource indices may be less than 'reserved'
+ * TODO: set per-resource quotas */
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return err;
+}
+
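
On the slave side, mlx4_QUERY_FUNC_CAP is the entry point for discovering per-function quotas. A minimal caller sketch (hypothetical; the field names come from the struct mlx4_func_cap usage above):

static void example_dump_quotas(struct mlx4_dev *dev)
{
	struct mlx4_func_cap func_cap;

	/* gen_or_port == 0 selects the general (op_modifier 0) query */
	if (!mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap))
		mlx4_dbg(dev, "quotas: qp %d cq %d srq %d mpt %d mtt %d mcg %d\n",
			 func_cap.qp_quota, func_cap.cq_quota,
			 func_cap.srq_quota, func_cap.mpt_quota,
			 func_cap.mtt_quota, func_cap.mcg_quota);
}
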
int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
struct mlx4_cmd_mailbox *mailbox;
u32 *outbox;
u8 field;
- u32 field32;
+ u32 field32, flags, ext_flags;
u16 size;
u16 stat_rate;
int err;
int i;
- u32 in_modifier;
- u64 out_param;
- u32 tmp1, tmp2;
#define QUERY_DEV_CAP_OUT_SIZE 0x100
#define QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET 0x10
@@ -178,8 +506,8 @@
#define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29
#define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b
#define QUERY_DEV_CAP_MAX_GSO_OFFSET 0x2d
+#define QUERY_DEV_CAP_RSS_OFFSET 0x2e
#define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f
-#define QUERY_DEV_CAP_STAT_CFG_INL_OFFSET 0x31
#define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33
#define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35
#define QUERY_DEV_CAP_MTU_WIDTH_OFFSET 0x36
@@ -187,10 +515,10 @@
#define QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET 0x38
#define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b
#define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c
+#define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET 0x3e
#define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f
#define QUERY_DEV_CAP_EXT_FLAGS_OFFSET 0x40
-#define QUERY_DEV_CAP_UDP_RSS_OFFSET 0x42
-#define QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET 0x43
+#define QUERY_DEV_CAP_SYNC_QP_OFFSET 0x42
#define QUERY_DEV_CAP_FLAGS_OFFSET 0x44
#define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48
#define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49
@@ -210,6 +538,13 @@
#define QUERY_DEV_CAP_MAX_PD_OFFSET 0x65
#define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66
#define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67
+#define QUERY_DEV_CAP_MAX_BASIC_COUNTERS_OFFSET 0x68
+#define QUERY_DEV_CAP_MAX_EXTENDED_COUNTERS_OFFSET 0x6c
+#define QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET 0x70
+#define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76
+#define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET 0x70
+#define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74
+#define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77
#define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80
#define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82
#define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84
@@ -222,10 +557,10 @@
#define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92
#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94
#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98
+#define QUERY_DEV_CAP_ETS_CFG_OFFSET 0x9c
#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
-#define QUERY_DEV_CAP_MAX_BASIC_CNT_OFFSET 0x68
-#define QUERY_DEV_CAP_MAX_EXT_CNT_OFFSET 0x6c
+ dev_cap->flags2 = 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
@@ -232,7 +567,7 @@
outbox = mailbox->buf;
err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
if (err)
goto out;
@@ -253,7 +588,7 @@
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET);
dev_cap->max_mpts = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
- dev_cap->reserved_eqs = 1 << (field & 0xf);
+ dev_cap->reserved_eqs = field & 0xf;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
dev_cap->max_eqs = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
@@ -275,6 +610,17 @@
else
dev_cap->max_gso_sz = 1 << field;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSS_OFFSET);
+ if (field & 0x20)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_XOR;
+ if (field & 0x10)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_TOP;
+ field &= 0xf;
+ if (field) {
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS;
+ dev_cap->max_rss_tbl_sz = 1 << field;
+ } else
+ dev_cap->max_rss_tbl_sz = 0;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET);
dev_cap->max_rdma_global = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET);
@@ -283,16 +629,28 @@
dev_cap->num_ports = field & 0xf;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET);
dev_cap->max_msg_sz = 1 << (field & 0x1f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET);
+ if (field & 0x10)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
+ if (field & 0x80)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
+ if (field & 0x80)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB;
+ dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET);
+ dev_cap->fs_max_num_qp_per_entry = field;
MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET);
dev_cap->stat_rate_support = stat_rate;
- MLX4_GET(field, outbox, QUERY_DEV_CAP_UDP_RSS_OFFSET);
- dev_cap->udp_rss = field & 0x1;
- MLX4_GET(field, outbox, QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET);
- dev_cap->loopback_support = field & 0x1;
- dev_cap->wol = field & 0x40;
- MLX4_GET(tmp1, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
- MLX4_GET(tmp2, outbox, QUERY_DEV_CAP_FLAGS_OFFSET);
- dev_cap->flags = tmp2 | (u64)tmp1 << 32;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET);
+ if (field & 0x80)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_TS;
+ MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+ MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET);
+ dev_cap->flags = flags | (u64)ext_flags << 32;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_SYNC_QP_OFFSET);
+ dev_cap->sync_qp = field & 0x10;
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET);
dev_cap->reserved_uars = field >> 4;
MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET);
@@ -305,10 +663,8 @@
MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET);
dev_cap->bf_reg_size = 1 << (field & 0x1f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET);
- if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size)) {
- mlx4_dbg(dev, "log blue flame is invalid (%d), forcing 3\n", field & 0x1f);
+ if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size))
field = 3;
- }
dev_cap->bf_regs_per_page = 1 << (field & 0x3f);
mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);
@@ -332,7 +688,6 @@
dev_cap->reserved_pds = field >> 4;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET);
dev_cap->max_pds = 1 << (field & 0x3f);
-
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET);
dev_cap->reserved_xrcds = field >> 4;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_XRC_OFFSET);
@@ -363,8 +718,6 @@
dev_cap->max_srq_sz = 1 << field;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_SZ_OFFSET);
dev_cap->max_qp_sz = 1 << field;
- MLX4_GET(field, outbox, QUERY_DEV_CAP_STAT_CFG_INL_OFFSET);
- dev_cap->inline_cfg = field & 1;
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSZ_SRQ_OFFSET);
dev_cap->resize_srq = field & 1;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_RQ_OFFSET);
@@ -376,13 +729,37 @@
QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
MLX4_GET(dev_cap->reserved_lkey, outbox,
QUERY_DEV_CAP_RSVD_LKEY_OFFSET);
+ MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETS_CFG_OFFSET);
+ if (field32 & (1 << 0))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP;
+ if (field32 & (1 << 7))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT;
+ if (field32 & (1 << 8))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW;
+ if (field32 & (1 << 13))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
+
MLX4_GET(dev_cap->max_icm_sz, outbox,
QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
- MLX4_GET(dev_cap->max_basic_counters, outbox,
- QUERY_DEV_CAP_MAX_BASIC_CNT_OFFSET);
- MLX4_GET(dev_cap->max_ext_counters, outbox,
- QUERY_DEV_CAP_MAX_EXT_CNT_OFFSET);
+ if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS)
+ MLX4_GET(dev_cap->max_basic_counters, outbox,
+ QUERY_DEV_CAP_MAX_BASIC_COUNTERS_OFFSET);
+ /* FW reports 256, but the real value is 255 */
+ dev_cap->max_basic_counters = min_t(u32, dev_cap->max_basic_counters, 255);
+ if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT)
+ MLX4_GET(dev_cap->max_extended_counters, outbox,
+ QUERY_DEV_CAP_MAX_EXTENDED_COUNTERS_OFFSET);
+ MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+ if (field32 & (1 << 16))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
+ if (field32 & (1 << 19))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK;
+ if (field32 & (1 << 20))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM;
+ if (field32 & (1 << 26))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL;
+
if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
for (i = 1; i <= dev_cap->num_ports; ++i) {
MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET);
@@ -408,19 +785,16 @@
#define QUERY_PORT_WAVELENGTH_OFFSET 0x1c
#define QUERY_PORT_TRANS_CODE_OFFSET 0x20
-#define STAT_CFG_PORT_MODE (1 << 28)
-#define STAT_CFG_PORT_OFFSET 0x8
-#define STAT_CFG_PORT_MASK (1 << 20)
-#define STAT_CFG_MOD_INLINE 0x3
-
for (i = 1; i <= dev_cap->num_ports; ++i) {
err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 0, MLX4_CMD_QUERY_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
if (err)
goto out;
MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET);
dev_cap->supported_port_types[i] = field & 3;
+ dev_cap->suggested_type[i] = (field >> 3) & 1;
+ dev_cap->default_sense[i] = (field >> 4) & 1;
MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET);
dev_cap->ib_mtu[i] = field & 0xf;
MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET);
@@ -440,20 +814,6 @@
dev_cap->vendor_oui[i] = field32 & 0xffffff;
MLX4_GET(dev_cap->wavelength[i], outbox, QUERY_PORT_WAVELENGTH_OFFSET);
MLX4_GET(dev_cap->trans_code[i], outbox, QUERY_PORT_TRANS_CODE_OFFSET);
-
- /* Query stat cfg for port enablement */
- if (dev_cap->inline_cfg) {
- in_modifier = STAT_CFG_PORT_MODE | i << 8 |
- STAT_CFG_PORT_OFFSET;
- err = mlx4_cmd_imm(dev, 0, &out_param,
- in_modifier,
- STAT_CFG_MOD_INLINE,
- MLX4_CMD_MOD_STAT_CFG,
- MLX4_CMD_TIME_CLASS_B);
- if (!err)
- if (!(out_param & STAT_CFG_PORT_MASK))
- dev_cap->supported_port_types[i] = 0;
- }
}
}
@@ -494,8 +854,12 @@
mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n",
dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg);
mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz);
+ mlx4_dbg(dev, "Max basic counters: %d\n", dev_cap->max_basic_counters);
+ mlx4_dbg(dev, "Max extended counters: %d\n", dev_cap->max_extended_counters);
+ mlx4_dbg(dev, "Max RSS Table size: %d\n", dev_cap->max_rss_tbl_sz);
dump_dev_cap_flags(dev, dev_cap->flags);
+ dump_dev_cap_flags2(dev, dev_cap->flags2);
out:
mlx4_free_cmd_mailbox(dev, mailbox);
@@ -502,6 +866,134 @@
return err;
}
+int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ u64 flags;
+ int err = 0;
+ u8 field;
+
+ err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+
+ /* add the port management change event capability unconditionally for slaves */
+ MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+ flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV;
+ MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+
+ /* For guests, report Blueflame disabled */
+ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET);
+ field &= 0x7f;
+ MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET);
+
+ /* turn off device-managed steering capability if not enabled */
+ if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ MLX4_GET(field, outbox->buf,
+ QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
+ field &= 0x7f;
+ MLX4_PUT(outbox->buf, field,
+ QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
+ }
+ return 0;
+}
+
+int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u64 def_mac;
+ u8 port_type;
+ u16 short_field;
+ int err;
+ int admin_link_state;
+
+#define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0
+#define MLX4_PORT_LINK_UP_MASK 0x80
+#define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c
+#define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e
+
+ err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0,
+ MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ if (!err && dev->caps.function != slave) {
+ /* set slave default_mac address to be zero MAC */
+ def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac;
+ MLX4_PUT(outbox->buf, def_mac, QUERY_PORT_MAC_OFFSET);
+
+ /* get port type - currently only eth is enabled */
+ MLX4_GET(port_type, outbox->buf,
+ QUERY_PORT_SUPPORTED_TYPE_OFFSET);
+
+ /* No link sensing allowed */
+ port_type &= MLX4_VF_PORT_NO_LINK_SENSE_MASK;
+ /* set port type to currently operating port type */
+ port_type |= (dev->caps.port_type[vhcr->in_modifier] & 0x3);
+
+ admin_link_state = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.link_state;
+ if (IFLA_VF_LINK_STATE_ENABLE == admin_link_state)
+ port_type |= MLX4_PORT_LINK_UP_MASK;
+ else if (IFLA_VF_LINK_STATE_DISABLE == admin_link_state)
+ port_type &= ~MLX4_PORT_LINK_UP_MASK;
+
+ MLX4_PUT(outbox->buf, port_type,
+ QUERY_PORT_SUPPORTED_TYPE_OFFSET);
+
+ if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH)
+ short_field = mlx4_get_slave_num_gids(dev, slave);
+ else
+ short_field = 1; /* slave max gids */
+ MLX4_PUT(outbox->buf, short_field,
+ QUERY_PORT_CUR_MAX_GID_OFFSET);
+
+ short_field = dev->caps.pkey_table_len[vhcr->in_modifier];
+ MLX4_PUT(outbox->buf, short_field,
+ QUERY_PORT_CUR_MAX_PKEY_OFFSET);
+ }
+
+ return err;
+}
+
+int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port,
+ int *gid_tbl_len, int *pkey_tbl_len)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ u16 field;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0,
+ MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ goto out;
+
+ outbox = mailbox->buf;
+
+ MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_GID_OFFSET);
+ *gid_tbl_len = field;
+
+ MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_PKEY_OFFSET);
+ *pkey_tbl_len = field;
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_get_slave_pkey_gid_tbl_len);
+
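
mlx4_get_slave_pkey_gid_tbl_len lets IB-side code on a slave learn its effective per-port table sizes through the wrapped QUERY_PORT path above. A hypothetical caller:

static void example_port_tables(struct mlx4_dev *dev, u8 port)
{
	int gid_tbl_len, pkey_tbl_len;

	if (!mlx4_get_slave_pkey_gid_tbl_len(dev, port,
					     &gid_tbl_len, &pkey_tbl_len))
		mlx4_dbg(dev, "port %d: %d GIDs, %d PKeys\n",
			 port, gid_tbl_len, pkey_tbl_len);
}
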
int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt)
{
struct mlx4_cmd_mailbox *mailbox;
@@ -551,7 +1043,8 @@
if (++nent == MLX4_MAILBOX_SIZE / 16) {
err = mlx4_cmd(dev, mailbox->dma, nent, 0, op,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
if (err)
goto out;
nent = 0;
@@ -560,7 +1053,8 @@
}
if (nent)
- err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, MLX4_CMD_TIME_CLASS_B);
+ err = mlx4_cmd(dev, mailbox->dma, nent, 0, op,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
if (err)
goto out;
@@ -589,13 +1083,15 @@
int mlx4_UNMAP_FA(struct mlx4_dev *dev)
{
- return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA, MLX4_CMD_TIME_CLASS_B);
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
int mlx4_RUN_FW(struct mlx4_dev *dev)
{
- return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW, MLX4_CMD_TIME_CLASS_A);
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}
int mlx4_QUERY_FW(struct mlx4_dev *dev)
@@ -611,7 +1107,7 @@
#define QUERY_FW_OUT_SIZE 0x100
#define QUERY_FW_VER_OFFSET 0x00
-#define MC_PROMISC_VER 0x2000702bcull
+#define QUERY_FW_PPF_ID 0x09
#define QUERY_FW_CMD_IF_REV_OFFSET 0x0a
#define QUERY_FW_MAX_CMD_OFFSET 0x0f
#define QUERY_FW_ERR_START_OFFSET 0x30
@@ -622,6 +1118,12 @@
#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20
#define QUERY_FW_CLR_INT_BAR_OFFSET 0x28
+#define QUERY_FW_COMM_BASE_OFFSET 0x40
+#define QUERY_FW_COMM_BAR_OFFSET 0x48
+
+#define QUERY_FW_CLOCK_OFFSET 0x50
+#define QUERY_FW_CLOCK_BAR 0x58
+
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
@@ -628,7 +1130,7 @@
outbox = mailbox->buf;
err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FW,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
if (err)
goto out;
@@ -640,11 +1142,14 @@
dev->caps.fw_ver = (fw_ver & 0xffff00000000ull) |
((fw_ver & 0xffff0000ull) >> 16) |
((fw_ver & 0x0000ffffull) << 16);
- if (dev->caps.fw_ver < MC_PROMISC_VER)
- dev->caps.mc_promisc_mode = 2;
- else
- dev->caps.mc_promisc_mode = 1;
+ MLX4_GET(lg, outbox, QUERY_FW_PPF_ID);
+ dev->caps.function = lg;
+
+ if (mlx4_is_slave(dev))
+ goto out;
+
+
MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET);
if (cmd_if_rev < MLX4_COMMAND_INTERFACE_MIN_REV ||
cmd_if_rev > MLX4_COMMAND_INTERFACE_MAX_REV) {
@@ -686,8 +1191,19 @@
MLX4_GET(fw->clr_int_bar, outbox, QUERY_FW_CLR_INT_BAR_OFFSET);
fw->clr_int_bar = (fw->clr_int_bar >> 6) * 2;
+ MLX4_GET(fw->comm_base, outbox, QUERY_FW_COMM_BASE_OFFSET);
+ MLX4_GET(fw->comm_bar, outbox, QUERY_FW_COMM_BAR_OFFSET);
+ fw->comm_bar = (fw->comm_bar >> 6) * 2;
+ mlx4_dbg(dev, "Communication vector bar:%d offset:0x%llx\n",
+ fw->comm_bar, (unsigned long long)fw->comm_base);
mlx4_dbg(dev, "FW size %d KB\n", fw->fw_pages >> 2);
+ MLX4_GET(fw->clock_offset, outbox, QUERY_FW_CLOCK_OFFSET);
+ MLX4_GET(fw->clock_bar, outbox, QUERY_FW_CLOCK_BAR);
+ fw->clock_bar = (fw->clock_bar >> 6) * 2;
+ mlx4_dbg(dev, "Internal clock bar:%d offset:0x%llx\n",
+ fw->clock_bar, (unsigned long long)fw->clock_offset);
+
/*
* Round up number of system pages needed in case
* MLX4_ICM_PAGE_SIZE < PAGE_SIZE.
@@ -704,8 +1220,32 @@
return err;
}
-static void get_board_id(void *vsd, char *board_id)
+int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
{
+ u8 *outbuf;
+ int err;
+
+ outbuf = outbox->buf;
+ err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_FW,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+
+ /* for slaves, set pci PPF ID to invalid and zero out everything
+ * else except FW version */
+ outbuf[0] = outbuf[1] = 0;
+ memset(&outbuf[8], 0, QUERY_FW_OUT_SIZE - 8);
+ outbuf[QUERY_FW_PPF_ID] = MLX4_INVALID_SLAVE_ID;
+
+ return 0;
+}
+
+static void get_board_id(void *vsd, char *board_id, char *vsdstr)
+{
int i;
#define VSD_OFFSET_SIG1 0x00
@@ -712,9 +1252,16 @@
#define VSD_OFFSET_SIG2 0xde
#define VSD_OFFSET_MLX_BOARD_ID 0xd0
#define VSD_OFFSET_TS_BOARD_ID 0x20
+#define VSD_LEN 0xd0
#define VSD_SIGNATURE_TOPSPIN 0x5ad
+ memset(vsdstr, 0, MLX4_VSD_LEN);
+
+ for (i = 0; i < VSD_LEN / 4; i++)
+ ((u32 *)vsdstr)[i] =
+ swab32(*(u32 *)(vsd + i * 4));
+
memset(board_id, 0, MLX4_BOARD_ID_LEN);
if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN &&
@@ -741,6 +1288,7 @@
#define QUERY_ADAPTER_OUT_SIZE 0x100
#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10
#define QUERY_ADAPTER_VSD_OFFSET 0x20
+#define QUERY_ADAPTER_VSD_VENDOR_ID_OFFSET 0x1e
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
@@ -748,14 +1296,17 @@
outbox = mailbox->buf;
err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_ADAPTER,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
if (err)
goto out;
MLX4_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
+ adapter->vsd_vendor_id = be16_to_cpup((u16 *)outbox +
+ QUERY_ADAPTER_VSD_VENDOR_ID_OFFSET / 2);
+
get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4,
- adapter->board_id);
+ adapter->board_id, adapter->vsd);
out:
mlx4_free_cmd_mailbox(dev, mailbox);
@@ -766,14 +1317,16 @@
{
struct mlx4_cmd_mailbox *mailbox;
__be32 *inbox;
+ u32 mw_enable;
int err;
#define INIT_HCA_IN_SIZE 0x200
+#define INIT_HCA_DRV_NAME_FOR_FW_MAX_SIZE 64
#define INIT_HCA_VERSION_OFFSET 0x000
#define INIT_HCA_VERSION 2
#define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e
-#define INIT_HCA_X86_64_BYTE_CACHELINE_SZ 0x40
#define INIT_HCA_FLAGS_OFFSET 0x014
+#define INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET 0x018
#define INIT_HCA_QPC_OFFSET 0x020
#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
@@ -781,6 +1334,7 @@
#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f)
#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30)
#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37)
+#define INIT_HCA_EQE_CQE_OFFSETS (INIT_HCA_QPC_OFFSET + 0x38)
#define INIT_HCA_ALTC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40)
#define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50)
#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60)
@@ -791,9 +1345,22 @@
#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00)
#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12)
#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16)
+#define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18)
#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
+#define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6
+#define INIT_HCA_DRIVER_VERSION_OFFSET 0x140
+#define INIT_HCA_FS_PARAM_OFFSET 0x1d0
+#define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00)
+#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12)
+#define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b)
+#define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21)
+#define INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x22)
+#define INIT_HCA_FS_IB_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x25)
+#define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26)
#define INIT_HCA_TPT_OFFSET 0x0f0
#define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00)
+#define INIT_HCA_TPT_MW_OFFSET (INIT_HCA_TPT_OFFSET + 0x08)
+#define INIT_HCA_TPT_MW_ENABLE (1 << 31)
#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b)
#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10)
#define INIT_HCA_CMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x18)
@@ -809,10 +1376,10 @@
memset(inbox, 0, INIT_HCA_IN_SIZE);
*((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION;
-#if defined(__x86_64__) || defined(__PPC64__)
- *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) = INIT_HCA_X86_64_BYTE_CACHELINE_SZ;
-#endif
+ *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) =
+ ((ilog2(cache_line_size()) - 4) << 5) | (1 << 4);
+
#if defined(__LITTLE_ENDIAN)
*(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
#elif defined(__BIG_ENDIAN)
@@ -831,10 +1398,43 @@
if (enable_qos)
*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2);
- /* counters mode */
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |=
- cpu_to_be32(dev->caps.counters_mode << 4);
+ /* Enable fast drop performance optimization */
+ if (dev->caps.fast_drop)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 7);
+ /* enable counters */
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4);
+
+ /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
+ *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
+ dev->caps.eqe_size = 64;
+ dev->caps.eqe_factor = 1;
+ } else {
+ dev->caps.eqe_size = 32;
+ dev->caps.eqe_factor = 0;
+ }
+
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
+ *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
+ dev->caps.cqe_size = 64;
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+ } else {
+ dev->caps.cqe_size = 32;
+ }
+
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT)
+ *(inbox + INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET / 4) |= cpu_to_be32(1 << 31);
+
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW) {
+ strncpy((u8 *)mailbox->buf + INIT_HCA_DRIVER_VERSION_OFFSET,
+ DRV_NAME_FOR_FW,
+ INIT_HCA_DRV_NAME_FOR_FW_MAX_SIZE - 1);
+ mlx4_dbg(dev, "Reporting Driver Version to FW: %s\n",
+ (u8 *)mailbox->buf + INIT_HCA_DRIVER_VERSION_OFFSET);
+ }
+
/* QPC/EEC/CQC/EQC/RDMARC attributes */
MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
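[Editorial sketch] On the 64-byte EQE/CQE enablement in the hunk above: when MLX4_DEV_CAP_FLAG_64B_EQE is set, the device writes events on a 64-byte stride, and eqe_size/eqe_factor record that for the EQ polling code. A sketch of how an entry would be located under either stride; the payload-in-second-half layout is an assumption inferred from how eqe_factor is used elsewhere in the driver, not a spec statement:

#include <stddef.h>
#include <stdint.h>

/* Locate EQE number 'index' in a ring of 'nent' entries. */
void *eqe_at(uint8_t *eq_buf, uint32_t index, uint32_t nent, int eqe_factor)
{
    size_t stride = (size_t)32 << eqe_factor;   /* 32 or 64 bytes */
    uint8_t *slot = eq_buf + (size_t)(index & (nent - 1)) * stride;

    /* with 64-byte EQEs the meaningful 32 bytes sit in the
     * second half of each slot */
    return slot + (eqe_factor ? 32 : 0);
}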
@@ -850,16 +1450,50 @@
MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET);
MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET);
- /* multicast attributes */
+ /* steering attributes */
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |=
+ cpu_to_be32(1 <<
+ INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN);
- MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
- MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
- MLX4_PUT(inbox, param->log_mc_hash_sz, INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
- MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+ MLX4_PUT(inbox, param->mc_base, INIT_HCA_FS_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_entry_sz,
+ INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_table_sz,
+ INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
+ /* Enable Ethernet flow steering
+ * with udp unicast and tcp unicast
+ */
+ MLX4_PUT(inbox, (u8) (MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN),
+ INIT_HCA_FS_ETH_BITS_OFFSET);
+ MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR,
+ INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET);
+ /* Enable IPoIB flow steering
+ * with udp unicast and tcp unicast
+ */
+ MLX4_PUT(inbox, (u8) (MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN),
+ INIT_HCA_FS_IB_BITS_OFFSET);
+ MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR,
+ INIT_HCA_FS_IB_NUM_ADDRS_OFFSET);
+ } else {
+ MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_entry_sz,
+ INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_hash_sz,
+ INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_table_sz,
+ INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0)
+ MLX4_PUT(inbox, (u8) (1 << 3),
+ INIT_HCA_UC_STEERING_OFFSET);
+ }
/* TPT attributes */
MLX4_PUT(inbox, param->dmpt_base, INIT_HCA_DMPT_BASE_OFFSET);
+ mw_enable = param->mw_enable ? INIT_HCA_TPT_MW_ENABLE : 0;
+ MLX4_PUT(inbox, mw_enable, INIT_HCA_TPT_MW_OFFSET);
MLX4_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET);
MLX4_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET);
MLX4_PUT(inbox, param->cmpt_base, INIT_HCA_CMPT_BASE_OFFSET);
@@ -866,14 +1500,12 @@
/* UAR attributes */
- MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET);
+ MLX4_PUT(inbox, param->uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET);
MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
- if (!mlx4_pre_t11_mode && dev->caps.flags & (u32) MLX4_DEV_CAP_FLAG_FC_T11)
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 10);
+ err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000,
+ MLX4_CMD_NATIVE);
- err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000);
-
if (err)
mlx4_err(dev, "INIT_HCA returns %d\n", err);
@@ -881,6 +1513,157 @@
return err;
}
+int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+ struct mlx4_init_hca_param *param)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ __be32 *outbox;
+ u32 dword_field;
+ u32 mw_enable;
+ int err;
+ u8 byte_field;
+
+#define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04
+#define QUERY_HCA_CORE_CLOCK_OFFSET 0x0c
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0,
+ MLX4_CMD_QUERY_HCA,
+ MLX4_CMD_TIME_CLASS_B,
+ !mlx4_is_slave(dev));
+ if (err)
+ goto out;
+
+ MLX4_GET(param->global_caps, outbox, QUERY_HCA_GLOBAL_CAPS_OFFSET);
+ MLX4_GET(param->hca_core_clock, outbox, QUERY_HCA_CORE_CLOCK_OFFSET);
+
+ /* QPC/EEC/CQC/EQC/RDMARC attributes */
+
+ MLX4_GET(param->qpc_base, outbox, INIT_HCA_QPC_BASE_OFFSET);
+ MLX4_GET(param->log_num_qps, outbox, INIT_HCA_LOG_QP_OFFSET);
+ MLX4_GET(param->srqc_base, outbox, INIT_HCA_SRQC_BASE_OFFSET);
+ MLX4_GET(param->log_num_srqs, outbox, INIT_HCA_LOG_SRQ_OFFSET);
+ MLX4_GET(param->cqc_base, outbox, INIT_HCA_CQC_BASE_OFFSET);
+ MLX4_GET(param->log_num_cqs, outbox, INIT_HCA_LOG_CQ_OFFSET);
+ MLX4_GET(param->altc_base, outbox, INIT_HCA_ALTC_BASE_OFFSET);
+ MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET);
+ MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET);
+ MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET);
+ MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET);
+ MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET);
+
+ MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET);
+ if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) {
+ param->steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
+ } else {
+ MLX4_GET(byte_field, outbox, INIT_HCA_UC_STEERING_OFFSET);
+ if (byte_field & 0x8)
+ param->steering_mode = MLX4_STEERING_MODE_B0;
+ else
+ param->steering_mode = MLX4_STEERING_MODE_A0;
+ }
+ /* steering attributes */
+ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET);
+ MLX4_GET(param->log_mc_entry_sz, outbox,
+ INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
+ MLX4_GET(param->log_mc_table_sz, outbox,
+ INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
+ } else {
+ MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET);
+ MLX4_GET(param->log_mc_entry_sz, outbox,
+ INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+ MLX4_GET(param->log_mc_hash_sz, outbox,
+ INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+ MLX4_GET(param->log_mc_table_sz, outbox,
+ INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+ }
+
+ /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+ MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS);
+ if (byte_field & 0x20) /* 64-bytes eqe enabled */
+ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+ if (byte_field & 0x40) /* 64-bytes cqe enabled */
+ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+
+ /* TPT attributes */
+
+ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET);
+ MLX4_GET(mw_enable, outbox, INIT_HCA_TPT_MW_OFFSET);
+ param->mw_enable = (mw_enable & INIT_HCA_TPT_MW_ENABLE) ==
+ INIT_HCA_TPT_MW_ENABLE;
+ MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET);
+ MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET);
+ MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET);
+
+ /* UAR attributes */
+
+ MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET);
+ MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET);
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return err;
+}
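[Editorial sketch] mlx4_QUERY_HCA reads the mailbox back with the same byte offsets INIT_HCA wrote, via MLX4_GET. A rough userspace model of MLX4_GET for a 32-bit field; the real macro in fw.c dispatches on sizeof(dest) to cover 1-, 2-, 4- and 8-byte fields:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>  /* ntohl() stands in for be32_to_cpu() */

#define SKETCH_MLX4_GET_U32(dest, source, offset)                   \
    do {                                                            \
        uint32_t __raw;                                             \
        memcpy(&__raw, (const uint8_t *)(source) + (offset),        \
               sizeof(__raw));                                      \
        (dest) = ntohl(__raw);                                      \
    } while (0)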
+
+/* for IB-type ports only in SRIOV mode. Checks that both proxy QP0
+ * and real QP0 are active, so that the paravirtualized QP0 is ready
+ * to operate */
+static int check_qp0_state(struct mlx4_dev *dev, int function, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ /* irrelevant if not infiniband */
+ if (priv->mfunc.master.qp0_state[port].proxy_qp0_active &&
+ priv->mfunc.master.qp0_state[port].qp0_active)
+ return 1;
+ return 0;
+}
+
+int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port = vhcr->in_modifier;
+ int err;
+
+ if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port))
+ return 0;
+
+ if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) {
+ /* Enable port only if it was previously disabled */
+ if (!priv->mfunc.master.init_port_ref[port]) {
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ }
+ priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
+ } else {
+ if (slave == mlx4_master_func_num(dev)) {
+ if (check_qp0_state(dev, slave, port) &&
+ !priv->mfunc.master.qp0_state[port].port_active) {
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ priv->mfunc.master.qp0_state[port].port_active = 1;
+ priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
+ }
+ } else
+ priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
+ }
+ ++priv->mfunc.master.init_port_ref[port];
+ return 0;
+}
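[Editorial sketch] The wrapper above implements a per-port reference count: only the first slave to initialize a port issues the real INIT_PORT command, and each slave's membership is tracked in init_port_mask so a repeated init is a no-op. A generic sketch of the pattern; types and names here are illustrative, not driver API:

struct port_ref {
    int refcnt;                 /* how many slaves hold the port */
    unsigned long slave_mask;   /* which slaves hold it */
};

int port_enable(struct port_ref *p, int slave, int (*hw_enable)(void))
{
    if (p->slave_mask & (1UL << slave))
        return 0;               /* this slave already counted */
    if (p->refcnt == 0) {       /* first user touches hardware */
        int err = hw_enable();
        if (err)
            return err;
    }
    p->slave_mask |= 1UL << slave;
    p->refcnt++;
    return 0;
}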
+
int mlx4_INIT_PORT(struct mlx4_dev *dev, int port)
{
struct mlx4_cmd_mailbox *mailbox;
@@ -924,26 +1707,69 @@
MLX4_PUT(inbox, field, INIT_PORT_MAX_PKEY_OFFSET);
err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_INIT_PORT,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
} else
err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_INIT_PORT);
+int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port = vhcr->in_modifier;
+ int err;
+
+ if (!(priv->mfunc.master.slave_state[slave].init_port_mask &
+ (1 << port)))
+ return 0;
+
+ if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) {
+ if (priv->mfunc.master.init_port_ref[port] == 1) {
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT,
+ 1000, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ }
+ priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
+ } else {
+ /* infiniband port */
+ if (slave == mlx4_master_func_num(dev)) {
+ if (!priv->mfunc.master.qp0_state[port].qp0_active &&
+ priv->mfunc.master.qp0_state[port].port_active) {
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT,
+ 1000, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
+ priv->mfunc.master.qp0_state[port].port_active = 0;
+ }
+ } else
+ priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
+ }
+ --priv->mfunc.master.init_port_ref[port];
+ return 0;
+}
+
int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port)
{
- return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000);
+ return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000,
+ MLX4_CMD_WRAPPED);
}
EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT);
int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic)
{
- return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000);
+ return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000,
+ MLX4_CMD_NATIVE);
}
int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages)
@@ -950,7 +1776,7 @@
{
int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0,
MLX4_CMD_SET_ICM_SIZE,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
if (ret)
return ret;
@@ -967,9 +1793,53 @@
int mlx4_NOP(struct mlx4_dev *dev)
{
/* Input modifier of 0x1f means "finish as soon as possible." */
- return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100);
+ return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}
+int mlx4_query_diag_counters(struct mlx4_dev *dev, int array_length,
+ u8 op_modifier, u32 in_offset[],
+ u32 counter_out[])
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ int ret;
+ int i;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ ret = mlx4_cmd_box(dev, 0, mailbox->dma, 0, op_modifier,
+ MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < array_length; i++) {
+ if (in_offset[i] > MLX4_MAILBOX_SIZE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ MLX4_GET(counter_out[i], outbox, in_offset[i]);
+ }
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_query_diag_counters);
+
+int mlx4_MOD_STAT_CFG_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ return -EPERM;
+}
+
#define MLX4_WOL_SETUP_MODE (5 << 28)
int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port)
{
@@ -976,7 +1846,8 @@
u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
return mlx4_cmd_imm(dev, 0, config, in_mod, 0x3,
- MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
}
EXPORT_SYMBOL_GPL(mlx4_wol_read);
@@ -985,46 +1856,99 @@
u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
return mlx4_cmd(dev, config, in_mod, 0x1, MLX4_CMD_MOD_STAT_CFG,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}
EXPORT_SYMBOL_GPL(mlx4_wol_write);
-int mlx4_query_diag_counters(struct mlx4_dev *dev, int array_length,
- u8 op_modifier, u32 in_offset[], u32 counter_out[])
+enum {
+ ADD_TO_MCG = 0x26,
+};
+
+
+void mlx4_opreq_action(struct work_struct *work)
{
+ struct mlx4_priv *priv = container_of(work, struct mlx4_priv, opreq_task);
+ struct mlx4_dev *dev = &priv->dev;
+ int num_tasks = atomic_read(&priv->opreq_count);
struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
u32 *outbox;
- int ret;
+ u32 modifier;
+ u16 token;
+ u16 type_m;
+ u16 type;
+ int err;
+ u32 num_qps;
+ struct mlx4_qp qp;
int i;
+ u8 rem_mcg;
+ u8 prot;
+#define GET_OP_REQ_MODIFIER_OFFSET 0x08
+#define GET_OP_REQ_TOKEN_OFFSET 0x14
+#define GET_OP_REQ_TYPE_OFFSET 0x1a
+#define GET_OP_REQ_DATA_OFFSET 0x20
+
mailbox = mlx4_alloc_cmd_mailbox(dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
+ if (IS_ERR(mailbox)) {
+ mlx4_err(dev, "Failed to allocate mailbox for GET_OP_REQ\n");
+ return;
+ }
outbox = mailbox->buf;
- ret = mlx4_cmd_box(dev, 0, mailbox->dma, 0, op_modifier,
- MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A);
- if (ret)
- goto out;
+ while (num_tasks) {
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0,
+ MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_err(dev, "Failed to retreive required operation: %d\n", err);
+ return;
+ }
+ MLX4_GET(modifier, outbox, GET_OP_REQ_MODIFIER_OFFSET);
+ MLX4_GET(token, outbox, GET_OP_REQ_TOKEN_OFFSET);
+ MLX4_GET(type, outbox, GET_OP_REQ_TYPE_OFFSET);
+ type_m = type >> 12;
+ type &= 0xfff;
- for (i=0; i < array_length; i++) {
- if (in_offset[i] > MLX4_MAILBOX_SIZE) {
- ret = -EINVAL;
+ switch (type) {
+ case ADD_TO_MCG:
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ mlx4_warn(dev, "ADD MCG operation is not supported in "
+ "DEVICE_MANAGED steerign mode\n");
+ err = EPERM;
+ break;
+ }
+ mgm = (struct mlx4_mgm *) ((u8 *) (outbox) + GET_OP_REQ_DATA_OFFSET);
+ num_qps = be32_to_cpu(mgm->members_count) & MGM_QPN_MASK;
+ rem_mcg = ((u8 *) (&mgm->members_count))[0] & 1;
+ prot = ((u8 *) (&mgm->members_count))[0] >> 6;
+
+ for (i = 0; i < num_qps; i++) {
+ qp.qpn = be32_to_cpu(mgm->qp[i]);
+ if (rem_mcg)
+ err = mlx4_multicast_detach(dev, &qp, mgm->gid, prot, 0);
+ else
+ err = mlx4_multicast_attach(dev, &qp, mgm->gid, mgm->gid[5] ,0, prot, NULL);
+ if (err)
+ break;
+ }
+ break;
+ default:
+ mlx4_warn(dev, "Bad type for required operation\n");
+ err = EINVAL;
+ break;
+ }
+ err = mlx4_cmd(dev, 0, ((u32) err | cpu_to_be32(token) << 16), 1,
+ MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_err(dev, "Failed to acknowledge required request: %d\n", err);
goto out;
}
-
- MLX4_GET(counter_out[i], outbox, in_offset[i]);
+ memset(outbox, 0, 0xffc);
+ num_tasks = atomic_dec_return(&priv->opreq_count);
}
out:
mlx4_free_cmd_mailbox(dev, mailbox);
- return ret;
}
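[Editorial sketch] mlx4_opreq_action drains priv->opreq_count with atomic_dec_return, so requests that the EQ handler queues while the work item is already running are still served. The same producer/consumer shape in standalone C11; queue_work and the firmware fetch are elided:

#include <stdatomic.h>

atomic_int opreq_count;

void opreq_producer(void)
{
    atomic_fetch_add(&opreq_count, 1);
    /* ...queue_work(mlx4_wq, &priv->opreq_task) in the driver... */
}

void opreq_worker(void)
{
    int remaining = atomic_load(&opreq_count);

    while (remaining) {
        /* ...issue GET_OP_REQ and handle one request... */
        remaining = atomic_fetch_sub(&opreq_count, 1) - 1;
    }
}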
-EXPORT_SYMBOL_GPL(mlx4_query_diag_counters);
-
-void mlx4_get_fc_t11_settings(struct mlx4_dev *dev, int *enable_pre_t11, int *t11_supported)
-{
- *enable_pre_t11 = !!mlx4_pre_t11_mode;
- *t11_supported = !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FC_T11);
-}
-EXPORT_SYMBOL_GPL(mlx4_get_fc_t11_settings);
Modified: trunk/sys/ofed/drivers/net/mlx4/fw.h
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/fw.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/fw.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -78,10 +78,10 @@
u16 wavelength[MLX4_MAX_PORTS + 1];
u64 trans_code[MLX4_MAX_PORTS + 1];
u16 stat_rate_support;
- int udp_rss;
- int loopback_support;
- int wol;
+ int fs_log_max_ucast_qp_range_size;
+ int fs_max_num_qp_per_entry;
u64 flags;
+ u64 flags2;
int reserved_uars;
int uar_size;
int min_page_sz;
@@ -108,21 +108,48 @@
int dmpt_entry_sz;
int cmpt_entry_sz;
int mtt_entry_sz;
- int inline_cfg;
int resize_srq;
u32 bmme_flags;
u32 reserved_lkey;
u64 max_icm_sz;
int max_gso_sz;
+ int max_rss_tbl_sz;
u8 supported_port_types[MLX4_MAX_PORTS + 1];
+ u8 suggested_type[MLX4_MAX_PORTS + 1];
+ u8 default_sense[MLX4_MAX_PORTS + 1];
u8 log_max_macs[MLX4_MAX_PORTS + 1];
u8 log_max_vlans[MLX4_MAX_PORTS + 1];
u32 max_basic_counters;
- u32 max_ext_counters;
+ u32 sync_qp;
+ u8 timestamp_support;
+ u32 max_extended_counters;
};
+struct mlx4_func_cap {
+ u8 num_ports;
+ u8 flags;
+ u32 pf_context_behaviour;
+ int qp_quota;
+ int cq_quota;
+ int srq_quota;
+ int mpt_quota;
+ int mtt_quota;
+ int max_eq;
+ int reserved_eq;
+ int mcg_quota;
+ u32 qp0_tunnel_qpn;
+ u32 qp0_proxy_qpn;
+ u32 qp1_tunnel_qpn;
+ u32 qp1_proxy_qpn;
+ u8 physical_port;
+ u8 port_flags;
+ u8 def_counter_index;
+};
+
struct mlx4_adapter {
+ u16 vsd_vendor_id;
char board_id[MLX4_BOARD_ID_LEN];
+ char vsd[MLX4_VSD_LEN];
u8 inta_pin;
};
@@ -138,8 +165,10 @@
u64 dmpt_base;
u64 cmpt_base;
u64 mtt_base;
+ u64 global_caps;
u16 log_mc_entry_sz;
u16 log_mc_hash_sz;
+ u16 hca_core_clock;
u8 log_num_qps;
u8 log_num_srqs;
u8 log_num_cqs;
@@ -148,6 +177,11 @@
u8 log_mc_table_sz;
u8 log_mpt_sz;
u8 log_uar_sz;
+ u8 uar_page_sz; /* log pg sz in 4k chunks */
+ u8 mw_enable; /* Enable memory windows */
+ u8 fs_hash_enable_bits;
+ u8 steering_mode; /* for QUERY_HCA */
+ u64 dev_cap_enabled;
};
struct mlx4_init_ib_param {
@@ -172,6 +206,13 @@
};
int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
+int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
+ struct mlx4_func_cap *func_cap);
+int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm);
int mlx4_UNMAP_FA(struct mlx4_dev *dev);
int mlx4_RUN_FW(struct mlx4_dev *dev);
@@ -178,10 +219,12 @@
int mlx4_QUERY_FW(struct mlx4_dev *dev);
int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter);
int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
+int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic);
int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt);
int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages);
int mlx4_NOP(struct mlx4_dev *dev);
int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg);
+void mlx4_opreq_action(struct work_struct *work);
#endif /* MLX4_FW_H */
Modified: trunk/sys/ofed/drivers/net/mlx4/icm.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/icm.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/icm.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,10 +31,11 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/math64.h>
#include <linux/mlx4/cmd.h>
@@ -93,13 +94,17 @@
kfree(icm);
}
-static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
+static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order,
+ gfp_t gfp_mask, int node)
{
struct page *page;
- page = alloc_pages(gfp_mask, order);
- if (!page)
- return -ENOMEM;
+ page = alloc_pages_node(node, gfp_mask, order);
+ if (!page) {
+ page = alloc_pages(gfp_mask, order);
+ if (!page)
+ return -ENOMEM;
+ }
sg_set_page(mem, page, PAGE_SIZE << order, 0);
return 0;
@@ -130,9 +135,13 @@
/* We use sg_set_buf for coherent allocs, which assumes low memory */
BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
- icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
- if (!icm)
- return NULL;
+ icm = kmalloc_node(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN),
+ dev->numa_node);
+ if (!icm) {
+ icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ if (!icm)
+ return NULL;
+ }
icm->refcount = 0;
INIT_LIST_HEAD(&icm->chunk_list);
@@ -141,10 +150,15 @@
while (npages > 0) {
if (!chunk) {
- chunk = kmalloc(sizeof *chunk,
- gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
- if (!chunk)
- goto fail;
+ chunk = kmalloc_node(sizeof *chunk,
+ gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN),
+ dev->numa_node);
+ if (!chunk) {
+ chunk = kmalloc(sizeof *chunk,
+ gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ if (!chunk)
+ goto fail;
+ }
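[Editorial sketch] Both allocations above follow the same new policy: try the device's NUMA node first, then fall back to any node rather than failing outright. Distilled into a kernel-style fragment; only kmalloc_node/kmalloc are used, as in the patch:

#include <linux/slab.h>

static void *alloc_near_device(size_t size, gfp_t gfp, int node)
{
    void *p = kmalloc_node(size, gfp, node);

    if (!p)         /* node-local memory exhausted */
        p = kmalloc(size, gfp);
    return p;
}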
sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN);
chunk->npages = 0;
@@ -161,31 +175,33 @@
cur_order, gfp_mask);
else
ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages],
- cur_order, gfp_mask);
+ cur_order, gfp_mask,
+ dev->numa_node);
- if (!ret) {
- ++chunk->npages;
+ if (ret) {
+ if (--cur_order < 0)
+ goto fail;
+ else
+ continue;
+ }
- if (coherent)
- ++chunk->nsg;
- else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
- chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
- chunk->npages,
- PCI_DMA_BIDIRECTIONAL);
+ ++chunk->npages;
- if (chunk->nsg <= 0)
- goto fail;
- }
+ if (coherent)
+ ++chunk->nsg;
+ else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
+ chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
+ chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
- if (chunk->npages == MLX4_ICM_CHUNK_LEN)
- chunk = NULL;
-
- npages -= 1 << cur_order;
- } else {
- --cur_order;
- if (cur_order < 0)
+ if (chunk->nsg <= 0)
goto fail;
}
+
+ if (chunk->npages == MLX4_ICM_CHUNK_LEN)
+ chunk = NULL;
+
+ npages -= 1 << cur_order;
}
if (!coherent && chunk) {
@@ -209,38 +225,12 @@
return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM, icm, virt);
}
-int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count)
+static int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count)
{
return mlx4_cmd(dev, virt, page_count, 0, MLX4_CMD_UNMAP_ICM,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
-int mlx4_MAP_ICM_page(struct mlx4_dev *dev, u64 dma_addr, u64 virt)
-{
- struct mlx4_cmd_mailbox *mailbox;
- __be64 *inbox;
- int err;
-
- mailbox = mlx4_alloc_cmd_mailbox(dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
- inbox = mailbox->buf;
-
- inbox[0] = cpu_to_be64(virt);
- inbox[1] = cpu_to_be64(dma_addr);
-
- err = mlx4_cmd(dev, mailbox->dma, 1, 0, MLX4_CMD_MAP_ICM,
- MLX4_CMD_TIME_CLASS_B);
-
- mlx4_free_cmd_mailbox(dev, mailbox);
-
- if (!err)
- mlx4_dbg(dev, "Mapped page at %llx to %llx for ICM.\n",
- (unsigned long long) dma_addr, (unsigned long long) virt);
-
- return err;
-}
-
int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
{
return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM_AUX, icm, -1);
@@ -248,12 +238,14 @@
int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
{
- return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX, MLX4_CMD_TIME_CLASS_B);
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
{
- int i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
+ u32 i = (obj & (table->num_obj - 1)) /
+ (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
int ret = 0;
mutex_lock(&table->mutex);
@@ -286,9 +278,10 @@
return ret;
}
-void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
+void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
{
- int i;
+ u32 i;
+ u64 offset;
i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
@@ -295,18 +288,25 @@
mutex_lock(&table->mutex);
if (--table->icm[i]->refcount == 0) {
- mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
- MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
- mlx4_free_icm(dev, table->icm[i], table->coherent);
- table->icm[i] = NULL;
+ offset = (u64) i * MLX4_TABLE_CHUNK_SIZE;
+
+ if (!mlx4_UNMAP_ICM(dev, table->virt + offset,
+ MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE)) {
+ mlx4_free_icm(dev, table->icm[i], table->coherent);
+ table->icm[i] = NULL;
+ } else {
+ pr_warn("mlx4_core: mlx4_UNMAP_ICM failed.\n");
+ }
}
mutex_unlock(&table->mutex);
}
-void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle)
+void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj,
+ dma_addr_t *dma_handle)
{
- int idx, offset, dma_offset, i;
+ int offset, dma_offset, i;
+ u64 idx;
struct mlx4_icm_chunk *chunk;
struct mlx4_icm *icm;
struct page *page = NULL;
@@ -316,7 +316,7 @@
mutex_lock(&table->mutex);
- idx = (obj & (table->num_obj - 1)) * table->obj_size;
+ idx = (u64) (obj & (table->num_obj - 1)) * table->obj_size;
icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE];
dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE;
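[Editorial note] The widening of idx to u64 above matters once obj * obj_size can exceed 2^31. A worked example with illustrative values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t obj = 1u << 26;    /* table entry number */
    uint32_t obj_size = 64;     /* bytes per entry */
    uint64_t idx = (uint64_t)obj * obj_size;

    assert(idx == (1ULL << 32)); /* a 32-bit 'int idx' would wrap to 0 */
    return 0;
}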
@@ -350,10 +350,11 @@
}
int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
- int start, int end)
+ u32 start, u32 end)
{
int inc = MLX4_TABLE_CHUNK_SIZE / table->obj_size;
- int i, err;
+ int err;
+ u32 i;
for (i = start; i <= end; i += inc) {
err = mlx4_table_get(dev, table, i);
@@ -373,9 +374,9 @@
}
void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
- int start, int end)
+ u32 start, u32 end)
{
- int i;
+ u32 i;
for (i = start; i <= end; i += MLX4_TABLE_CHUNK_SIZE / table->obj_size)
mlx4_table_put(dev, table, i);
@@ -382,7 +383,7 @@
}
int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
- u64 virt, int obj_size, int nobj, int reserved,
+ u64 virt, int obj_size, u64 nobj, int reserved,
int use_lowmem, int use_coherent)
{
int obj_per_chunk;
@@ -389,9 +390,10 @@
int num_icm;
unsigned chunk_size;
int i;
+ u64 size;
obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
- num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
+ num_icm = div_u64((nobj + obj_per_chunk - 1), obj_per_chunk);
table->icm = kcalloc(num_icm, sizeof *table->icm, GFP_KERNEL);
if (!table->icm)
@@ -404,10 +406,12 @@
table->coherent = use_coherent;
mutex_init(&table->mutex);
+ size = (u64) nobj * obj_size;
for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
chunk_size = MLX4_TABLE_CHUNK_SIZE;
- if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > nobj * obj_size)
- chunk_size = PAGE_ALIGN(nobj * obj_size - i * MLX4_TABLE_CHUNK_SIZE);
+ if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > size)
+ chunk_size = PAGE_ALIGN(size -
+ i * MLX4_TABLE_CHUNK_SIZE);
table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
(use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
@@ -432,10 +436,16 @@
err:
for (i = 0; i < num_icm; ++i)
if (table->icm[i]) {
- mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE,
- MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
- mlx4_free_icm(dev, table->icm[i], use_coherent);
+ if (!mlx4_UNMAP_ICM(dev,
+ virt + i * MLX4_TABLE_CHUNK_SIZE,
+ MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE)) {
+ mlx4_free_icm(dev, table->icm[i], use_coherent);
+ } else {
+ pr_warn("mlx4_core: mlx4_UNMAP_ICM failed.\n");
+ return -ENOMEM;
+ }
}
+ kfree(table->icm);
return -ENOMEM;
}
@@ -442,14 +452,22 @@
void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table)
{
- int i;
+ int i, err = 0;
for (i = 0; i < table->num_icm; ++i)
if (table->icm[i]) {
- mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
- MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
- mlx4_free_icm(dev, table->icm[i], table->coherent);
+ err = mlx4_UNMAP_ICM(dev,
+ table->virt + i * MLX4_TABLE_CHUNK_SIZE,
+ MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
+ if (!err) {
+ mlx4_free_icm(dev, table->icm[i],
+ table->coherent);
+ } else {
+ pr_warn("mlx4_core: mlx4_UNMAP_ICM failed.\n");
+ break;
+ }
}
- kfree(table->icm);
+ if (!err)
+ kfree(table->icm);
}
Modified: trunk/sys/ofed/drivers/net/mlx4/icm.h
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/icm.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/icm.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -37,6 +37,7 @@
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/mutex.h>
+#include <linux/scatterlist.h>
#define MLX4_ICM_CHUNK_LEN \
((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
@@ -71,17 +72,17 @@
gfp_t gfp_mask, int coherent);
void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
+void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
+int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
+ u32 start, u32 end);
+void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
+ u32 start, u32 end);
int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
- u64 virt, int obj_size, int nobj, int reserved,
+ u64 virt, int obj_size, u64 nobj, int reserved,
int use_lowmem, int use_coherent);
void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table);
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
-void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
-void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle);
-int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
- int start, int end);
-void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
- int start, int end);
+void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, dma_addr_t *dma_handle);
static inline void mlx4_icm_first(struct mlx4_icm *icm,
struct mlx4_icm_iter *iter)
@@ -122,8 +123,6 @@
return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
}
-int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count);
-int mlx4_MAP_ICM_page(struct mlx4_dev *dev, u64 dma_addr, u64 virt);
int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
Modified: trunk/sys/ofed/drivers/net/mlx4/intf.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/intf.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/intf.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -31,6 +31,9 @@
* SOFTWARE.
*/
+#include <linux/slab.h>
+#include <linux/module.h>
+
#include "mlx4.h"
struct mlx4_device_context {
@@ -112,38 +115,9 @@
}
EXPORT_SYMBOL_GPL(mlx4_unregister_interface);
-struct mlx4_dev *mlx4_query_interface(void *int_dev, int *port)
+void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
+ unsigned long param)
{
- struct mlx4_priv *priv;
- struct mlx4_device_context *dev_ctx;
- enum mlx4_query_reply r;
- unsigned long flags;
-
- mutex_lock(&intf_mutex);
-
- list_for_each_entry(priv, &dev_list, dev_list) {
- spin_lock_irqsave(&priv->ctx_lock, flags);
- list_for_each_entry(dev_ctx, &priv->ctx_list, list) {
- if (!dev_ctx->intf->query)
- continue;
- r = dev_ctx->intf->query(dev_ctx->context, int_dev);
- if (r != MLX4_QUERY_NOT_MINE) {
- *port = r;
- spin_unlock_irqrestore(&priv->ctx_lock, flags);
- mutex_unlock(&intf_mutex);
- return &priv->dev;
- }
- }
- spin_unlock_irqrestore(&priv->ctx_lock, flags);
- }
-
- mutex_unlock(&intf_mutex);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(mlx4_query_interface);
-
-void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port)
-{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_device_context *dev_ctx;
unsigned long flags;
@@ -152,7 +126,7 @@
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf->event)
- dev_ctx->intf->event(dev, dev_ctx->context, type, port);
+ dev_ctx->intf->event(dev, dev_ctx->context, type, param);
spin_unlock_irqrestore(&priv->ctx_lock, flags);
}
@@ -169,7 +143,8 @@
mlx4_add_device(intf, priv);
mutex_unlock(&intf_mutex);
- mlx4_start_catas_poll(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_start_catas_poll(dev);
return 0;
}
@@ -179,18 +154,19 @@
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_interface *intf;
- mlx4_stop_catas_poll(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_stop_catas_poll(dev);
mutex_lock(&intf_mutex);
list_for_each_entry(intf, &intf_list, list)
mlx4_remove_device(intf, priv);
- list_del(&priv->dev_list);
+ list_del_init(&priv->dev_list);
mutex_unlock(&intf_mutex);
}
-void *mlx4_find_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int port)
+void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_device_context *dev_ctx;
@@ -200,13 +176,13 @@
spin_lock_irqsave(&priv->ctx_lock, flags);
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
- if (dev_ctx->intf->protocol == proto && dev_ctx->intf->get_prot_dev) {
- result = dev_ctx->intf->get_prot_dev(dev, dev_ctx->context, port);
+ if (dev_ctx->intf->protocol == proto && dev_ctx->intf->get_dev) {
+ result = dev_ctx->intf->get_dev(dev, dev_ctx->context, port);
break;
- }
+ }
spin_unlock_irqrestore(&priv->ctx_lock, flags);
return result;
}
-
+EXPORT_SYMBOL_GPL(mlx4_get_protocol_dev);
Modified: trunk/sys/ofed/drivers/net/mlx4/main.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/main.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/main.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -33,12 +33,21 @@
* SOFTWARE.
*/
+#include <linux/kmod.h>
+/*
+ * kmod.h must be included before module.h since it indirectly includes sys/module.h.
+ * For the FreeBSD macro to work, sys/module.h must define MODULE_VERSION before linux/module.h does.
+ */
#include <linux/module.h>
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
+#include <linux/slab.h>
#include <linux/io-mapping.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/fs.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
@@ -46,11 +55,9 @@
#include "mlx4.h"
#include "fw.h"
#include "icm.h"
+#include "mlx4_stats.h"
-MODULE_AUTHOR("Roland Dreier");
-MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION);
+/* Mellanox ConnectX HCA low-level driver */
struct workqueue_struct *mlx4_wq;
@@ -62,15 +69,11 @@
#endif /* CONFIG_MLX4_DEBUG */
-int mlx4_blck_lb=1;
-module_param_named(block_loopback, mlx4_blck_lb, int, 0644);
-MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0");
-
#ifdef CONFIG_PCI_MSI
static int msi_x = 1;
module_param(msi_x, int, 0444);
-MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
+MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x (non-SRIOV only)");
#else /* CONFIG_PCI_MSI */
@@ -78,124 +81,483 @@
#endif /* CONFIG_PCI_MSI */
-static char mlx4_version[] __devinitdata =
- DRV_NAME ": Mellanox ConnectX core driver v"
- DRV_VERSION " (" DRV_RELDATE ")\n";
+static int enable_sys_tune = 0;
+module_param(enable_sys_tune, int, 0444);
+MODULE_PARM_DESC(enable_sys_tune, "Tune the CPUs for better performance (default 0)");
-struct mutex drv_mutex;
+int mlx4_blck_lb = 1;
+module_param_named(block_loopback, mlx4_blck_lb, int, 0644);
+MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 "
+ "(default: 1)");
+enum {
+ DEFAULT_DOMAIN = 0,
+ BDF_STR_SIZE = 8, /* bb:dd.f- */
+ DBDF_STR_SIZE = 13 /* mmmm:bb:dd.f- */
+};
-static struct mlx4_profile default_profile = {
- .num_qp = 1 << 18,
- .num_srq = 1 << 16,
- .rdmarc_per_qp = 1 << 4,
- .num_cq = 1 << 16,
- .num_mcg = 1 << 13,
- .num_mpt = 1 << 19,
- .num_mtt = 1 << 20,
+enum {
+ NUM_VFS,
+ PROBE_VF,
+ PORT_TYPE_ARRAY
};
-static int log_num_mac = 2;
+enum {
+ VALID_DATA,
+ INVALID_DATA,
+ INVALID_STR
+};
+
+struct param_data {
+ int id;
+ struct mlx4_dbdf2val_lst dbdf2val;
+};
+
+static struct param_data num_vfs = {
+ .id = NUM_VFS,
+ .dbdf2val = {
+ .name = "num_vfs param",
+ .num_vals = 1,
+ .def_val = {0},
+ .range = {0, MLX4_MAX_NUM_VF}
+ }
+};
+module_param_string(num_vfs, num_vfs.dbdf2val.str,
+ sizeof(num_vfs.dbdf2val.str), 0444);
+MODULE_PARM_DESC(num_vfs,
+ "Either single value (e.g. '5') to define uniform num_vfs value for all devices functions\n"
+ "\t\tor a string to map device function numbers to their num_vfs values (e.g. '0000:04:00.0-5,002b:1c:0b.a-15').\n"
+ "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for num_vfs value (e.g. 15).");
+
+static struct param_data probe_vf = {
+ .id = PROBE_VF,
+ .dbdf2val = {
+ .name = "probe_vf param",
+ .num_vals = 1,
+ .def_val = {0},
+ .range = {0, MLX4_MAX_NUM_VF}
+ }
+};
+module_param_string(probe_vf, probe_vf.dbdf2val.str,
+ sizeof(probe_vf.dbdf2val.str), 0444);
+MODULE_PARM_DESC(probe_vf,
+ "Either single value (e.g. '3') to define uniform number of VFs to probe by the pf driver for all devices functions\n"
+ "\t\tor a string to map device function numbers to their probe_vf values (e.g. '0000:04:00.0-3,002b:1c:0b.a-13').\n"
+ "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for probe_vf value (e.g. 13).");
+
+int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
+
+module_param_named(log_num_mgm_entry_size,
+ mlx4_log_num_mgm_entry_size, int, 0444);
+MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, which defines the"
+ " number of QPs per MCG; for example,"
+ " 10 gives 248. Range: 7 <="
+ " log_num_mgm_entry_size <= 12."
+ " To activate device managed"
+ " flow steering when available, set to -1");
+
+static int high_rate_steer;
+module_param(high_rate_steer, int, 0444);
+MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate"
+ " (default off)");
+
+static int fast_drop;
+module_param_named(fast_drop, fast_drop, int, 0444);
+MODULE_PARM_DESC(fast_drop,
+ "Enable fast packet drop when no recieve WQEs are posted");
+
+int mlx4_enable_64b_cqe_eqe = 1;
+module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644);
+MODULE_PARM_DESC(enable_64b_cqe_eqe,
+ "Enable 64 byte CQEs/EQEs when the the FW supports this if non-zero (default: 1)");
+
+#define HCA_GLOBAL_CAP_MASK 0
+
+#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE
+
+static char mlx4_version[] __devinitdata =
+ DRV_NAME ": Mellanox ConnectX VPI driver v"
+ DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");
-static int use_prio;
-module_param_named(use_prio, use_prio, bool, 0444);
-MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
- "(0/1, default 0)");
+static int log_num_vlan;
+module_param_named(log_num_vlan, log_num_vlan, int, 0444);
+MODULE_PARM_DESC(log_num_vlan,
+ "(Obsolete) Log2 max number of VLANs per ETH port (0-7)");
+/* Log2 max number of VLANs per ETH port (0-7) */
+#define MLX4_LOG_NUM_VLANS 7
-static struct mlx4_profile mod_param_profile = { 0 };
+int log_mtts_per_seg = ilog2(1);
+module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
+MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment "
+ "(0-7) (default: 0)");
+static struct param_data port_type_array = {
+ .id = PORT_TYPE_ARRAY,
+ .dbdf2val = {
+ .name = "port_type_array param",
+ .num_vals = 2,
+ .def_val = {MLX4_PORT_TYPE_ETH, MLX4_PORT_TYPE_ETH},
+ .range = {MLX4_PORT_TYPE_IB, MLX4_PORT_TYPE_NA}
+ }
+};
+module_param_string(port_type_array, port_type_array.dbdf2val.str,
+ sizeof(port_type_array.dbdf2val.str), 0444);
+MODULE_PARM_DESC(port_type_array,
+ "Either pair of values (e.g. '1,2') to define uniform port1/port2 types configuration for all devices functions\n"
+ "\t\tor a string to map device function numbers to their pair of port types values (e.g. '0000:04:00.0-1;2,002b:1c:0b.a-1;1').\n"
+ "\t\tValid port types: 1-ib, 2-eth, 3-auto, 4-N/A\n"
+ "\t\tIn case that only one port is available use the N/A port type for port2 (e.g '1,4').");
+
+
+struct mlx4_port_config {
+ struct list_head list;
+ enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
+ struct pci_dev *pdev;
+};
+
+#define MLX4_LOG_NUM_MTT 20
+/* We limit to 30 because of a bitmap issue which uses int and not uint
+ * (see mlx4_buddy_init -> bitmap_zero, which takes an int).
+ */
+#define MLX4_MAX_LOG_NUM_MTT 30
+static struct mlx4_profile mod_param_profile = {
+ .num_qp = 19,
+ .num_srq = 16,
+ .rdmarc_per_qp = 4,
+ .num_cq = 16,
+ .num_mcg = 13,
+ .num_mpt = 19,
+ .num_mtt_segs = 0, /* max(20, 2*MTTs for host memory) */
+};
+
module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444);
-MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA");
+MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)");
module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444);
-MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA");
+MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA "
+ "(default: 16)");
-module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int, 0444);
-MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP");
+module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int,
+ 0444);
+MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP "
+ "(default: 4)");
module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444);
-MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA");
+MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)");
module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444);
-MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA");
+MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA "
+ "(default: 13)");
module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444);
MODULE_PARM_DESC(log_num_mpt,
- "log maximum number of memory protection table entries per HCA");
+ "log maximum number of memory protection table entries per "
+ "HCA (default: 19)");
-module_param_named(log_num_mtt, mod_param_profile.num_mtt, int, 0444);
+module_param_named(log_num_mtt, mod_param_profile.num_mtt_segs, int, 0444);
MODULE_PARM_DESC(log_num_mtt,
- "log maximum number of memory translation table segments per HCA");
+ "log maximum number of memory translation table segments per "
+ "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))");
-static int log_mtts_per_seg = 0;
-module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
-MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
+enum {
+ MLX4_IF_STATE_BASIC,
+ MLX4_IF_STATE_EXTENDED
+};
-static void process_mod_param_profile(void)
+static inline u64 dbdf_to_u64(int domain, int bus, int dev, int fn)
{
- default_profile.num_qp = (mod_param_profile.num_qp ?
- 1 << mod_param_profile.num_qp :
- default_profile.num_qp);
- default_profile.num_srq = (mod_param_profile.num_srq ?
- 1 << mod_param_profile.num_srq :
- default_profile.num_srq);
- default_profile.rdmarc_per_qp = (mod_param_profile.rdmarc_per_qp ?
- 1 << mod_param_profile.rdmarc_per_qp :
- default_profile.rdmarc_per_qp);
- default_profile.num_cq = (mod_param_profile.num_cq ?
- 1 << mod_param_profile.num_cq :
- default_profile.num_cq);
- default_profile.num_mcg = (mod_param_profile.num_mcg ?
- 1 << mod_param_profile.num_mcg :
- default_profile.num_mcg);
- default_profile.num_mpt = (mod_param_profile.num_mpt ?
- 1 << mod_param_profile.num_mpt :
- default_profile.num_mpt);
- default_profile.num_mtt = (mod_param_profile.num_mtt ?
- 1 << mod_param_profile.num_mtt :
- default_profile.num_mtt);
+ return (domain << 20) | (bus << 12) | (dev << 4) | fn;
}
-struct mlx4_port_config
+static inline void pr_bdf_err(const char *dbdf, const char *pname)
{
- struct list_head list;
- enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
- struct pci_dev *pdev;
-};
-static LIST_HEAD(config_list);
+ pr_warn("mlx4_core: '%s' is not valid bdf in '%s'\n", dbdf, pname);
+}
-static void mlx4_config_cleanup(void)
+static inline void pr_val_err(const char *dbdf, const char *pname,
+ const char *val)
{
- struct mlx4_port_config *config, *tmp;
+ pr_warn("mlx4_core: value '%s' of bdf '%s' in '%s' is not valid\n"
+ , val, dbdf, pname);
+}
- list_for_each_entry_safe(config, tmp, &config_list, list) {
- list_del(&config->list);
- kfree(config);
+static inline void pr_out_of_range_bdf(const char *dbdf, int val,
+ struct mlx4_dbdf2val_lst *dbdf2val)
+{
+ pr_warn("mlx4_core: value %d in bdf '%s' of '%s' is out of its valid range (%d,%d)\n"
+ , val, dbdf, dbdf2val->name , dbdf2val->range.min,
+ dbdf2val->range.max);
+}
+
+static inline void pr_out_of_range(struct mlx4_dbdf2val_lst *dbdf2val)
+{
+ pr_warn("mlx4_core: value of '%s' is out of its valid range (%d,%d)\n"
+ , dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max);
+}
+
+static inline int is_in_range(int val, struct mlx4_range *r)
+{
+ return (val >= r->min && val <= r->max);
+}
+
+static int update_defaults(struct param_data *pdata)
+{
+ long int val[MLX4_MAX_BDF_VALS];
+ int ret;
+ char *t, *p = pdata->dbdf2val.str;
+ char sval[32];
+ int val_len;
+
+ if (!strlen(p) || strchr(p, ':') || strchr(p, '.') || strchr(p, ';'))
+ return INVALID_STR;
+
+ switch (pdata->id) {
+ case PORT_TYPE_ARRAY:
+ t = strchr(p, ',');
+ if (!t || t == p || (t - p) > sizeof(sval))
+ return INVALID_STR;
+
+ val_len = t - p;
+ strncpy(sval, p, val_len);
+ sval[val_len] = 0;
+
+ ret = kstrtol(sval, 0, &val[0]);
+ if (ret == -EINVAL)
+ return INVALID_STR;
+ if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) {
+ pr_out_of_range(&pdata->dbdf2val);
+ return INVALID_DATA;
+ }
+
+ ret = kstrtol(t + 1, 0, &val[1]);
+ if (ret == -EINVAL)
+ return INVALID_STR;
+ if (ret || !is_in_range(val[1], &pdata->dbdf2val.range)) {
+ pr_out_of_range(&pdata->dbdf2val);
+ return INVALID_DATA;
+ }
+
+ pdata->dbdf2val.tbl[0].val[0] = val[0];
+ pdata->dbdf2val.tbl[0].val[1] = val[1];
+ break;
+
+ case NUM_VFS:
+ case PROBE_VF:
+ ret = kstrtol(p, 0, &val[0]);
+ if (ret == -EINVAL)
+ return INVALID_STR;
+ if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) {
+ pr_out_of_range(&pdata->dbdf2val);
+ return INVALID_DATA;
+ }
+ pdata->dbdf2val.tbl[0].val[0] = val[0];
+ break;
}
+ pdata->dbdf2val.tbl[1].dbdf = MLX4_ENDOF_TBL;
+
+ return VALID_DATA;
}
-void *mlx4_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int port)
+int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst)
{
- return mlx4_find_get_prot_dev(dev, proto, port);
+ int domain, bus, dev, fn;
+ u64 dbdf;
+ char *p, *t, *v;
+ char tmp[32];
+ char sbdf[32];
+ char sep = ',';
+ int j, k, str_size, i = 1;
+ int prfx_size;
+
+ p = dbdf2val_lst->str;
+
+ for (j = 0; j < dbdf2val_lst->num_vals; j++)
+ dbdf2val_lst->tbl[0].val[j] = dbdf2val_lst->def_val[j];
+ dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL;
+
+ str_size = strlen(dbdf2val_lst->str);
+
+ if (str_size == 0)
+ return 0;
+
+ while (strlen(p)) {
+ prfx_size = BDF_STR_SIZE;
+ sbdf[prfx_size] = 0;
+ strncpy(sbdf, p, prfx_size);
+ domain = DEFAULT_DOMAIN;
+ if (sscanf(sbdf, "%02x:%02x.%x-", &bus, &dev, &fn) != 3) {
+ prfx_size = DBDF_STR_SIZE;
+ sbdf[prfx_size] = 0;
+ strncpy(sbdf, p, prfx_size);
+ if (sscanf(sbdf, "%04x:%02x:%02x.%x-", &domain, &bus,
+ &dev, &fn) != 4) {
+ pr_bdf_err(sbdf, dbdf2val_lst->name);
+ goto err;
+ }
+ sprintf(tmp, "%04x:%02x:%02x.%x-", domain, bus, dev,
+ fn);
+ } else {
+ sprintf(tmp, "%02x:%02x.%x-", bus, dev, fn);
+ }
+
+ if (strnicmp(sbdf, tmp, sizeof(tmp))) {
+ pr_bdf_err(sbdf, dbdf2val_lst->name);
+ goto err;
+ }
+
+ dbdf = dbdf_to_u64(domain, bus, dev, fn);
+
+ for (j = 1; j < i; j++)
+ if (dbdf2val_lst->tbl[j].dbdf == dbdf) {
+ pr_warn("mlx4_core: in '%s', %s appears multiple times\n"
+ , dbdf2val_lst->name, sbdf);
+ goto err;
+ }
+
+ if (i >= MLX4_DEVS_TBL_SIZE) {
+ pr_warn("mlx4_core: Too many devices in '%s'\n"
+ , dbdf2val_lst->name);
+ goto err;
+ }
+
+ p += prfx_size;
+ t = strchr(p, sep);
+ t = t ? t : p + strlen(p);
+ if (p >= t) {
+ pr_val_err(sbdf, dbdf2val_lst->name, "");
+ goto err;
+ }
+
+ for (k = 0; k < dbdf2val_lst->num_vals; k++) {
+ char sval[32];
+ long int val;
+ int ret, val_len;
+ char vsep = ';';
+
+ v = (k == dbdf2val_lst->num_vals - 1) ? t : strchr(p, vsep);
+ if (!v || v > t || v == p || (v - p) > sizeof(sval)) {
+ pr_val_err(sbdf, dbdf2val_lst->name, p);
+ goto err;
+ }
+ val_len = v - p;
+ strncpy(sval, p, val_len);
+ sval[val_len] = 0;
+
+ ret = kstrtol(sval, 0, &val);
+ if (ret) {
+ if (strchr(p, vsep))
+ pr_warn("mlx4_core: too many vals in bdf '%s' of '%s'\n"
+ , sbdf, dbdf2val_lst->name);
+ else
+ pr_val_err(sbdf, dbdf2val_lst->name,
+ sval);
+ goto err;
+ }
+ if (!is_in_range(val, &dbdf2val_lst->range)) {
+ pr_out_of_range_bdf(sbdf, val, dbdf2val_lst);
+ goto err;
+ }
+
+ dbdf2val_lst->tbl[i].val[k] = val;
+ p = v;
+ if (p[0] == vsep)
+ p++;
+ }
+
+ dbdf2val_lst->tbl[i].dbdf = dbdf;
+ if (strlen(p)) {
+ if (p[0] != sep) {
+ pr_warn("mlx4_core: expect separator '%c' before '%s' in '%s'\n"
+ , sep, p, dbdf2val_lst->name);
+ goto err;
+ }
+ p++;
+ }
+ i++;
+ if (i < MLX4_DEVS_TBL_SIZE)
+ dbdf2val_lst->tbl[i].dbdf = MLX4_ENDOF_TBL;
+ }
+
+ return 0;
+
+err:
+ dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL;
+ pr_warn("mlx4_core: The value of '%s' is incorrect. The value is discarded!\n"
+ , dbdf2val_lst->name);
+
+ return -EINVAL;
}
-EXPORT_SYMBOL(mlx4_get_prot_dev);
+EXPORT_SYMBOL(mlx4_fill_dbdf2val_tbl);
-void mlx4_set_iboe_counter(struct mlx4_dev *dev, int index, u8 port)
+int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx,
+ int *val)
{
- struct mlx4_priv *priv = mlx4_priv(dev);
+ u64 dbdf;
+ int i = 1;
- priv->iboe_counter_index[port - 1] = index;
+ *val = tbl[0].val[idx];
+ if (!pdev)
+ return -EINVAL;
+
+ dbdf = dbdf_to_u64(pci_get_domain(pdev->dev.bsddev), pci_get_bus(pdev->dev.bsddev),
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+ while ((i < MLX4_DEVS_TBL_SIZE) && (tbl[i].dbdf != MLX4_ENDOF_TBL)) {
+ if (tbl[i].dbdf == dbdf) {
+ *val = tbl[i].val[idx];
+ return 0;
+ }
+ i++;
+ }
+
+ return 0;
}
-EXPORT_SYMBOL(mlx4_set_iboe_counter);
+EXPORT_SYMBOL(mlx4_get_val);
-int mlx4_get_iboe_counter(struct mlx4_dev *dev, u8 port)
+static void process_mod_param_profile(struct mlx4_profile *profile)
{
- struct mlx4_priv *priv = mlx4_priv(dev);
+ vm_size_t hwphyssz;
+ hwphyssz = 0;
+ TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz);
- return priv->iboe_counter_index[port - 1];
+ profile->num_qp = 1 << mod_param_profile.num_qp;
+ profile->num_srq = 1 << mod_param_profile.num_srq;
+ profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp;
+ profile->num_cq = 1 << mod_param_profile.num_cq;
+ profile->num_mcg = 1 << mod_param_profile.num_mcg;
+ profile->num_mpt = 1 << mod_param_profile.num_mpt;
+ /*
+ * We want to scale the number of MTTs with the size of the
+ * system memory, since it makes sense to register a lot of
+ * memory on a system with a lot of memory. As a heuristic,
+ * make sure we have enough MTTs to register twice the system
+ * memory (with PAGE_SIZE entries).
+ *
+ * This number has to be a power of two and fit into 32 bits
+ * due to device limitations. We cap this at 2^30 because of a bitmap
+ * limitation (mlx4_buddy_init -> bitmap_zero works with int, not uint).
+ * That limits us to 4TB of memory registration per HCA with
+ * 4KB pages, which is probably OK for the next few months.
+ */
+ if (mod_param_profile.num_mtt_segs)
+ profile->num_mtt_segs = 1 << mod_param_profile.num_mtt_segs;
+ else {
+ profile->num_mtt_segs =
+ roundup_pow_of_two(max_t(unsigned,
+ 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg),
+ min(1UL <<
+ (MLX4_MAX_LOG_NUM_MTT -
+ log_mtts_per_seg),
+ (hwphyssz << 1)
+ >> log_mtts_per_seg)));
+ /* set the actual value, so it will be reflected to the user
+ using the sysfs */
+ mod_param_profile.num_mtt_segs = ilog2(profile->num_mtt_segs);
+ }
}
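[Editorial sketch] The MTT heuristic in process_mod_param_profile reduces to a clamp-then-round computation. A standalone sketch, with assumptions: the constants 20 and 30 stand for MLX4_LOG_NUM_MTT and MLX4_MAX_LOG_NUM_MTT, mem_bytes is the hw.realmem value, and the (mem << 1) >> log_mtts_per_seg arithmetic is taken as-is from the patch:

#include <stdint.h>

static uint64_t roundup_pow2(uint64_t v)
{
    uint64_t p = 1;

    while (p < v)
        p <<= 1;
    return p;
}

uint64_t num_mtt_segs(uint64_t mem_bytes, int log_mtts_per_seg)
{
    uint64_t floor_segs = 1ULL << (20 - log_mtts_per_seg);
    uint64_t cap_segs   = 1ULL << (30 - log_mtts_per_seg);
    uint64_t want       = (mem_bytes << 1) >> log_mtts_per_seg;

    if (want > cap_segs)        /* bitmap limit: at most 2^30 MTTs */
        want = cap_segs;
    if (want < floor_segs)      /* never below 2^20 MTTs */
        want = floor_segs;
    return roundup_pow2(want);
}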
-EXPORT_SYMBOL(mlx4_get_iboe_counter);
int mlx4_check_port_params(struct mlx4_dev *dev,
enum mlx4_port_type *port_type)
@@ -209,9 +571,6 @@
"on this HCA, aborting.\n");
return -EINVAL;
}
- if (port_type[i] == MLX4_PORT_TYPE_ETH &&
- port_type[i + 1] == MLX4_PORT_TYPE_IB)
- return -EINVAL;
}
}
@@ -233,19 +592,6 @@
dev->caps.port_mask[i] = dev->caps.port_type[i];
}
-static u8 get_counters_mode(u64 flags)
-{
- switch (flags >> 48 & 3) {
- case 2:
- case 3:
- return MLX4_CUNTERS_EXT;
- case 1:
- return MLX4_CUNTERS_BASIC;
- default:
- return MLX4_CUNTERS_DISABLED;
- }
-}
-
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
int err;
@@ -260,7 +606,7 @@
if (dev_cap->min_page_sz > PAGE_SIZE) {
mlx4_err(dev, "HCA minimum page size of %d bigger than "
"kernel PAGE_SIZE of %d, aborting.\n",
- dev_cap->min_page_sz, PAGE_SIZE);
+ dev_cap->min_page_sz, (int)PAGE_SIZE);
return -ENODEV;
}
if (dev_cap->num_ports > MLX4_MAX_PORTS) {
@@ -279,9 +625,14 @@
}
dev->caps.num_ports = dev_cap->num_ports;
+ dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM;
for (i = 1; i <= dev->caps.num_ports; ++i) {
dev->caps.vl_cap[i] = dev_cap->max_vl[i];
dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i];
+ dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i];
+ dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i];
+ /* set gid and pkey table operating lengths by default
+ * to non-SRIOV values */
dev->caps.gid_table_len[i] = dev_cap->max_gids[i];
dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
@@ -288,6 +639,8 @@
dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i];
dev->caps.def_mac[i] = dev_cap->def_mac[i];
dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
+ dev->caps.suggested_type[i] = dev_cap->suggested_type[i];
+ dev->caps.default_sense[i] = dev_cap->default_sense[i];
dev->caps.trans_type[i] = dev_cap->trans_type[i];
dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i];
dev->caps.wavelength[i] = dev_cap->wavelength[i];
@@ -294,6 +647,7 @@
dev->caps.trans_code[i] = dev_cap->trans_code[i];
}
+ dev->caps.uar_page_size = PAGE_SIZE;
dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE;
dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
dev->caps.bf_reg_size = dev_cap->bf_reg_size;
@@ -307,52 +661,112 @@
dev->caps.reserved_srqs = dev_cap->reserved_srqs;
dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz;
dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz;
- dev->caps.num_qp_per_mgm = MLX4_QP_PER_MGM;
/*
* Subtract 1 from the limit because we need to allocate a
- * spare CQE so the HCA HW can tell the difference between an
- * empty CQ and a full CQ.
+ * spare CQE to enable resizing the CQ
*/
dev->caps.max_cqes = dev_cap->max_cq_sz - 1;
dev->caps.reserved_cqs = dev_cap->reserved_cqs;
dev->caps.reserved_eqs = dev_cap->reserved_eqs;
- dev->caps.mtts_per_seg = 1 << log_mtts_per_seg;
- dev->caps.reserved_mtts = DIV_ROUND_UP(dev_cap->reserved_mtts,
- dev->caps.mtts_per_seg);
+ dev->caps.reserved_mtts = dev_cap->reserved_mtts;
dev->caps.reserved_mrws = dev_cap->reserved_mrws;
- dev->caps.reserved_uars = dev_cap->reserved_uars;
+
+ /* The first 128 UARs are used for EQ doorbells */
+ dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars);
dev->caps.reserved_pds = dev_cap->reserved_pds;
- dev->caps.mtt_entry_sz = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
+ dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
+ dev_cap->reserved_xrcds : 0;
+ dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
+ dev_cap->max_xrcds : 0;
+ dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz;
+
dev->caps.max_msg_sz = dev_cap->max_msg_sz;
dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1);
dev->caps.flags = dev_cap->flags;
+ dev->caps.flags2 = dev_cap->flags2;
dev->caps.bmme_flags = dev_cap->bmme_flags;
dev->caps.reserved_lkey = dev_cap->reserved_lkey;
dev->caps.stat_rate_support = dev_cap->stat_rate_support;
- dev->caps.udp_rss = dev_cap->udp_rss;
- dev->caps.loopback_support = dev_cap->loopback_support;
- dev->caps.wol = dev_cap->wol;
+ dev->caps.cq_timestamp = dev_cap->timestamp_support;
dev->caps.max_gso_sz = dev_cap->max_gso_sz;
- dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
- dev_cap->reserved_xrcds : 0;
- dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
- dev_cap->max_xrcds : 0;
+ dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz;
+ /* Sense port is always allowed on supported devices for ConnectX-1 and -2 */
+ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
+ dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
+ /* Don't do sense port on multifunction devices (for now at least) */
+ if (mlx4_is_mfunc(dev))
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
+
dev->caps.log_num_macs = log_num_mac;
- dev->caps.log_num_prios = use_prio ? 3 : 0;
+ dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
+ dev->caps.fast_drop = fast_drop ?
+ !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) :
+ 0;
+
for (i = 1; i <= dev->caps.num_ports; ++i) {
dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
if (dev->caps.supported_type[i]) {
- if (dev->caps.supported_type[i] != MLX4_PORT_TYPE_ETH)
+ /* if only ETH is supported - assign ETH */
+ if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
+ dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+ /* if only IB is supported, assign IB */
+ else if (dev->caps.supported_type[i] ==
+ MLX4_PORT_TYPE_IB)
dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
- else
- dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+ else {
+ /*
+ * if IB and ETH are supported, we set the port
+ * type according to user selection of port type;
+ * if there is no user selection, take the FW hint
+ */
+ int pta;
+ mlx4_get_val(port_type_array.dbdf2val.tbl,
+ pci_physfn(dev->pdev), i - 1,
+ &pta);
+ if (pta == MLX4_PORT_TYPE_NONE) {
+ dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
+ MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
+ } else if (pta == MLX4_PORT_TYPE_NA) {
+ mlx4_err(dev, "Port %d is valid port. "
+ "It is not allowed to configure its type to N/A(%d)\n",
+ i, MLX4_PORT_TYPE_NA);
+ return -EINVAL;
+ } else {
+ dev->caps.port_type[i] = pta;
+ }
+ }
}
- dev->caps.possible_type[i] = dev->caps.port_type[i];
+ /*
+ * Link sensing is allowed on the port if 3 conditions are true:
+ * 1. Both protocols are supported on the port (port type AUTO).
+ * 2. The device supports dual-protocol operation (DPDP).
+ * 3. FW declares that it supports link sensing.
+ */
mlx4_priv(dev)->sense.sense_allowed[i] =
- dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO;
+ ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
+ (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
+ (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
+ /* Disabling auto sense for default Eth port support */
+ mlx4_priv(dev)->sense.sense_allowed[i] = 0;
+
+ /*
+ * If "default_sense" bit is set, we move the port to "AUTO" mode
+ * and perform sense_port FW command to try and set the correct
+ * port type from beginning
+ */
+ if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
+ enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
+ dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
+ mlx4_SENSE_PORT(dev, i, &sensed_port);
+ if (sensed_port != MLX4_PORT_TYPE_NONE)
+ dev->caps.port_type[i] = sensed_port;
+ } else {
+ dev->caps.possible_type[i] = dev->caps.port_type[i];
+ }
+
if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
dev->caps.log_num_macs = dev_cap->log_max_macs[i];
mlx4_warn(dev, "Requested number of MACs is too much "
@@ -359,12 +773,21 @@
"for port %d, reducing to %d.\n",
i, 1 << dev->caps.log_num_macs);
}
- dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
+ if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
+ dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
+ mlx4_warn(dev, "Requested number of VLANs is too much "
+ "for port %d, reducing to %d.\n",
+ i, 1 << dev->caps.log_num_vlans);
+ }
}
- dev->caps.counters_mode = get_counters_mode(dev_cap->flags);
- dev->caps.max_basic_counters = 1 << ilog2(dev_cap->max_basic_counters);
- dev->caps.max_ext_counters = 1 << ilog2(dev_cap->max_ext_counters);
+ dev->caps.max_basic_counters = dev_cap->max_basic_counters;
+ dev->caps.max_extended_counters = dev_cap->max_extended_counters;
+ /* support extended counters if available */
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT)
+ dev->caps.max_counters = dev->caps.max_extended_counters;
+ else
+ dev->caps.max_counters = dev->caps.max_basic_counters;
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
@@ -371,42 +794,336 @@
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
(1 << dev->caps.log_num_macs) *
(1 << dev->caps.log_num_vlans) *
- (1 << dev->caps.log_num_prios) *
dev->caps.num_ports;
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
- dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR];
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
+ dev->caps.sync_qp = dev_cap->sync_qp;
+ if (dev->pdev->device == 0x1003)
+ dev->caps.cq_flags |= MLX4_DEV_CAP_CQ_FLAG_IO;
+
+ dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
+
+ if (!mlx4_enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
+ if (dev_cap->flags &
+ (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
+ mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+ }
+ }
+
+ if ((dev->caps.flags &
+ (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
+ mlx4_is_master(dev))
+ dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
+
+ if (!mlx4_is_slave(dev)) {
+ for (i = 0; i < dev->caps.num_ports; ++i)
+ dev->caps.def_counter_index[i] = i << 1;
+ }
+
return 0;
}
-
-static int mlx4_save_config(struct mlx4_dev *dev)
+/* The function checks whether there are live VFs and returns how many. */
+static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
- struct mlx4_port_config *config;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state;
int i;
+ int ret = 0;
- list_for_each_entry(config, &config_list, list) {
- if (config->pdev == dev->pdev) {
- for (i = 1; i <= dev->caps.num_ports; i++)
- config->port_type[i] = dev->caps.possible_type[i];
- return 0;
+ for (i = 1 /* the PPF is slave 0 */; i < dev->num_slaves; ++i) {
+ s_state = &priv->mfunc.master.slave_state[i];
+ if (s_state->active && s_state->last_cmd !=
+ MLX4_COMM_CMD_RESET) {
+ mlx4_warn(dev, "%s: slave: %d is still active\n",
+ __func__, i);
+ ret++;
}
}
+ return ret;
+}
- config = kmalloc(sizeof(struct mlx4_port_config), GFP_KERNEL);
- if (!config)
- return -ENOMEM;
+int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
+{
+ u32 qk = MLX4_RESERVED_QKEY_BASE;
- config->pdev = dev->pdev;
- for (i = 1; i <= dev->caps.num_ports; i++)
- config->port_type[i] = dev->caps.possible_type[i];
+ if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
+ qpn < dev->phys_caps.base_proxy_sqpn)
+ return -EINVAL;
- list_add_tail(&config->list, &config_list);
+ if (qpn >= dev->phys_caps.base_tunnel_sqpn)
+ /* tunnel qp */
+ qk += qpn - dev->phys_caps.base_tunnel_sqpn;
+ else
+ qk += qpn - dev->phys_caps.base_proxy_sqpn;
+ *qkey = qk;
+ return 0;
+}
+EXPORT_SYMBOL(mlx4_get_parav_qkey);
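mlx4_get_parav_qkey() hands out QKEYs at fixed offsets from MLX4_RESERVED_QKEY_BASE: tunnel QPs are offset by their distance from base_tunnel_sqpn, proxy QPs by their distance from base_proxy_sqpn. A worked example with hypothetical base values (the real bases come from the FW):

#include <stdio.h>
#include <stdint.h>

#define RESERVED_QKEY_BASE 0x80000000u /* assumed stand-in for MLX4_RESERVED_QKEY_BASE */

int main(void)
{
	uint32_t base_proxy = 64, base_tunnel = 80; /* hypothetical SQPN bases */
	uint32_t qpn = 82; /* falls in the tunnel range in this layout */
	uint32_t qkey = RESERVED_QKEY_BASE +
	    (qpn >= base_tunnel ? qpn - base_tunnel : qpn - base_proxy);

	printf("qkey for qpn %u = 0x%x\n", qpn, qkey); /* 0x80000002 */
	return 0;
}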
+void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
+{
+ struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
+
+ if (!mlx4_is_master(dev))
+ return;
+
+ priv->virt2phys_pkey[slave][port - 1][i] = val;
+}
+EXPORT_SYMBOL(mlx4_sync_pkey_table);
+
+void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
+{
+ struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
+
+ if (!mlx4_is_master(dev))
+ return;
+
+ priv->slave_node_guids[slave] = guid;
+}
+EXPORT_SYMBOL(mlx4_put_slave_node_guid);
+
+__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
+
+ if (!mlx4_is_master(dev))
+ return 0;
+
+ return priv->slave_node_guids[slave];
+}
+EXPORT_SYMBOL(mlx4_get_slave_node_guid);
+
+int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_slave;
+
+ if (!mlx4_is_master(dev))
+ return 0;
+
+ s_slave = &priv->mfunc.master.slave_state[slave];
+ return !!s_slave->active;
+}
+EXPORT_SYMBOL(mlx4_is_slave_active);
+
+static void slave_adjust_steering_mode(struct mlx4_dev *dev,
+ struct mlx4_dev_cap *dev_cap,
+ struct mlx4_init_hca_param *hca_param)
+{
+ dev->caps.steering_mode = hca_param->steering_mode;
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED)
+ dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
+ else
+ dev->caps.num_qp_per_mgm =
+ 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);
+
+ mlx4_dbg(dev, "Steering mode is: %s\n",
+ mlx4_steering_mode_str(dev->caps.steering_mode));
+}
+
+static int mlx4_slave_cap(struct mlx4_dev *dev)
+{
+ int err;
+ u32 page_size;
+ struct mlx4_dev_cap dev_cap;
+ struct mlx4_func_cap func_cap;
+ struct mlx4_init_hca_param hca_param;
+ int i;
+
+ memset(&hca_param, 0, sizeof(hca_param));
+ err = mlx4_QUERY_HCA(dev, &hca_param);
+ if (err) {
+ mlx4_err(dev, "QUERY_HCA command failed, aborting.\n");
+ return err;
+ }
+
+ /* fail if the HCA has an unknown capability */
+ if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) !=
+ HCA_GLOBAL_CAP_MASK) {
+ mlx4_err(dev, "Unknown hca global capabilities\n");
+ return -ENOSYS;
+ }
+
+ mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;
+
+ dev->caps.hca_core_clock = hca_param.hca_core_clock;
+
+ memset(&dev_cap, 0, sizeof(dev_cap));
+ dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
+ err = mlx4_dev_cap(dev, &dev_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+ return err;
+ }
+
+ err = mlx4_QUERY_FW(dev);
+ if (err)
+ mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n");
+
+ if (!hca_param.mw_enable) {
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW;
+ dev->caps.bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN;
+ }
+
+ page_size = ~dev->caps.page_size_cap + 1;
+ mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
+ if (page_size > PAGE_SIZE) {
+ mlx4_err(dev, "HCA minimum page size of %d bigger than "
+ "kernel PAGE_SIZE of %d, aborting.\n",
+ page_size, (int)PAGE_SIZE);
+ return -ENODEV;
+ }
+
+ /* slave gets uar page size from QUERY_HCA fw command */
+ dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);
+
+ /* TODO: relax this assumption */
+ if (dev->caps.uar_page_size != PAGE_SIZE) {
+ mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n",
+ dev->caps.uar_page_size, (int)PAGE_SIZE);
+ return -ENODEV;
+ }
+
+ memset(&func_cap, 0, sizeof(func_cap));
+ err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n",
+ err);
+ return err;
+ }
+
+ if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
+ PF_CONTEXT_BEHAVIOUR_MASK) {
+ mlx4_err(dev, "Unknown pf context behaviour\n");
+ return -ENOSYS;
+ }
+
+ dev->caps.num_ports = func_cap.num_ports;
+ dev->quotas.qp = func_cap.qp_quota;
+ dev->quotas.srq = func_cap.srq_quota;
+ dev->quotas.cq = func_cap.cq_quota;
+ dev->quotas.mpt = func_cap.mpt_quota;
+ dev->quotas.mtt = func_cap.mtt_quota;
+ dev->caps.num_qps = 1 << hca_param.log_num_qps;
+ dev->caps.num_srqs = 1 << hca_param.log_num_srqs;
+ dev->caps.num_cqs = 1 << hca_param.log_num_cqs;
+ dev->caps.num_mpts = 1 << hca_param.log_mpt_sz;
+ dev->caps.num_eqs = func_cap.max_eq;
+ dev->caps.reserved_eqs = func_cap.reserved_eq;
+ dev->caps.num_pds = MLX4_NUM_PDS;
+ dev->caps.num_mgms = 0;
+ dev->caps.num_amgms = 0;
+
+ if (dev->caps.num_ports > MLX4_MAX_PORTS) {
+ mlx4_err(dev, "HCA has %d ports, but we only support %d, "
+ "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS);
+ return -ENODEV;
+ }
+
+ dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+
+ if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
+ !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ for (i = 1; i <= dev->caps.num_ports; ++i) {
+ err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_FUNC_CAP port command failed for"
+ " port %d, aborting (%d).\n", i, err);
+ goto err_mem;
+ }
+ dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
+ dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
+ dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
+ dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
+ dev->caps.def_counter_index[i - 1] = func_cap.def_counter_index;
+
+ dev->caps.port_mask[i] = dev->caps.port_type[i];
+ err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
+ &dev->caps.gid_table_len[i],
+ &dev->caps.pkey_table_len[i]);
+ if (err)
+ goto err_mem;
+ }
+
+ if (dev->caps.uar_page_size * (dev->caps.num_uars -
+ dev->caps.reserved_uars) >
+ pci_resource_len(dev->pdev, 2)) {
+ mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than "
+ "PCI resource 2 size of 0x%llx, aborting.\n",
+ dev->caps.uar_page_size * dev->caps.num_uars,
+ (unsigned long long) pci_resource_len(dev->pdev, 2));
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
+ dev->caps.eqe_size = 64;
+ dev->caps.eqe_factor = 1;
+ } else {
+ dev->caps.eqe_size = 32;
+ dev->caps.eqe_factor = 0;
+ }
+
+ if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
+ dev->caps.cqe_size = 64;
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+ } else {
+ dev->caps.cqe_size = 32;
+ }
+
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
+ mlx4_warn(dev, "Timestamping is not supported in slave mode.\n");
+
+ slave_adjust_steering_mode(dev, &dev_cap, &hca_param);
+
return 0;
+
+err_mem:
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
+ dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
+
+ return err;
}
+static void mlx4_request_modules(struct mlx4_dev *dev)
+{
+ int port;
+ int has_ib_port = false;
+ int has_eth_port = false;
+#define EN_DRV_NAME "mlx4_en"
+#define IB_DRV_NAME "mlx4_ib"
+
+ for (port = 1; port <= dev->caps.num_ports; port++) {
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
+ has_ib_port = true;
+ else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
+ has_eth_port = true;
+ }
+
+ if (has_ib_port)
+ request_module_nowait(IB_DRV_NAME);
+ if (has_eth_port)
+ request_module_nowait(EN_DRV_NAME);
+}
+
/*
* Change the port configuration of the device.
* Every user of this function must hold the port mutex.
@@ -421,16 +1138,15 @@
for (port = 0; port < dev->caps.num_ports; port++) {
/* Change the port type only if the new type is different
* from the current, and not set to Auto */
- if (port_types[port] != dev->caps.port_type[port + 1]) {
+ if (port_types[port] != dev->caps.port_type[port + 1])
change = 1;
- dev->caps.port_type[port + 1] = port_types[port];
- }
}
if (change) {
mlx4_unregister_device(dev);
for (port = 1; port <= dev->caps.num_ports; port++) {
mlx4_CLOSE_PORT(dev, port);
- err = mlx4_SET_PORT(dev, port);
+ dev->caps.port_type[port] = port_types[port - 1];
+ err = mlx4_SET_PORT(dev, port, -1);
if (err) {
mlx4_err(dev, "Failed to set port %d, "
"aborting\n", port);
@@ -438,8 +1154,12 @@
}
}
mlx4_set_port_mask(dev);
- mlx4_save_config(dev);
err = mlx4_register_device(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to register device\n");
+ goto out;
+ }
+ mlx4_request_modules(dev);
}
out:
@@ -490,6 +1210,13 @@
return -EINVAL;
}
+ if ((info->tmp_type & mdev->caps.supported_type[info->port]) !=
+ info->tmp_type) {
+ mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n",
+ info->port);
+ return -EINVAL;
+ }
+
mlx4_stop_sense(mdev);
mutex_lock(&priv->port_mutex);
/* Possible type is always the one that was delivered */
@@ -502,14 +1229,8 @@
types[i] = mdev->caps.port_type[i+1];
}
- if (priv->trig) {
- if (++priv->changed_ports < mdev->caps.num_ports)
- goto out;
- else
- priv->trig = priv->changed_ports = 0;
- }
-
- if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+ if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
+ !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
for (i = 1; i <= mdev->caps.num_ports; i++) {
if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
mdev->caps.possible_type[i] = mdev->caps.port_type[i];
@@ -544,27 +1265,140 @@
return err ? err : count;
}
-static ssize_t trigger_port(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+enum ibta_mtu {
+ IB_MTU_256 = 1,
+ IB_MTU_512 = 2,
+ IB_MTU_1024 = 3,
+ IB_MTU_2048 = 4,
+ IB_MTU_4096 = 5
+};
+
+static inline int int_to_ibta_mtu(int mtu)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct mlx4_dev *mdev = pci_get_drvdata(pdev);
- struct mlx4_priv *priv = container_of(mdev, struct mlx4_priv, dev);
+ switch (mtu) {
+ case 256: return IB_MTU_256;
+ case 512: return IB_MTU_512;
+ case 1024: return IB_MTU_1024;
+ case 2048: return IB_MTU_2048;
+ case 4096: return IB_MTU_4096;
+ default: return -1;
+ }
+}
- if (!priv)
- return -ENODEV;
+static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
+{
+ switch (mtu) {
+ case IB_MTU_256: return 256;
+ case IB_MTU_512: return 512;
+ case IB_MTU_1024: return 1024;
+ case IB_MTU_2048: return 2048;
+ case IB_MTU_4096: return 4096;
+ default: return -1;
+ }
+}
+static ssize_t
+show_board(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info,
+ board_attr);
+ struct mlx4_dev *mdev = info->dev;
+
+ return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
+ mdev->board_id);
+}
+
+static ssize_t
+show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info,
+ hca_attr);
+ struct mlx4_dev *mdev = info->dev;
+
+ return sprintf(buf, "MT%d\n", mdev->pdev->device);
+}
+
+static ssize_t
+show_firmware_version(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info,
+ firmware_attr);
+ struct mlx4_dev *mdev = info->dev;
+
+ return sprintf(buf, "%d.%d.%d\n", (int)(mdev->caps.fw_ver >> 32),
+ (int)(mdev->caps.fw_ver >> 16) & 0xffff,
+ (int)mdev->caps.fw_ver & 0xffff);
+}
+
+static ssize_t show_port_ib_mtu(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+ port_mtu_attr);
+ struct mlx4_dev *mdev = info->dev;
+
+ /* When port type is eth, port mtu value isn't used. */
+ if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
+ return -EINVAL;
+
+ sprintf(buf, "%d\n",
+ ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
+ return strlen(buf);
+}
+
+static ssize_t set_port_ib_mtu(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+ port_mtu_attr);
+ struct mlx4_dev *mdev = info->dev;
+ struct mlx4_priv *priv = mlx4_priv(mdev);
+ int err, port, mtu, ibta_mtu = -1;
+
+ if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
+ mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
+ return -EINVAL;
+ }
+
+ mtu = (int) simple_strtol(buf, NULL, 0);
+ ibta_mtu = int_to_ibta_mtu(mtu);
+
+ if (ibta_mtu < 0) {
+ mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf);
+ return -EINVAL;
+ }
+
+ mdev->caps.port_ib_mtu[info->port] = ibta_mtu;
+
+ mlx4_stop_sense(mdev);
mutex_lock(&priv->port_mutex);
- priv->trig = 1;
+ mlx4_unregister_device(mdev);
+ for (port = 1; port <= mdev->caps.num_ports; port++) {
+ mlx4_CLOSE_PORT(mdev, port);
+ err = mlx4_SET_PORT(mdev, port, -1);
+ if (err) {
+ mlx4_err(mdev, "Failed to set port %d, "
+ "aborting\n", port);
+ goto err_set_port;
+ }
+ }
+ err = mlx4_register_device(mdev);
+err_set_port:
mutex_unlock(&priv->port_mutex);
- return count;
+ mlx4_start_sense(mdev);
+ return err ? err : count;
}
-DEVICE_ATTR(port_trigger, S_IWUGO, NULL, trigger_port);
static int mlx4_load_fw(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- int err;
+ int err, unmap_flag = 0;
priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
GFP_HIGHUSER | __GFP_NOWARN, 0);
@@ -588,10 +1422,13 @@
return 0;
err_unmap_fa:
- mlx4_UNMAP_FA(dev);
+ unmap_flag = mlx4_UNMAP_FA(dev);
+ if (unmap_flag)
+ pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
err_free:
- mlx4_free_icm(dev, priv->fw.fw_icm, 0);
+ if (!unmap_flag)
+ mlx4_free_icm(dev, priv->fw.fw_icm, 0);
return err;
}
@@ -600,6 +1437,7 @@
{
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
+ int num_eqs;
err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
cmpt_base +
@@ -629,12 +1467,13 @@
if (err)
goto err_srq;
+ num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
+ dev->caps.num_eqs;
err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
cmpt_base +
((u64) (MLX4_CMPT_TYPE_EQ *
cmpt_entry_sz) << MLX4_CMPT_SHIFT),
- cmpt_entry_sz,
- dev->caps.num_eqs, dev->caps.num_eqs, 0, 0);
+ cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
if (err)
goto err_cq;
@@ -658,7 +1497,8 @@
{
struct mlx4_priv *priv = mlx4_priv(dev);
u64 aux_pages;
- int err;
+ int num_eqs;
+ int err, unmap_flag = 0;
err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
if (err) {
@@ -689,10 +1529,12 @@
goto err_unmap_aux;
}
+
+ num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
+ dev->caps.num_eqs;
err = mlx4_init_icm_table(dev, &priv->eq_table.table,
init_hca->eqc_base, dev_cap->eqc_entry_sz,
- dev->caps.num_eqs, dev->caps.num_eqs,
- 0, 0);
+ num_eqs, num_eqs, 0, 0);
if (err) {
mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
goto err_unmap_cmpt;
@@ -712,7 +1554,7 @@
err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
init_hca->mtt_base,
dev->caps.mtt_entry_sz,
- dev->caps.num_mtt_segs,
+ dev->caps.num_mtts,
dev->caps.reserved_mtts, 1, 0);
if (err) {
mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
@@ -794,12 +1636,15 @@
}
/*
- * It's not strictly required, but for simplicity just map the
- * whole multicast group table now. The table isn't very big
- * and it's a lot easier than trying to track ref counts.
+ * For flow steering device managed mode it is required to use
+ * mlx4_init_icm_table. For B0 steering mode it's not strictly
+ * required, but for simplicity just map the whole multicast
+ * group table now. The table isn't very big and it's a lot
+ * easier than trying to track ref counts.
*/
err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
- init_hca->mc_base, MLX4_MGM_ENTRY_SIZE,
+ init_hca->mc_base,
+ mlx4_get_mgm_entry_size(dev),
dev->caps.num_mgms + dev->caps.num_amgms,
dev->caps.num_mgms + dev->caps.num_amgms,
0, 0);
@@ -844,10 +1689,13 @@
mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
err_unmap_aux:
- mlx4_UNMAP_ICM_AUX(dev);
+ unmap_flag = mlx4_UNMAP_ICM_AUX(dev);
+ if (unmap_flag)
+ pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n");
err_free_aux:
- mlx4_free_icm(dev, priv->fw.aux_icm, 0);
+ if (!unmap_flag)
+ mlx4_free_icm(dev, priv->fw.aux_icm, 0);
return err;
}
@@ -871,10 +1719,22 @@
mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
- mlx4_UNMAP_ICM_AUX(dev);
- mlx4_free_icm(dev, priv->fw.aux_icm, 0);
+ if (!mlx4_UNMAP_ICM_AUX(dev))
+ mlx4_free_icm(dev, priv->fw.aux_icm, 0);
+ else
+ pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n");
}
+static void mlx4_slave_exit(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
+ mlx4_warn(dev, "Failed to close slave function.\n");
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+}
+
static int map_bf_area(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -882,8 +1742,13 @@
resource_size_t bf_len;
int err = 0;
- bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT);
- bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT);
+ if (!dev->caps.bf_reg_size)
+ return -ENXIO;
+
+ bf_start = pci_resource_start(dev->pdev, 2) +
+ (dev->caps.num_uars << PAGE_SHIFT);
+ bf_len = pci_resource_len(dev->pdev, 2) -
+ (dev->caps.num_uars << PAGE_SHIFT);
priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
if (!priv->bf_mapping)
err = -ENOMEM;
@@ -897,112 +1762,406 @@
io_mapping_free(mlx4_priv(dev)->bf_mapping);
}
+int mlx4_read_clock(struct mlx4_dev *dev)
+{
+ u32 clockhi, clocklo, clockhi1;
+ cycle_t cycles;
+ int i;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (!priv->clock_mapping)
+ return -ENOTSUPP;
+
+ for (i = 0; i < 10; i++) {
+ clockhi = swab32(readl(priv->clock_mapping));
+ clocklo = swab32(readl(priv->clock_mapping + 4));
+ clockhi1 = swab32(readl(priv->clock_mapping));
+ if (clockhi == clockhi1)
+ break;
+ }
+
+ cycles = (u64) clockhi << 32 | (u64) clocklo;
+
+ return cycles;
+}
+EXPORT_SYMBOL_GPL(mlx4_read_clock);
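mlx4_read_clock() uses the classic hi/lo/hi sequence to read a 64-bit counter exposed as two 32-bit registers: if the high word changed while the low word was sampled, the read raced a carry and is retried. A generic sketch of the technique, with dummy register reads standing in for the mapped clock registers:

#include <stdint.h>

static volatile uint32_t reg_hi, reg_lo; /* stand-ins for the mapped registers */
static uint32_t read_hi(void) { return reg_hi; }
static uint32_t read_lo(void) { return reg_lo; }

/* Retry while the high word moves under us (a carry from the low word). */
static uint64_t read_clock64(void)
{
	uint32_t hi, lo, hi2;
	int i;

	for (i = 0; i < 10; i++) {
		hi = read_hi();
		lo = read_lo();
		hi2 = read_hi();
		if (hi == hi2)
			break;
	}
	return ((uint64_t)hi << 32) | lo;
}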
+
+
+static int map_internal_clock(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ priv->clock_mapping = ioremap(pci_resource_start(dev->pdev,
+ priv->fw.clock_bar) +
+ priv->fw.clock_offset, MLX4_CLOCK_SIZE);
+
+ if (!priv->clock_mapping)
+ return -ENOMEM;
+
+ return 0;
+}
+
+
+int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
+ struct mlx4_clock_params *params)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (mlx4_is_slave(dev))
+ return -ENOTSUPP;
+ if (!params)
+ return -EINVAL;
+
+ params->bar = priv->fw.clock_bar;
+ params->offset = priv->fw.clock_offset;
+ params->size = MLX4_CLOCK_SIZE;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params);
+
+static void unmap_internal_clock(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (priv->clock_mapping)
+ iounmap(priv->clock_mapping);
+}
+
static void mlx4_close_hca(struct mlx4_dev *dev)
{
+ unmap_internal_clock(dev);
unmap_bf_area(dev);
- mlx4_CLOSE_HCA(dev, 0);
- mlx4_free_icms(dev);
- mlx4_UNMAP_FA(dev);
- mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
+ if (mlx4_is_slave(dev)) {
+ mlx4_slave_exit(dev);
+ } else {
+ mlx4_CLOSE_HCA(dev, 0);
+ mlx4_free_icms(dev);
+
+ if (!mlx4_UNMAP_FA(dev))
+ mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
+ else
+ pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
+ }
}
+static int mlx4_init_slave(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u64 dma = (u64) priv->mfunc.vhcr_dma;
+ int num_of_reset_retries = NUM_OF_RESET_RETRIES;
+ int ret_from_reset = 0;
+ u32 slave_read;
+ u32 cmd_channel_ver;
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ priv->cmd.max_cmds = 1;
+ mlx4_warn(dev, "Sending reset\n");
+ ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
+ MLX4_COMM_TIME);
+ /* if we are in the middle of FLR, the slave will try
+ * NUM_OF_RESET_RETRIES times before giving up. */
+ if (ret_from_reset) {
+ if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
+ msleep(SLEEP_TIME_IN_RESET);
+ while (ret_from_reset && num_of_reset_retries) {
+ mlx4_warn(dev, "slave is currently in the"
+ "middle of FLR. retrying..."
+ "(try num:%d)\n",
+ (NUM_OF_RESET_RETRIES -
+ num_of_reset_retries + 1));
+ ret_from_reset =
+ mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET,
+ 0, MLX4_COMM_TIME);
+ num_of_reset_retries = num_of_reset_retries - 1;
+ }
+ } else
+ goto err;
+ }
+
+ /* check the driver version - the slave I/F revision
+ * must match the master's */
+ slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
+ cmd_channel_ver = mlx4_comm_get_version();
+
+ if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
+ MLX4_COMM_GET_IF_REV(slave_read)) {
+ mlx4_err(dev, "slave driver version is not supported"
+ " by the master\n");
+ goto err;
+ }
+
+ mlx4_warn(dev, "Sending vhcr0\n");
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
+ MLX4_COMM_TIME))
+ goto err;
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
+ MLX4_COMM_TIME))
+ goto err;
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
+ MLX4_COMM_TIME))
+ goto err;
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
+ goto err;
+
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ return 0;
+
+err:
+ mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ return -EIO;
+}
+
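mlx4_init_slave() above publishes the 64-bit VHCR DMA address to the master 16 bits at a time over the comm channel (VHCR0 carries bits 63:48, down to VHCR_EN with the low word). A sketch of the same split, assuming the channel only latches the low 16 bits of each parameter:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t dma = 0x123456789abc0000ULL; /* hypothetical VHCR DMA address */

	printf("VHCR0:   0x%04x\n", (unsigned)((dma >> 48) & 0xffff));
	printf("VHCR1:   0x%04x\n", (unsigned)((dma >> 32) & 0xffff));
	printf("VHCR2:   0x%04x\n", (unsigned)((dma >> 16) & 0xffff));
	printf("VHCR_EN: 0x%04x\n", (unsigned)(dma & 0xffff));
	return 0;
}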
+static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
+{
+ int i;
+
+ for (i = 1; i <= dev->caps.num_ports; i++) {
+ if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
+ dev->caps.gid_table_len[i] =
+ mlx4_get_slave_num_gids(dev, 0);
+ else
+ dev->caps.gid_table_len[i] = 1;
+ dev->caps.pkey_table_len[i] =
+ dev->phys_caps.pkey_phys_table_len[i] - 1;
+ }
+}
+
+static int choose_log_fs_mgm_entry_size(int qp_per_entry)
+{
+ int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
+
+ for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
+ i++) {
+ if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
+ break;
+ }
+
+ return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
+}
+
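choose_log_fs_mgm_entry_size() picks the smallest entry size whose QP capacity covers the requested qp_per_entry. The capacity formula treats a 2^i-byte entry as (2^i / 16) 16-byte lines, subtracts 2 lines of header, and packs 4 QPNs per line. A quick table of the resulting capacities (the 7..12 range is an assumed stand-in for MLX4_MIN/MAX_MGM_LOG_ENTRY_SIZE):

#include <stdio.h>

/* QPs that fit in one multicast group entry of 2^i bytes,
 * per the formula in choose_log_fs_mgm_entry_size() above. */
static int qp_per_mgm_entry(int log_entry_sz)
{
	return 4 * ((1 << log_entry_sz) / 16 - 2);
}

int main(void)
{
	int i;

	for (i = 7; i <= 12; i++)
		printf("log entry size %2d -> %4d QPs per entry\n",
		       i, qp_per_mgm_entry(i));
	return 0;
}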
+static void choose_steering_mode(struct mlx4_dev *dev,
+ struct mlx4_dev_cap *dev_cap)
+{
+ int nvfs;
+
+ mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(dev->pdev), 0, &nvfs);
+ if (high_rate_steer && !mlx4_is_mfunc(dev)) {
+ dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER |
+ MLX4_DEV_CAP_FLAG_VEP_UC_STEER);
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN;
+ }
+
+ if (mlx4_log_num_mgm_entry_size == -1 &&
+ dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
+ (!mlx4_is_mfunc(dev) ||
+ (dev_cap->fs_max_num_qp_per_entry >= (nvfs + 1))) &&
+ choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
+ MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
+ dev->oper_log_mgm_entry_size =
+ choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
+ dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
+ dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
+ } else {
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
+ dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
+ dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
+ else {
+ dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
+
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
+ dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
+ mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
+ "set to use B0 steering. Falling back to A0 steering mode.\n");
+ }
+ dev->oper_log_mgm_entry_size =
+ mlx4_log_num_mgm_entry_size > 0 ?
+ mlx4_log_num_mgm_entry_size :
+ MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
+ dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
+ }
+ mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
+ "log_num_mgm_entry_size = %d\n",
+ mlx4_steering_mode_str(dev->caps.steering_mode),
+ dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size);
+}
+
static int mlx4_init_hca(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_dev_cap *dev_cap = NULL;
struct mlx4_adapter adapter;
- struct mlx4_dev_cap dev_cap;
struct mlx4_mod_stat_cfg mlx4_cfg;
struct mlx4_profile profile;
struct mlx4_init_hca_param init_hca;
- struct mlx4_port_config *config;
u64 icm_size;
int err;
- int i;
- err = mlx4_QUERY_FW(dev);
- if (err) {
- if (err == -EACCES)
- mlx4_info(dev, "non-primary physical function, skipping.\n");
- else
- mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
- return err;
- }
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_QUERY_FW(dev);
+ if (err) {
+ if (err == -EACCES)
+ mlx4_info(dev, "non-primary physical function, skipping.\n");
+ else
+ mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
+ return err;
+ }
- err = mlx4_load_fw(dev);
- if (err) {
- mlx4_err(dev, "Failed to start FW, aborting.\n");
- return err;
- }
+ err = mlx4_load_fw(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to start FW, aborting.\n");
+ return err;
+ }
- mlx4_cfg.log_pg_sz_m = 1;
- mlx4_cfg.log_pg_sz = 0;
- err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
- if (err)
- mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
+ mlx4_cfg.log_pg_sz_m = 1;
+ mlx4_cfg.log_pg_sz = 0;
+ err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
+ if (err)
+ mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
- err = mlx4_dev_cap(dev, &dev_cap);
- if (err) {
- mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
- goto err_stop_fw;
- }
+ dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL);
+ if (!dev_cap) {
+ mlx4_err(dev, "Failed to allocate memory for dev_cap\n");
+ err = -ENOMEM;
+ goto err_stop_fw;
+ }
- process_mod_param_profile();
- profile = default_profile;
+ err = mlx4_dev_cap(dev, dev_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+ goto err_stop_fw;
+ }
- list_for_each_entry(config, &config_list, list) {
- if (config->pdev == dev->pdev) {
- for (i = 1; i <= dev->caps.num_ports; i++) {
- dev->caps.possible_type[i] = config->port_type[i];
- if (config->port_type[i] != MLX4_PORT_TYPE_AUTO)
- dev->caps.port_type[i] = config->port_type[i];
+ choose_steering_mode(dev, dev_cap);
+
+ if (mlx4_is_master(dev))
+ mlx4_parav_master_pf_caps(dev);
+
+ process_mod_param_profile(&profile);
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ profile.num_mcg = MLX4_FS_NUM_MCG;
+
+ icm_size = mlx4_make_profile(dev, &profile, dev_cap,
+ &init_hca);
+ if ((long long) icm_size < 0) {
+ err = icm_size;
+ goto err_stop_fw;
+ }
+
+ dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
+
+ init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
+ init_hca.uar_page_sz = PAGE_SHIFT - 12;
+
+ err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size);
+ if (err)
+ goto err_stop_fw;
+
+ init_hca.mw_enable = 1;
+
+ err = mlx4_INIT_HCA(dev, &init_hca);
+ if (err) {
+ mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
+ goto err_free_icm;
+ }
+
+ /*
+ * Read the HCA frequency via the QUERY_HCA command
+ */
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
+ memset(&init_hca, 0, sizeof(init_hca));
+ err = mlx4_QUERY_HCA(dev, &init_hca);
+ if (err) {
+ mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n");
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
+ } else {
+ dev->caps.hca_core_clock =
+ init_hca.hca_core_clock;
}
+
+ /* In case we got HCA frequency 0 - disable timestamping
+ * to avoid dividing by zero
+ */
+ if (!dev->caps.hca_core_clock) {
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
+ mlx4_err(dev, "HCA frequency is 0. Timestamping is not supported.");
+ } else if (map_internal_clock(dev)) {
+ /* Map internal clock,
+ * in case of failure disable timestamping
+ */
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
+ mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n");
+ }
}
- }
+ } else {
+ err = mlx4_init_slave(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize slave\n");
+ return err;
+ }
- mlx4_set_port_mask(dev);
- icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca);
- if ((long long) icm_size < 0) {
- err = icm_size;
- goto err_stop_fw;
+ err = mlx4_slave_cap(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to obtain slave caps\n");
+ goto err_close;
+ }
}
if (map_bf_area(dev))
- mlx4_dbg(dev, "Kernel support for blue flame is not available for kernels < 2.6.28\n");
+ mlx4_dbg(dev, "Failed to map blue flame area\n");
- init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
+ /* Only the master sets the ports; all the rest get it from it. */
+ if (!mlx4_is_slave(dev))
+ mlx4_set_port_mask(dev);
- err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
- if (err)
- goto err_stop_fw;
-
- err = mlx4_INIT_HCA(dev, &init_hca);
- if (err) {
- mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
- goto err_free_icm;
- }
-
err = mlx4_QUERY_ADAPTER(dev, &adapter);
if (err) {
mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
- goto err_close;
+ goto unmap_bf;
}
priv->eq_table.inta_pin = adapter.inta_pin;
memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
+ memcpy(dev->vsd, adapter.vsd, sizeof(dev->vsd));
+ dev->vsd_vendor_id = adapter.vsd_vendor_id;
+ if (!mlx4_is_slave(dev))
+ kfree(dev_cap);
+
return 0;
+unmap_bf:
+ if (!mlx4_is_slave(dev))
+ unmap_internal_clock(dev);
+ unmap_bf_area(dev);
+
+ if (mlx4_is_slave(dev)) {
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ }
+
err_close:
- mlx4_CLOSE_HCA(dev, 0);
+ if (mlx4_is_slave(dev))
+ mlx4_slave_exit(dev);
+ else
+ mlx4_CLOSE_HCA(dev, 0);
err_free_icm:
- mlx4_free_icms(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_free_icms(dev);
err_stop_fw:
- unmap_bf_area(dev);
- mlx4_UNMAP_FA(dev);
- mlx4_free_icm(dev, priv->fw.fw_icm, 0);
-
+ if (!mlx4_is_slave(dev)) {
+ if (!mlx4_UNMAP_FA(dev))
+ mlx4_free_icm(dev, priv->fw.fw_icm, 0);
+ else
+ pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
+ kfree(dev_cap);
+ }
return err;
}
@@ -1009,68 +2168,539 @@
static int mlx4_init_counters_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- int err;
- int nent;
+ int nent_pow2, port_indx, vf_index, num_counters;
+ int res, index = 0;
+ struct counter_index *new_counter_index;
- switch (dev->caps.counters_mode) {
- case MLX4_CUNTERS_BASIC:
- nent = dev->caps.max_basic_counters;
- break;
- case MLX4_CUNTERS_EXT:
- nent = dev->caps.max_ext_counters;
- break;
- default:
+
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
return -ENOENT;
+
+ if (!mlx4_is_slave(dev) &&
+ dev->caps.max_counters == dev->caps.max_extended_counters) {
+ res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0,
+ MLX4_CMD_SET_IF_STAT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (res) {
+ mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res);
+ return res;
+ }
}
- err = mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0);
- if (err)
- return err;
+ mutex_init(&priv->counters_table.mutex);
+
+ if (mlx4_is_slave(dev)) {
+ for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
+ INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
+ if (dev->caps.def_counter_index[port_indx] != 0xFF) {
+ new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
+ if (!new_counter_index)
+ return -ENOMEM;
+ new_counter_index->index = dev->caps.def_counter_index[port_indx];
+ list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]);
+ }
+ }
+ mlx4_dbg(dev, "%s: slave allocated %d counters for %d ports\n",
+ __func__, dev->caps.num_ports, dev->caps.num_ports);
+ return 0;
+ }
+
+ nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
+
+ for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
+ INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
+ /* Allocate 2 counters per port for the PF: the ETH default
+ * counters are 0,2 and the RoCE default counters are 1,3. */
+ for (num_counters = 0; num_counters < 2; num_counters++, index++) {
+ new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
+ if (!new_counter_index)
+ return -ENOMEM;
+ new_counter_index->index = index;
+ list_add_tail(&new_counter_index->list,
+ &priv->counters_table.global_port_list[port_indx]);
+ }
+ }
+
+ if (mlx4_is_master(dev)) {
+ for (vf_index = 0; vf_index < dev->num_vfs; vf_index++) {
+ for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
+ INIT_LIST_HEAD(&priv->counters_table.vf_list[vf_index][port_indx]);
+ new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
+ if (!new_counter_index)
+ return -ENOMEM;
+ if (index < nent_pow2 - 2) {
+ new_counter_index->index = index;
+ index++;
+ } else {
+ new_counter_index->index = MLX4_SINK_COUNTER_INDEX;
+ }
+
+ list_add_tail(&new_counter_index->list,
+ &priv->counters_table.vf_list[vf_index][port_indx]);
+ }
+ }
+
+ res = mlx4_bitmap_init(&priv->counters_table.bitmap,
+ nent_pow2, nent_pow2 - 1,
+ index, 1);
+ mlx4_dbg(dev, "%s: master allocated %d counters for %d VFs\n",
+ __func__, index, dev->num_vfs);
+ } else {
+ res = mlx4_bitmap_init(&priv->counters_table.bitmap,
+ nent_pow2, nent_pow2 - 1,
+ index, 1);
+ mlx4_dbg(dev, "%s: native allocated %d counters for %d ports\n",
+ __func__, index, dev->caps.num_ports);
+ }
+
return 0;
+
}
static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
{
- switch (dev->caps.counters_mode) {
- case MLX4_CUNTERS_BASIC:
- case MLX4_CUNTERS_EXT:
- mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
- break;
- default:
- break;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, j;
+ struct counter_index *port, *tmp_port;
+ struct counter_index *vf, *tmp_vf;
+
+ mutex_lock(&priv->counters_table.mutex);
+
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) {
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ list_for_each_entry_safe(port, tmp_port,
+ &priv->counters_table.global_port_list[i],
+ list) {
+ list_del(&port->list);
+ kfree(port);
+ }
+ }
+ if (!mlx4_is_slave(dev)) {
+ for (i = 0; i < dev->num_vfs; i++) {
+ for (j = 0; j < dev->caps.num_ports; j++) {
+ list_for_each_entry_safe(vf, tmp_vf,
+ &priv->counters_table.vf_list[i][j],
+ list) {
+ /* clear the counter statistic */
+ if (__mlx4_clear_if_stat(dev, vf->index))
+ mlx4_dbg(dev, "%s: reset counter %d failed\n",
+ __func__, vf->index);
+ list_del(&vf->list);
+ kfree(vf);
+ }
+ }
+ }
+ mlx4_bitmap_cleanup(&priv->counters_table.bitmap);
+ }
}
+ mutex_unlock(&priv->counters_table.mutex);
}
-int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
+int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave)
{
struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, first;
+ struct counter_index *vf, *tmp_vf;
- switch (dev->caps.counters_mode) {
- case MLX4_CUNTERS_BASIC:
- case MLX4_CUNTERS_EXT:
- *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
- if (*idx == -1)
- return -ENOMEM;
+ /* clean the VF's counters for the next usage */
+ if (slave > 0 && slave <= dev->num_vfs) {
+ mlx4_dbg(dev, "%s: free counters of slave(%d)\n"
+ , __func__, slave);
+
+ mutex_lock(&priv->counters_table.mutex);
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ first = 0;
+ list_for_each_entry_safe(vf, tmp_vf,
+ &priv->counters_table.vf_list[slave - 1][i],
+ list) {
+ /* clear the counter statistic */
+ if (__mlx4_clear_if_stat(dev, vf->index))
+ mlx4_dbg(dev, "%s: reset counter %d failed\n",
+ __func__, vf->index);
+ if (first++ && vf->index != MLX4_SINK_COUNTER_INDEX) {
+ mlx4_dbg(dev, "%s: delete counter index %d for slave %d and port %d\n"
+ , __func__, vf->index, slave, i + 1);
+ mlx4_bitmap_free(&priv->counters_table.bitmap, vf->index, MLX4_USE_RR);
+ list_del(&vf->list);
+ kfree(vf);
+ } else {
+ mlx4_dbg(dev, "%s: can't delete default counter index %d for slave %d and port %d\n"
+ , __func__, vf->index, slave, i + 1);
+ }
+ }
+ }
+ mutex_unlock(&priv->counters_table.mutex);
+ }
+
+ return 0;
+}
+
+int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct counter_index *new_counter_index;
+
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
+ return -ENOENT;
+
+ if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) ||
+ (port < 0) || (port > MLX4_MAX_PORTS)) {
+ mlx4_dbg(dev, "%s: invalid slave(%d) or port(%d) index\n",
+ __func__, slave, port);
+ return -EINVAL;
+ }
+
+ /* handle old guests whose requests do not carry a port index */
+ if (port == 0) {
+ *idx = MLX4_SINK_COUNTER_INDEX;
+ mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n"
+ , __func__, *idx, slave, port);
return 0;
- default:
- return -ENOMEM;
}
+
+ mutex_lock(&priv->counters_table.mutex);
+
+ *idx = mlx4_bitmap_alloc(&priv->counters_table.bitmap);
+ /* if no resources are left, return the default counter of the slave and port */
+ if (*idx == -1) {
+ if (slave == 0) { /* native or master */
+ new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
+ struct counter_index,
+ list);
+ } else {
+ new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next,
+ struct counter_index,
+ list);
+ }
+
+ *idx = new_counter_index->index;
+ mlx4_dbg(dev, "%s: allocated defualt counter index %d for slave %d port %d\n"
+ , __func__, *idx, slave, port);
+ goto out;
+ }
+
+ if (slave == 0) { /* native or master */
+ new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
+ if (!new_counter_index)
+ goto no_mem;
+ new_counter_index->index = *idx;
+ list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]);
+ } else {
+ new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
+ if (!new_counter_index)
+ goto no_mem;
+ new_counter_index->index = *idx;
+ list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[slave - 1][port - 1]);
+ }
+
+ mlx4_dbg(dev, "%s: allocated counter index %d for slave %d port %d\n"
+ , __func__, *idx, slave, port);
+out:
+ mutex_unlock(&priv->counters_table.mutex);
+ return 0;
+
+no_mem:
+ mlx4_bitmap_free(&priv->counters_table.bitmap, *idx, MLX4_USE_RR);
+ mutex_unlock(&priv->counters_table.mutex);
+ *idx = MLX4_SINK_COUNTER_INDEX;
+ mlx4_dbg(dev, "%s: failed err (%d)\n"
+ , __func__, -ENOMEM);
+ return -ENOMEM;
}
+
+int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx)
+{
+ u64 out_param;
+ int err;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct counter_index *new_counter_index, *c_index;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param,
+ ((u32) port) << 8 | (u32) RES_COUNTER,
+ RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (!err) {
+ *idx = get_param_l(&out_param);
+ if (*idx == MLX4_SINK_COUNTER_INDEX)
+ return -ENOSPC;
+
+ mutex_lock(&priv->counters_table.mutex);
+ c_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
+ struct counter_index,
+ list);
+ mutex_unlock(&priv->counters_table.mutex);
+ if (c_index->index == *idx)
+ return -EEXIST;
+
+ if (mlx4_is_slave(dev)) {
+ new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
+ if (!new_counter_index) {
+ mlx4_counter_free(dev, port, *idx);
+ return -ENOMEM;
+ }
+ new_counter_index->index = *idx;
+ mutex_lock(&priv->counters_table.mutex);
+ list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]);
+ mutex_unlock(&priv->counters_table.mutex);
+ mlx4_dbg(dev, "%s: allocated counter index %d for port %d\n"
+ , __func__, *idx, port);
+ }
+ }
+ return err;
+ }
+ return __mlx4_counter_alloc(dev, 0, port, idx);
+}
EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
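A consumer of the counter API above typically allocates an index per port, attaches it to a QP context or flow rule, and frees it on teardown. A minimal sketch of that pattern (assumes the mlx4 driver headers are in scope; error handling trimmed):

/* Sketch only: pairs mlx4_counter_alloc()/mlx4_counter_free() the way
 * a ULP such as mlx4_en or mlx4_ib would. */
static int demo_counter_use(struct mlx4_dev *dev, u8 port)
{
	u32 idx;
	int err;

	err = mlx4_counter_alloc(dev, port, &idx);
	if (err)
		return err;

	/* ... program idx into a QP context or flow rule ... */

	mlx4_counter_free(dev, port, idx);
	return 0;
}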
-void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
+void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx)
{
- switch (dev->caps.counters_mode) {
- case MLX4_CUNTERS_BASIC:
- case MLX4_CUNTERS_EXT:
- mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
+ /* check whether native or slave and delete accordingly */
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct counter_index *pf, *tmp_pf;
+ struct counter_index *vf, *tmp_vf;
+ int first;
+
+
+ if (idx == MLX4_SINK_COUNTER_INDEX) {
+ mlx4_dbg(dev, "%s: try to delete default counter index %d for port %d\n"
+ , __func__, idx, port);
+ return;
+ }
+
+ if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) ||
+ (port < 0) || (port > MLX4_MAX_PORTS)) {
+ mlx4_warn(dev, "%s: deletion failed due to invalid slave(%d) or port(%d) index\n"
+ , __func__, slave, port);
+ return;
+ }
+
+ mutex_lock(&priv->counters_table.mutex);
+ if (slave == 0) {
+ first = 0;
+ list_for_each_entry_safe(pf, tmp_pf,
+ &priv->counters_table.global_port_list[port - 1],
+ list) {
+ /* the first 2 counters are reserved */
+ if (pf->index == idx) {
+ /* clear the counter statistic */
+ if (__mlx4_clear_if_stat(dev, pf->index))
+ mlx4_dbg(dev, "%s: reset counter %d failed\n",
+ __func__, pf->index);
+ if (1 < first && idx != MLX4_SINK_COUNTER_INDEX) {
+ list_del(&pf->list);
+ kfree(pf);
+ mlx4_dbg(dev, "%s: delete counter index %d for native device (%d) port %d\n"
+ , __func__, idx, slave, port);
+ mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR);
+ goto out;
+ } else {
+ mlx4_dbg(dev, "%s: can't delete default counter index %d for native device (%d) port %d\n"
+ , __func__, idx, slave, port);
+ goto out;
+ }
+ }
+ first++;
+ }
+ mlx4_dbg(dev, "%s: can't delete counter index %d for native device (%d) port %d\n"
+ , __func__, idx, slave, port);
+ } else {
+ first = 0;
+ list_for_each_entry_safe(vf, tmp_vf,
+ &priv->counters_table.vf_list[slave - 1][port - 1],
+ list) {
+ /* the first element is reserved */
+ if (vf->index == idx) {
+ /* clear the counter statistic */
+ if (__mlx4_clear_if_stat(dev, vf->index))
+ mlx4_dbg(dev, "%s: reset counter %d failed\n",
+ __func__, vf->index);
+ if (first) {
+ list_del(&vf->list);
+ kfree(vf);
+ mlx4_dbg(dev, "%s: delete counter index %d for slave %d port %d\n",
+ __func__, idx, slave, port);
+ mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR);
+ goto out;
+ } else {
+ mlx4_dbg(dev, "%s: can't delete default slave (%d) counter index %d for port %d\n"
+ , __func__, slave, idx, port);
+ goto out;
+ }
+ }
+ first++;
+ }
+ mlx4_dbg(dev, "%s: can't delete slave (%d) counter index %d for port %d\n"
+ , __func__, slave, idx, port);
+ }
+
+out:
+ mutex_unlock(&priv->counters_table.mutex);
+}
+
+void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx)
+{
+ u64 in_param = 0;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct counter_index *counter, *tmp_counter;
+ int first = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, idx);
+ mlx4_cmd(dev, in_param,
+ ((u32) port) << 8 | (u32) RES_COUNTER,
+ RES_OP_RESERVE,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+
+ if (mlx4_is_slave(dev) && idx != MLX4_SINK_COUNTER_INDEX) {
+ mutex_lock(&priv->counters_table.mutex);
+ list_for_each_entry_safe(counter, tmp_counter,
+ &priv->counters_table.global_port_list[port - 1],
+ list) {
+ if (counter->index == idx && first++) {
+ list_del(&counter->list);
+ kfree(counter);
+ mlx4_dbg(dev, "%s: delete counter index %d for port %d\n"
+ , __func__, idx, port);
+ mutex_unlock(&priv->counters_table.mutex);
+ return;
+ }
+ }
+ mutex_unlock(&priv->counters_table.mutex);
+ }
+
return;
- default:
- return;
}
+ __mlx4_counter_free(dev, 0, port, idx);
}
EXPORT_SYMBOL_GPL(mlx4_counter_free);
+int __mlx4_clear_if_stat(struct mlx4_dev *dev,
+ u8 counter_index)
+{
+ struct mlx4_cmd_mailbox *if_stat_mailbox = NULL;
+ int err = 0;
+ u32 if_stat_in_mod = (counter_index & 0xff) | (1 << 31);
+
+ if (counter_index == MLX4_SINK_COUNTER_INDEX)
+ return -EINVAL;
+
+ if (mlx4_is_slave(dev))
+ return 0;
+
+ if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(if_stat_mailbox)) {
+ err = PTR_ERR(if_stat_mailbox);
+ return err;
+ }
+
+ err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
+ MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
+ return err;
+}
+
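__mlx4_clear_if_stat() builds the QUERY_IF_STAT input modifier with the counter index in the low byte and the clear-after-read flag in bit 31. The same encoding, factored out as a sketch:

#include <stdint.h>

/* in_mod layout assumed from the code above: counter index in the low
 * byte, bit 31 requests a clear of the counter when it is read. */
static uint32_t if_stat_in_mod(uint8_t counter_index, int clear)
{
	return (uint32_t)counter_index | ((uint32_t)(clear & 1) << 31);
}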
+u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct counter_index *new_counter_index;
+
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) {
+ mlx4_dbg(dev, "%s: return counter index %d for slave %d port (MLX4_PORT_TYPE_IB) %d\n",
+ __func__, MLX4_SINK_COUNTER_INDEX, slave, port);
+ return (u8)MLX4_SINK_COUNTER_INDEX;
+ }
+
+ mutex_lock(&priv->counters_table.mutex);
+ if (slave == 0) {
+ new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
+ struct counter_index,
+ list);
+ } else {
+ new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next,
+ struct counter_index,
+ list);
+ }
+ mutex_unlock(&priv->counters_table.mutex);
+
+ mlx4_dbg(dev, "%s: return counter index %d for slave %d port %d\n",
+ __func__, new_counter_index->index, slave, port);
+
+
+ return (u8)new_counter_index->index;
+}
+
+int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port,
+ struct mlx4_en_vport_stats *vport_stats,
+ int reset)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cmd_mailbox *if_stat_mailbox = NULL;
+ union mlx4_counter *counter;
+ int err = 0;
+ u32 if_stat_in_mod;
+ struct counter_index *vport, *tmp_vport;
+
+ if (!vport_stats)
+ return -EINVAL;
+
+ if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(if_stat_mailbox)) {
+ err = PTR_ERR(if_stat_mailbox);
+ return err;
+ }
+
+ mutex_lock(&priv->counters_table.mutex);
+ list_for_each_entry_safe(vport, tmp_vport,
+ &priv->counters_table.global_port_list[port - 1],
+ list) {
+ if (vport->index == MLX4_SINK_COUNTER_INDEX)
+ continue;
+
+ memset(if_stat_mailbox->buf, 0, sizeof(union mlx4_counter));
+ if_stat_in_mod = (vport->index & 0xff) | ((reset & 1) << 31);
+ err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma,
+ if_stat_in_mod, 0,
+ MLX4_CMD_QUERY_IF_STAT,
+ MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n",
+ __func__, vport->index);
+ goto if_stat_out;
+ }
+ counter = (union mlx4_counter *)if_stat_mailbox->buf;
+ if ((counter->control.cnt_mode & 0xf) == 1) {
+ vport_stats->rx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastFrames);
+ vport_stats->rx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxUnicastFrames);
+ vport_stats->rx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxMulticastFrames);
+ vport_stats->tx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastFrames);
+ vport_stats->tx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxUnicastFrames);
+ vport_stats->tx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxMulticastFrames);
+ vport_stats->rx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastOctets);
+ vport_stats->rx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxUnicastOctets);
+ vport_stats->rx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxMulticastOctets);
+ vport_stats->tx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastOctets);
+ vport_stats->tx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxUnicastOctets);
+ vport_stats->tx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxMulticastOctets);
+ vport_stats->rx_errors += be64_to_cpu(counter->ext.counters[0].IfRxErrorFrames);
+ vport_stats->rx_dropped += be64_to_cpu(counter->ext.counters[0].IfRxNoBufferFrames);
+ vport_stats->tx_errors += be64_to_cpu(counter->ext.counters[0].IfTxDroppedFrames);
+ }
+ }
+
+if_stat_out:
+ mutex_unlock(&priv->counters_table.mutex);
+ mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_vport_ethtool_stats);
+
static int mlx4_setup_hca(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1081,18 +2711,19 @@
err = mlx4_init_uar_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "user access region table, aborting.\n");
+ "user access region table (err=%d), aborting.\n",
+ err);
return err;
}
err = mlx4_uar_alloc(dev, &priv->driver_uar);
if (err) {
- mlx4_err(dev, "Failed to allocate driver access region, "
- "aborting.\n");
+ mlx4_err(dev, "Failed to allocate driver access region "
+ "(err=%d), aborting.\n", err);
goto err_uar_table_free;
}
- priv->kar = ioremap(priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
if (!priv->kar) {
mlx4_err(dev, "Couldn't map kernel access region, "
"aborting.\n");
@@ -1103,14 +2734,15 @@
err = mlx4_init_pd_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "protection domain table, aborting.\n");
+ "protection domain table (err=%d), aborting.\n", err);
goto err_kar_unmap;
}
err = mlx4_init_xrcd_table(dev);
if (err) {
- mlx4_err(dev, "Failed to initialize extended "
- "reliably connected domain table, aborting.\n");
+ mlx4_err(dev, "Failed to initialize "
+ "reliable connection domain table (err=%d), "
+ "aborting.\n", err);
goto err_pd_table_free;
}
@@ -1117,21 +2749,31 @@
err = mlx4_init_mr_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "memory region table, aborting.\n");
+ "memory region table (err=%d), aborting.\n", err);
goto err_xrcd_table_free;
}
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_init_mcg_table(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize "
+ "multicast group table (err=%d), aborting.\n",
+ err);
+ goto err_mr_table_free;
+ }
+ }
+
err = mlx4_init_eq_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "event queue table, aborting.\n");
- goto err_mr_table_free;
+ "event queue table (err=%d), aborting.\n", err);
+ goto err_mcg_table_free;
}
err = mlx4_cmd_use_events(dev);
if (err) {
mlx4_err(dev, "Failed to switch to event-driven "
- "firmware commands, aborting.\n");
+ "firmware commands (err=%d), aborting.\n", err);
goto err_eq_table_free;
}
@@ -1157,7 +2799,7 @@
err = mlx4_init_cq_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "completion queue table, aborting.\n");
+ "completion queue table (err=%d), aborting.\n", err);
goto err_cmd_poll;
}
@@ -1164,7 +2806,8 @@
err = mlx4_init_srq_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "shared receive queue table, aborting.\n");
+ "shared receive queue table (err=%d), aborting.\n",
+ err);
goto err_cq_table_free;
}
@@ -1171,36 +2814,48 @@
err = mlx4_init_qp_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "queue pair table, aborting.\n");
+ "queue pair table (err=%d), aborting.\n", err);
goto err_srq_table_free;
}
- err = mlx4_init_mcg_table(dev);
- if (err) {
- mlx4_err(dev, "Failed to initialize "
- "multicast group table, aborting.\n");
- goto err_qp_table_free;
- }
-
err = mlx4_init_counters_table(dev);
if (err && err != -ENOENT) {
- mlx4_err(dev, "Failed to initialize counters table, aborting.\n");
- goto err_mcg_table_free;
+ mlx4_err(dev, "Failed to initialize counters table (err=%d), "
+ "aborting.\n", err);
+ goto err_qp_table_free;
}
- for (port = 1; port <= dev->caps.num_ports; port++) {
- ib_port_default_caps = 0;
- err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps);
- if (err)
- mlx4_warn(dev, "failed to get port %d default "
- "ib capabilities (%d). Continuing with "
- "caps = 0\n", port, err);
- dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
- err = mlx4_SET_PORT(dev, port);
- if (err) {
- mlx4_err(dev, "Failed to set port %d, aborting\n",
- port);
- goto err_counters_table_free;
+ if (!mlx4_is_slave(dev)) {
+ for (port = 1; port <= dev->caps.num_ports; port++) {
+ ib_port_default_caps = 0;
+ err = mlx4_get_port_ib_caps(dev, port,
+ &ib_port_default_caps);
+ if (err)
+ mlx4_warn(dev, "failed to get port %d default "
+ "ib capabilities (%d). Continuing "
+ "with caps = 0\n", port, err);
+ dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
+
+ /* initialize per-slave default ib port capabilities */
+ if (mlx4_is_master(dev)) {
+ int i;
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
+ ib_port_default_caps;
+ }
+ }
+
+ dev->caps.port_ib_mtu[port] = IB_MTU_4096;
+
+ err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
+ dev->caps.pkey_table_len[port] : -1);
+ if (err) {
+ mlx4_err(dev, "Failed to set port %d (err=%d), "
+ "aborting\n", port, err);
+ goto err_counters_table_free;
+ }
}
}
@@ -1209,9 +2864,6 @@
err_counters_table_free:
mlx4_cleanup_counters_table(dev);
-err_mcg_table_free:
- mlx4_cleanup_mcg_table(dev);
-
err_qp_table_free:
mlx4_cleanup_qp_table(dev);
@@ -1227,6 +2879,10 @@
err_eq_table_free:
mlx4_cleanup_eq_table(dev);
+err_mcg_table_free:
+ if (!mlx4_is_slave(dev))
+ mlx4_cleanup_mcg_table(dev);
+
err_mr_table_free:
mlx4_cleanup_mr_table(dev);
@@ -1251,13 +2907,19 @@
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct msix_entry *entries;
- int nreq;
+ int nreq = min_t(int, dev->caps.num_ports *
+ min_t(int, num_possible_cpus() + 1, MAX_MSIX_P_PORT)
+ + MSIX_LEGACY_SZ, MAX_MSIX);
int err;
int i;
if (msi_x) {
nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
- num_possible_cpus() + 1);
+ nreq);
+
+ if (msi_x > 1 && !mlx4_is_mfunc(dev))
+ nreq = min_t(int, nreq, msi_x);
+
entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
if (!entries)
goto no_msi;
@@ -1277,10 +2939,24 @@
goto retry;
}
kfree(entries);
+ /* if error, or can't alloc even 1 IRQ */
+ if (err < 0) {
+ mlx4_err(dev, "No IRQs left, device can't "
+ "be started.\n");
+ goto no_irq;
+ }
goto no_msi;
}
- dev->caps.num_comp_vectors = nreq - 1;
+ if (nreq <
+ MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {
+ /* Working in legacy mode, all EQs shared */
+ dev->caps.comp_pool = 0;
+ dev->caps.num_comp_vectors = nreq - 1;
+ } else {
+ dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ;
+ dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
+ }
for (i = 0; i < nreq; ++i)
priv->eq_table.eq[i].irq = entries[i].vector;
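
As a worked example of the vector budgeting above, the sketch below replays the arithmetic with the usual mlx4 constants (the exact values live in mlx4.h; the ones here are assumptions for illustration): with 2 ports and 8 CPUs, 22 vectors are requested, enough for per-port completion pools.

    #include <stdio.h>

    /* Assumed constants, mirroring mlx4.h for illustration only. */
    #define MAX_MSIX_P_PORT 17
    #define MIN_MSIX_P_PORT  5
    #define MSIX_LEGACY_SZ   4
    #define MAX_MSIX        64

    static int imin(int a, int b) { return a < b ? a : b; }

    int main(void)
    {
        int num_ports = 2, ncpus = 8;
        int nreq = imin(num_ports * imin(ncpus + 1, MAX_MSIX_P_PORT)
                        + MSIX_LEGACY_SZ, MAX_MSIX);

        printf("requesting %d MSI-X vectors\n", nreq);
        if (nreq < MSIX_LEGACY_SZ + num_ports * MIN_MSIX_P_PORT)
            /* too few granted: all EQs shared, no completion pool */
            printf("legacy: comp_pool=0 num_comp_vectors=%d\n", nreq - 1);
        else
            printf("pooled: comp_pool=%d num_comp_vectors=%d\n",
                   nreq - MSIX_LEGACY_SZ, MSIX_LEGACY_SZ - 1);
        return 0;
    }
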
@@ -1292,11 +2968,40 @@
no_msi:
dev->caps.num_comp_vectors = 1;
+ dev->caps.comp_pool = 0;
for (i = 0; i < 2; ++i)
priv->eq_table.eq[i].irq = dev->pdev->irq;
+ return;
+no_irq:
+ dev->caps.num_comp_vectors = 0;
+ dev->caps.comp_pool = 0;
+ return;
}
+static void
+mlx4_init_hca_info(struct mlx4_dev *dev)
+{
+ struct mlx4_hca_info *info = &mlx4_priv(dev)->hca_info;
+
+ info->dev = dev;
+
+ info->firmware_attr = (struct device_attribute)__ATTR(fw_ver, S_IRUGO,
+ show_firmware_version, NULL);
+ if (device_create_file(&dev->pdev->dev, &info->firmware_attr))
+ mlx4_err(dev, "Failed to add file firmware version");
+
+ info->hca_attr = (struct device_attribute)__ATTR(hca, S_IRUGO, show_hca,
+ NULL);
+ if (device_create_file(&dev->pdev->dev, &info->hca_attr))
+ mlx4_err(dev, "Failed to add file hca type");
+
+ info->board_attr = (struct device_attribute)__ATTR(board_id, S_IRUGO,
+ show_board, NULL);
+ if (device_create_file(&dev->pdev->dev, &info->board_attr))
+ mlx4_err(dev, "Failed to add file board id type");
+}
+
static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
{
struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
@@ -1304,14 +3009,22 @@
info->dev = dev;
info->port = port;
- mlx4_init_mac_table(dev, &info->mac_table);
- mlx4_init_vlan_table(dev, &info->vlan_table);
+ if (!mlx4_is_slave(dev)) {
+ mlx4_init_mac_table(dev, &info->mac_table);
+ mlx4_init_vlan_table(dev, &info->vlan_table);
+ info->base_qpn = mlx4_get_base_qpn(dev, port);
+ }
sprintf(info->dev_name, "mlx4_port%d", port);
info->port_attr.attr.name = info->dev_name;
- info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+ if (mlx4_is_mfunc(dev))
+ info->port_attr.attr.mode = S_IRUGO;
+ else {
+ info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+ info->port_attr.store = set_port_type;
+ }
info->port_attr.show = show_port_type;
- info->port_attr.store = set_port_type;
+ sysfs_attr_init(&info->port_attr.attr);
err = device_create_file(&dev->pdev->dev, &info->port_attr);
if (err) {
@@ -1319,9 +3032,35 @@
info->port = -1;
}
+ sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
+ info->port_mtu_attr.attr.name = info->dev_mtu_name;
+ if (mlx4_is_mfunc(dev))
+ info->port_mtu_attr.attr.mode = S_IRUGO;
+ else {
+ info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
+ info->port_mtu_attr.store = set_port_ib_mtu;
+ }
+ info->port_mtu_attr.show = show_port_ib_mtu;
+ sysfs_attr_init(&info->port_mtu_attr.attr);
+
+ err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr);
+ if (err) {
+ mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
+ device_remove_file(&info->dev->pdev->dev, &info->port_attr);
+ info->port = -1;
+ }
+
return err;
}
+static void
+mlx4_cleanup_hca_info(struct mlx4_hca_info *info)
+{
+ device_remove_file(&info->dev->pdev->dev, &info->firmware_attr);
+ device_remove_file(&info->dev->pdev->dev, &info->board_attr);
+ device_remove_file(&info->dev->pdev->dev, &info->hca_attr);
+}
+
static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
{
if (info->port < 0)
@@ -1328,25 +3067,115 @@
return;
device_remove_file(&info->dev->pdev->dev, &info->port_attr);
+ device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr);
}
-static int mlx4_init_trigger(struct mlx4_priv *priv)
+static int mlx4_init_steering(struct mlx4_dev *dev)
{
- memcpy(&priv->trigger_attr, &dev_attr_port_trigger,
- sizeof(struct device_attribute));
- return device_create_file(&priv->dev.pdev->dev, &priv->trigger_attr);
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int num_entries = dev->caps.num_ports;
+ int i, j;
+
+ priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
+ if (!priv->steer)
+ return -ENOMEM;
+
+ for (i = 0; i < num_entries; i++)
+ for (j = 0; j < MLX4_NUM_STEERS; j++) {
+ INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
+ INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
+ }
+ return 0;
}
-static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+static void mlx4_clear_steering(struct mlx4_dev *dev)
{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_steer_index *entry, *tmp_entry;
+ struct mlx4_promisc_qp *pqp, *tmp_pqp;
+ int num_entries = dev->caps.num_ports;
+ int i, j;
+
+ for (i = 0; i < num_entries; i++) {
+ for (j = 0; j < MLX4_NUM_STEERS; j++) {
+ list_for_each_entry_safe(pqp, tmp_pqp,
+ &priv->steer[i].promisc_qps[j],
+ list) {
+ list_del(&pqp->list);
+ kfree(pqp);
+ }
+ list_for_each_entry_safe(entry, tmp_entry,
+ &priv->steer[i].steer_entries[j],
+ list) {
+ list_del(&entry->list);
+ list_for_each_entry_safe(pqp, tmp_pqp,
+ &entry->duplicates,
+ list) {
+ list_del(&pqp->list);
+ kfree(pqp);
+ }
+ kfree(entry);
+ }
+ }
+ }
+ kfree(priv->steer);
+}
+
+static int extended_func_num(struct pci_dev *pdev)
+{
+ return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
+}
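
extended_func_num() above flattens the PCI slot/function pair into one index, eight functions per slot; a standalone equivalent of the PCI_SLOT/PCI_FUNC arithmetic:

    #include <stdio.h>

    /* devfn carries the slot in bits 7:3 and the function in bits 2:0. */
    static int extended_func_num(unsigned int devfn)
    {
        unsigned int slot = (devfn >> 3) & 0x1f; /* PCI_SLOT(devfn) */
        unsigned int func = devfn & 0x07;        /* PCI_FUNC(devfn) */

        return (int)(slot * 8 + func);
    }

    int main(void)
    {
        /* slot 2, function 3 -> 2 * 8 + 3 = 19 */
        printf("%d\n", extended_func_num((2u << 3) | 3u));
        return 0;
    }
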
+
+#define MLX4_OWNER_BASE 0x8069c
+#define MLX4_OWNER_SIZE 4
+
+static int mlx4_get_ownership(struct mlx4_dev *dev)
+{
+ void __iomem *owner;
+ u32 ret;
+
+ if (pci_channel_offline(dev->pdev))
+ return -EIO;
+
+ owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
+ MLX4_OWNER_SIZE);
+ if (!owner) {
+ mlx4_err(dev, "Failed to obtain ownership bit\n");
+ return -ENOMEM;
+ }
+
+ ret = readl(owner);
+ iounmap(owner);
+ return (int) !!ret;
+}
+
+static void mlx4_free_ownership(struct mlx4_dev *dev)
+{
+ void __iomem *owner;
+
+ if (pci_channel_offline(dev->pdev))
+ return;
+
+ owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
+ MLX4_OWNER_SIZE);
+ if (!owner) {
+ mlx4_err(dev, "Failed to obtain ownership bit\n");
+ return;
+ }
+ writel(0, owner);
+ msleep(1000);
+ iounmap(owner);
+}
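
The ownership handshake above is one 32-bit word in BAR0: a read that returns zero means this PF just claimed the device (assuming the firmware treats the read itself as the claim, hardware-semaphore style), a non-zero read means another PF holds it, and writing zero releases it after a one-second settle. Modeling the register as a plain variable gives a rough sketch:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the word at MLX4_OWNER_BASE; real code uses readl/writel. */
    static uint32_t owner_word;

    /* Returns 0 if we just took ownership, 1 if somebody else holds it. */
    static int get_ownership(void)
    {
        uint32_t prev = owner_word; /* readl(): assumed read-to-claim */
        owner_word = 1;
        return prev != 0;
    }

    static void free_ownership(void)
    {
        owner_word = 0; /* writel(0, owner); then msleep(1000) on hardware */
    }

    int main(void)
    {
        printf("first claim busy? %d\n", get_ownership());   /* 0: we own it */
        printf("second claim busy? %d\n", get_ownership());  /* 1: held */
        free_ownership();
        printf("after release busy? %d\n", get_ownership()); /* 0 again */
        return 0;
    }
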
+
+static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
+{
struct mlx4_priv *priv;
struct mlx4_dev *dev;
int err;
int port;
- int i;
+ int nvfs, prb_vf;
- printk(KERN_INFO PFX "Initializing %s\n",
- pci_name(pdev));
+ pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
err = pci_enable_device(pdev);
if (err) {
@@ -1355,12 +3184,26 @@
return err;
}
+ mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(pdev), 0, &nvfs);
+ mlx4_get_val(probe_vf.dbdf2val.tbl, pci_physfn(pdev), 0, &prb_vf);
+ if (nvfs > MLX4_MAX_NUM_VF) {
+ dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n",
+ nvfs, MLX4_MAX_NUM_VF);
+ return -EINVAL;
+ }
+
+ if (nvfs < 0) {
+ dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
+ return -EINVAL;
+ }
/*
- * Check for BARs. We expect 0: 1MB
+ * Check for BARs.
*/
- if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
- pci_resource_len(pdev, 0) != 1 << 20) {
- dev_err(&pdev->dev, "Missing DCS, aborting.\n");
+ if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
+ !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "Missing DCS, aborting."
+ "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n",
+ pci_dev_data, pci_resource_flags(pdev, 0));
err = -ENODEV;
goto err_disable_pdev;
}
@@ -1370,18 +3213,12 @@
goto err_disable_pdev;
}
- err = pci_request_region(pdev, 0, DRV_NAME);
+ err = pci_request_regions(pdev, DRV_NAME);
if (err) {
- dev_err(&pdev->dev, "Cannot request control region, aborting.\n");
+ dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
goto err_disable_pdev;
}
- err = pci_request_region(pdev, 2, DRV_NAME);
- if (err) {
- dev_err(&pdev->dev, "Cannot request UAR region, aborting.\n");
- goto err_release_bar0;
- }
-
pci_set_master(pdev);
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1390,7 +3227,7 @@
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
- goto err_release_bar2;
+ goto err_release_regions;
}
}
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1401,20 +3238,24 @@
if (err) {
dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
"aborting.\n");
- goto err_release_bar2;
+ goto err_release_regions;
}
}
+ /* Allow large DMA segments, up to the firmware limit of 1 GB */
+ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
+
priv = kzalloc(sizeof *priv, GFP_KERNEL);
if (!priv) {
dev_err(&pdev->dev, "Device struct alloc failed, "
"aborting.\n");
err = -ENOMEM;
- goto err_release_bar2;
+ goto err_release_regions;
}
dev = &priv->dev;
dev->pdev = pdev;
+ INIT_LIST_HEAD(&priv->dev_list);
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
@@ -1422,48 +3263,163 @@
INIT_LIST_HEAD(&priv->pgdir_list);
mutex_init(&priv->pgdir_mutex);
- for (i = 0; i < MLX4_MAX_PORTS; ++i)
- priv->iboe_counter_index[i] = -1;
INIT_LIST_HEAD(&priv->bf_list);
mutex_init(&priv->bf_mutex);
- /*
- * Now reset the HCA before we touch the PCI capabilities or
- * attempt a firmware command, since a boot ROM may have left
- * the HCA in an undefined state.
- */
- err = mlx4_reset(dev);
- if (err) {
- mlx4_err(dev, "Failed to reset HCA, aborting.\n");
- goto err_free_dev;
+ dev->rev_id = pdev->revision;
+ dev->numa_node = dev_to_node(&pdev->dev);
+ /* Detect if this device is a virtual function */
+ if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
+ /* When acting as pf, we normally skip vfs unless explicitly
+ * requested to probe them. */
+ if (nvfs && extended_func_num(pdev) > prb_vf) {
+ mlx4_warn(dev, "Skipping virtual function:%d\n",
+ extended_func_num(pdev));
+ err = -ENODEV;
+ goto err_free_dev;
+ }
+ mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
+ dev->flags |= MLX4_FLAG_SLAVE;
+ } else {
+ /* We reset the device and enable SRIOV only for physical
+ * devices. Try to claim ownership on the device;
+ * if already taken, skip -- do not allow multiple PFs */
+ err = mlx4_get_ownership(dev);
+ if (err) {
+ if (err < 0)
+ goto err_free_dev;
+ else {
+ mlx4_warn(dev, "Multiple PFs not yet supported."
+ " Skipping PF.\n");
+ err = -EINVAL;
+ goto err_free_dev;
+ }
+ }
+
+ if (nvfs) {
+ mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", nvfs);
+ err = pci_enable_sriov(pdev, nvfs);
+ if (err) {
+ mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
+ err);
+ err = 0;
+ } else {
+ mlx4_warn(dev, "Running in master mode\n");
+ dev->flags |= MLX4_FLAG_SRIOV |
+ MLX4_FLAG_MASTER;
+ dev->num_vfs = nvfs;
+ }
+ }
+
+ atomic_set(&priv->opreq_count, 0);
+ INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
+
+ /*
+ * Now reset the HCA before we touch the PCI capabilities or
+ * attempt a firmware command, since a boot ROM may have left
+ * the HCA in an undefined state.
+ */
+ err = mlx4_reset(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to reset HCA, aborting.\n");
+ goto err_sriov;
+ }
}
- if (mlx4_cmd_init(dev)) {
+slave_start:
+ err = mlx4_cmd_init(dev);
+ if (err) {
mlx4_err(dev, "Failed to init command interface, aborting.\n");
- goto err_free_dev;
+ goto err_sriov;
}
+ /* In slave functions, the communication channel must be initialized
+ * before posting commands. Also, init num_slaves before calling
+ * mlx4_init_hca */
+ if (mlx4_is_mfunc(dev)) {
+ if (mlx4_is_master(dev))
+ dev->num_slaves = MLX4_MAX_NUM_SLAVES;
+ else {
+ dev->num_slaves = 0;
+ err = mlx4_multi_func_init(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to init slave mfunc"
+ " interface, aborting.\n");
+ goto err_cmd;
+ }
+ }
+ }
+
err = mlx4_init_hca(dev);
- if (err)
- goto err_cmd;
+ if (err) {
+ if (err == -EACCES) {
+ /* Not primary Physical function
+ * Running in slave mode */
+ mlx4_cmd_cleanup(dev);
+ dev->flags |= MLX4_FLAG_SLAVE;
+ dev->flags &= ~MLX4_FLAG_MASTER;
+ goto slave_start;
+ } else
+ goto err_mfunc;
+ }
+ /* In master functions, the communication channel must be initialized
+ * after obtaining its address from fw */
+ if (mlx4_is_master(dev)) {
+ err = mlx4_multi_func_init(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to init master mfunc"
+ "interface, aborting.\n");
+ goto err_close;
+ }
+ }
+
err = mlx4_alloc_eq_table(dev);
if (err)
- goto err_close;
+ goto err_master_mfunc;
+ priv->msix_ctl.pool_bm = 0;
+ mutex_init(&priv->msix_ctl.pool_lock);
+
mlx4_enable_msi_x(dev);
+ /* no MSIX and no shared IRQ */
+ if (!dev->caps.num_comp_vectors && !dev->caps.comp_pool) {
+ err = -ENOSPC;
+ goto err_free_eq;
+ }
+
+ if ((mlx4_is_mfunc(dev)) &&
+ !(dev->flags & MLX4_FLAG_MSI_X)) {
+ err = -ENOSYS;
+ mlx4_err(dev, "INTx is not supported in multi-function mode."
+ " aborting.\n");
+ goto err_free_eq;
+ }
+
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_init_steering(dev);
+ if (err)
+ goto err_free_eq;
+ }
+
err = mlx4_setup_hca(dev);
- if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X)) {
+ if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
+ !mlx4_is_mfunc(dev)) {
dev->flags &= ~MLX4_FLAG_MSI_X;
+ dev->caps.num_comp_vectors = 1;
+ dev->caps.comp_pool = 0;
pci_disable_msix(pdev);
err = mlx4_setup_hca(dev);
}
if (err)
- goto err_free_eq;
+ goto err_steer;
+ mlx4_init_quotas(dev);
+ mlx4_init_hca_info(dev);
+
for (port = 1; port <= dev->caps.num_ports; port++) {
err = mlx4_init_port_info(dev, port);
if (err)
@@ -1474,43 +3430,52 @@
if (err)
goto err_port;
- err = mlx4_init_trigger(priv);
- if (err)
- goto err_register;
+ mlx4_request_modules(dev);
- err = mlx4_sense_init(dev);
- if (err)
- goto err_trigger;
-
+ mlx4_sense_init(dev);
mlx4_start_sense(dev);
+ priv->pci_dev_data = pci_dev_data;
pci_set_drvdata(pdev, dev);
return 0;
-err_trigger:
- device_remove_file(&dev->pdev->dev, &priv->trigger_attr);
-err_register:
- mlx4_unregister_device(dev);
err_port:
for (--port; port >= 1; --port)
mlx4_cleanup_port_info(&priv->port[port]);
mlx4_cleanup_counters_table(dev);
- mlx4_cleanup_mcg_table(dev);
mlx4_cleanup_qp_table(dev);
mlx4_cleanup_srq_table(dev);
mlx4_cleanup_cq_table(dev);
mlx4_cmd_use_polling(dev);
mlx4_cleanup_eq_table(dev);
+ mlx4_cleanup_mcg_table(dev);
mlx4_cleanup_mr_table(dev);
mlx4_cleanup_xrcd_table(dev);
mlx4_cleanup_pd_table(dev);
mlx4_cleanup_uar_table(dev);
+err_steer:
+ if (!mlx4_is_slave(dev))
+ mlx4_clear_steering(dev);
+
err_free_eq:
mlx4_free_eq_table(dev);
+err_master_mfunc:
+ if (mlx4_is_master(dev)) {
+ mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
+ mlx4_multi_func_cleanup(dev);
+ }
+
+ if (mlx4_is_slave(dev)) {
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ }
+
err_close:
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
@@ -1517,18 +3482,26 @@
mlx4_close_hca(dev);
+err_mfunc:
+ if (mlx4_is_slave(dev))
+ mlx4_multi_func_cleanup(dev);
+
err_cmd:
mlx4_cmd_cleanup(dev);
+err_sriov:
+ if (dev->flags & MLX4_FLAG_SRIOV)
+ pci_disable_sriov(pdev);
+
+ if (!mlx4_is_slave(dev))
+ mlx4_free_ownership(dev);
+
err_free_dev:
kfree(priv);
-err_release_bar2:
- pci_release_region(pdev, 2);
+err_release_regions:
+ pci_release_regions(pdev);
-err_release_bar0:
- pci_release_region(pdev, 0);
-
err_disable_pdev:
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
@@ -1538,14 +3511,8 @@
static int __devinit mlx4_init_one(struct pci_dev *pdev,
const struct pci_device_id *id)
{
- static int mlx4_version_printed;
-
- if (!mlx4_version_printed) {
- printk(KERN_INFO "%s", mlx4_version);
- ++mlx4_version_printed;
- }
-
- return __mlx4_init_one(pdev, id);
+ device_set_desc(pdev->dev.bsddev, mlx4_version);
+ return __mlx4_init_one(pdev, id->driver_data);
}
static void mlx4_remove_one(struct pci_dev *pdev)
@@ -1555,106 +3522,307 @@
int p;
if (dev) {
- mlx4_sense_cleanup(dev);
+ /* in SRIOV it is not allowed to unload the PF's
+ * driver while there are active VFs */
+ if (mlx4_is_master(dev)) {
+ if (mlx4_how_many_lives_vf(dev))
+ mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n");
+ }
+ mlx4_stop_sense(dev);
mlx4_unregister_device(dev);
- device_remove_file(&dev->pdev->dev, &priv->trigger_attr);
+ mlx4_cleanup_hca_info(&priv->hca_info);
for (p = 1; p <= dev->caps.num_ports; p++) {
mlx4_cleanup_port_info(&priv->port[p]);
mlx4_CLOSE_PORT(dev, p);
}
- mlx4_cleanup_counters_table(dev);
- mlx4_cleanup_mcg_table(dev);
+ if (mlx4_is_master(dev))
+ mlx4_free_resource_tracker(dev,
+ RES_TR_FREE_SLAVES_ONLY);
+
+ mlx4_cleanup_counters_table(dev);
mlx4_cleanup_qp_table(dev);
mlx4_cleanup_srq_table(dev);
mlx4_cleanup_cq_table(dev);
mlx4_cmd_use_polling(dev);
mlx4_cleanup_eq_table(dev);
+ mlx4_cleanup_mcg_table(dev);
mlx4_cleanup_mr_table(dev);
mlx4_cleanup_xrcd_table(dev);
mlx4_cleanup_pd_table(dev);
+ if (mlx4_is_master(dev))
+ mlx4_free_resource_tracker(dev,
+ RES_TR_FREE_STRUCTS_ONLY);
+
iounmap(priv->kar);
mlx4_uar_free(dev, &priv->driver_uar);
mlx4_cleanup_uar_table(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_clear_steering(dev);
mlx4_free_eq_table(dev);
+ if (mlx4_is_master(dev))
+ mlx4_multi_func_cleanup(dev);
mlx4_close_hca(dev);
+ if (mlx4_is_slave(dev))
+ mlx4_multi_func_cleanup(dev);
mlx4_cmd_cleanup(dev);
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
+ if (dev->flags & MLX4_FLAG_SRIOV) {
+ mlx4_warn(dev, "Disabling SR-IOV\n");
+ pci_disable_sriov(pdev);
+ }
+ if (!mlx4_is_slave(dev))
+ mlx4_free_ownership(dev);
+
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+
kfree(priv);
- pci_release_region(pdev, 2);
- pci_release_region(pdev, 0);
+ pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
}
+static int restore_current_port_types(struct mlx4_dev *dev,
+ enum mlx4_port_type *types,
+ enum mlx4_port_type *poss_types)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err, i;
+
+ mlx4_stop_sense(dev);
+ mutex_lock(&priv->port_mutex);
+ for (i = 0; i < dev->caps.num_ports; i++)
+ dev->caps.possible_type[i + 1] = poss_types[i];
+ err = mlx4_change_port_types(dev, types);
+ mlx4_start_sense(dev);
+ mutex_unlock(&priv->port_mutex);
+ return err;
+}
+
int mlx4_restart_one(struct pci_dev *pdev)
{
+ struct mlx4_dev *dev = pci_get_drvdata(pdev);
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ enum mlx4_port_type curr_type[MLX4_MAX_PORTS];
+ enum mlx4_port_type poss_type[MLX4_MAX_PORTS];
+ int pci_dev_data, err, i;
+
+ pci_dev_data = priv->pci_dev_data;
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ curr_type[i] = dev->caps.port_type[i + 1];
+ poss_type[i] = dev->caps.possible_type[i + 1];
+ }
+
mlx4_remove_one(pdev);
- return __mlx4_init_one(pdev, NULL);
+ err = __mlx4_init_one(pdev, pci_dev_data);
+ if (err)
+ return err;
+
+ dev = pci_get_drvdata(pdev);
+ err = restore_current_port_types(dev, curr_type, poss_type);
+ if (err)
+ mlx4_err(dev, "mlx4_restart_one: could not restore original port types (%d)\n",
+ err);
+ return 0;
}
-static struct pci_device_id mlx4_pci_table[] = {
- { PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
- { PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
- { PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */
- { PCI_VDEVICE(MELLANOX, 0x6732) }, /* MT25408 "Hermon" DDR PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x6368) }, /* MT25408 "Hermon" EN 10GigE */
- { PCI_VDEVICE(MELLANOX, 0x6750) }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x6372) }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */
- { PCI_VDEVICE(MELLANOX, 0x675a) }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
- { PCI_VDEVICE(MELLANOX, 0x6764) }, /* MT26468 ConnectX EN 10GigE PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x6746) }, /* MT26438 ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE Virt+ */
- { PCI_VDEVICE(MELLANOX, 0x676e) }, /* MT26478 ConnectX EN 40GigE PCIe 2.0 5GT/s */
- { PCI_VDEVICE(MELLANOX, 0x6778) }, /* MT26488 ConnectX VPI PCIe 2.0 5GT/s - IB DDR / 10GigE Virt+ */
- { PCI_VDEVICE(MELLANOX, 0x1000) },
- { PCI_VDEVICE(MELLANOX, 0x1001) },
- { PCI_VDEVICE(MELLANOX, 0x1002) },
- { PCI_VDEVICE(MELLANOX, 0x1003) },
- { PCI_VDEVICE(MELLANOX, 0x1004) },
- { PCI_VDEVICE(MELLANOX, 0x1005) },
- { PCI_VDEVICE(MELLANOX, 0x1006) },
- { PCI_VDEVICE(MELLANOX, 0x1007) },
- { PCI_VDEVICE(MELLANOX, 0x1008) },
- { PCI_VDEVICE(MELLANOX, 0x1009) },
- { PCI_VDEVICE(MELLANOX, 0x100a) },
- { PCI_VDEVICE(MELLANOX, 0x100b) },
- { PCI_VDEVICE(MELLANOX, 0x100c) },
- { PCI_VDEVICE(MELLANOX, 0x100d) },
- { PCI_VDEVICE(MELLANOX, 0x100e) },
- { PCI_VDEVICE(MELLANOX, 0x100f) },
+static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
+ /* MT25408 "Hermon" SDR */
+ { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" DDR */
+ { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" QDR */
+ { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" DDR PCIe gen2 */
+ { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" QDR PCIe gen2 */
+ { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" EN 10GigE */
+ { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
+ { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25458 ConnectX EN 10GBASE-T 10GigE */
+ { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
+ { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT26468 ConnectX EN 10GigE PCIe gen2*/
+ { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
+ { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT26478 ConnectX2 40GigE PCIe gen2 */
+ { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25400 Family [ConnectX-2 Virtual Function] */
+ { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
+ /* MT27500 Family [ConnectX-3] */
+ { PCI_VDEVICE(MELLANOX, 0x1003), 0 },
+ /* MT27500 Family [ConnectX-3 Virtual Function] */
+ { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
+ { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
{ 0, }
};
MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
+static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ mlx4_remove_one(pdev);
+
+ return state == pci_channel_io_perm_failure ?
+ PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
+{
+ int ret = __mlx4_init_one(pdev, 0);
+
+ return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
+}
+
+static const struct pci_error_handlers mlx4_err_handler = {
+ .error_detected = mlx4_pci_err_detected,
+ .slot_reset = mlx4_pci_slot_reset,
+};
+
+static int suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ mlx4_remove_one(pdev);
+
+ return 0;
+}
+
+static int resume(struct pci_dev *pdev)
+{
+ return __mlx4_init_one(pdev, 0);
+}
+
static struct pci_driver mlx4_driver = {
.name = DRV_NAME,
.id_table = mlx4_pci_table,
.probe = mlx4_init_one,
- .remove = __devexit_p(mlx4_remove_one)
+ .remove = __devexit_p(mlx4_remove_one),
+ .suspend = suspend,
+ .resume = resume,
+ .err_handler = &mlx4_err_handler,
};
static int __init mlx4_verify_params(void)
{
+ int status;
+
+ status = update_defaults(&port_type_array);
+ if (status == INVALID_STR) {
+ if (mlx4_fill_dbdf2val_tbl(&port_type_array.dbdf2val))
+ return -1;
+ } else if (status == INVALID_DATA) {
+ return -1;
+ }
+
+ status = update_defaults(&num_vfs);
+ if (status == INVALID_STR) {
+ if (mlx4_fill_dbdf2val_tbl(&num_vfs.dbdf2val))
+ return -1;
+ } else if (status == INVALID_DATA) {
+ return -1;
+ }
+
+ status = update_defaults(&probe_vf);
+ if (status == INVALID_STR) {
+ if (mlx4_fill_dbdf2val_tbl(&probe_vf.dbdf2val))
+ return -1;
+ } else if (status == INVALID_DATA) {
+ return -1;
+ }
+
+ if (msi_x < 0) {
+ pr_warn("mlx4_core: bad msi_x: %d\n", msi_x);
+ return -1;
+ }
+
if ((log_num_mac < 0) || (log_num_mac > 7)) {
- printk(KERN_WARNING "mlx4_core: bad num_mac: %d\n", log_num_mac);
+ pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac);
return -1;
}
- if (log_mtts_per_seg == 0)
- log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
- if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
- printk(KERN_WARNING "mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
+ if (log_num_vlan != 0)
+ pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
+ MLX4_LOG_NUM_VLANS);
+
+ if (mlx4_set_4k_mtu != -1)
+ pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n");
+
+ if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) {
+ pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
return -1;
}
+ if (mlx4_log_num_mgm_entry_size != -1 &&
+ (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
+ mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
+ pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
+ "in legal range (-1 or %d..%d)\n",
+ mlx4_log_num_mgm_entry_size,
+ MLX4_MIN_MGM_LOG_ENTRY_SIZE,
+ MLX4_MAX_MGM_LOG_ENTRY_SIZE);
+ return -1;
+ }
+
+ if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) {
+ pr_warning("mlx4_core: bad log_num_qp: %d\n",
+ mod_param_profile.num_qp);
+ return -1;
+ }
+
+ if (mod_param_profile.num_srq < 10) {
+ pr_warning("mlx4_core: too low log_num_srq: %d\n",
+ mod_param_profile.num_srq);
+ return -1;
+ }
+
+ if (mod_param_profile.num_cq < 10) {
+ pr_warning("mlx4_core: too low log_num_cq: %d\n",
+ mod_param_profile.num_cq);
+ return -1;
+ }
+
+ if (mod_param_profile.num_mpt < 10) {
+ pr_warning("mlx4_core: too low log_num_mpt: %d\n",
+ mod_param_profile.num_mpt);
+ return -1;
+ }
+
+ if (mod_param_profile.num_mtt_segs &&
+ mod_param_profile.num_mtt_segs < 15) {
+ pr_warning("mlx4_core: too low log_num_mtt: %d\n",
+ mod_param_profile.num_mtt_segs);
+ return -1;
+ }
+
+ if (mod_param_profile.num_mtt_segs > MLX4_MAX_LOG_NUM_MTT) {
+ pr_warning("mlx4_core: too high log_num_mtt: %d\n",
+ mod_param_profile.num_mtt_segs);
+ return -1;
+ }
return 0;
}
@@ -1662,8 +3830,6 @@
{
int ret;
- mutex_init(&drv_mutex);
-
if (mlx4_verify_params())
return -EINVAL;
@@ -1673,16 +3839,30 @@
if (!mlx4_wq)
return -ENOMEM;
+ if (enable_sys_tune)
+ sys_tune_init();
+
ret = pci_register_driver(&mlx4_driver);
- return ret < 0 ? ret : 0;
+ if (ret < 0)
+ goto err;
+
+ return 0;
+
+err:
+ if (enable_sys_tune)
+ sys_tune_fini();
+
+ destroy_workqueue(mlx4_wq);
+
+ return ret;
}
static void __exit mlx4_cleanup(void)
{
- mutex_lock(&drv_mutex);
- mlx4_config_cleanup();
+ if (enable_sys_tune)
+ sys_tune_fini();
+
pci_unregister_driver(&mlx4_driver);
- mutex_unlock(&drv_mutex);
destroy_workqueue(mlx4_wq);
}
@@ -1689,17 +3869,16 @@
module_init_order(mlx4_init, SI_ORDER_MIDDLE);
module_exit(mlx4_cleanup);
-#undef MODULE_VERSION
#include <sys/module.h>
static int
mlx4_evhand(module_t mod, int event, void *arg)
{
- return (0);
+ return (0);
}
static moduledata_t mlx4_mod = {
- .name = "mlx4",
- .evhand = mlx4_evhand,
+ .name = "mlx4",
+ .evhand = mlx4_evhand,
};
MODULE_VERSION(mlx4, 1);
-DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_SMP, SI_ORDER_ANY);
+DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY);
Modified: trunk/sys/ofed/drivers/net/mlx4/mcg.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/mcg.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/mcg.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -31,50 +31,88 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/string.h>
-#include <linux/slab.h>
+#include <linux/etherdevice.h>
#include <linux/mlx4/cmd.h>
-#include <linux/mlx4/driver.h>
+#include <linux/module.h>
+#include <linux/printk.h>
#include "mlx4.h"
-#define MGM_QPN_MASK 0x00FFFFFF
-#define MGM_BLCK_LB_BIT 30
+int mlx4_get_mgm_entry_size(struct mlx4_dev *dev)
+{
+ return 1 << dev->oper_log_mgm_entry_size;
+}
-struct mlx4_mgm {
- __be32 next_gid_index;
- __be32 members_count;
- u32 reserved[2];
- u8 gid[16];
- __be32 qp[MLX4_QP_PER_MGM];
-};
+int mlx4_get_qp_per_mgm(struct mlx4_dev *dev)
+{
+ return 4 * (mlx4_get_mgm_entry_size(dev) / 16 - 2);
+}
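
The two helpers above fit together with the (now removed) struct mlx4_mgm layout just below: an entry of 1 << oper_log_mgm_entry_size bytes starts with a 32-byte header (next_gid_index, members_count, two reserved words, the 16-byte GID), and the rest is 4-byte QPNs, hence 4 * (entry_size / 16 - 2) QPs per entry. A quick standalone check of the arithmetic:

    #include <stdio.h>

    static int mgm_entry_size(int log_sz) { return 1 << log_sz; }

    static int qp_per_mgm(int log_sz)
    {
        /* 32-byte header = two 16-byte units; 4 QPNs per 16 bytes after it */
        return 4 * (mgm_entry_size(log_sz) / 16 - 2);
    }

    int main(void)
    {
        int log_sz;

        for (log_sz = 7; log_sz <= 12; log_sz++)
            printf("log=%2d -> %4d-byte entry, %3d QPs\n",
                   log_sz, mgm_entry_size(log_sz), qp_per_mgm(log_sz));
        return 0;
    }
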
-static const u8 zero_gid[16]; /* automatically initialized to 0 */
+static int mlx4_QP_FLOW_STEERING_ATTACH(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox,
+ u32 size,
+ u64 *reg_id)
+{
+ u64 imm;
+ int err = 0;
-static int mlx4_READ_MCG(struct mlx4_dev *dev, int index,
- struct mlx4_cmd_mailbox *mailbox)
+ err = mlx4_cmd_imm(dev, mailbox->dma, &imm, size, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ *reg_id = imm;
+
+ return err;
+}
+
+static int mlx4_QP_FLOW_STEERING_DETACH(struct mlx4_dev *dev, u64 regid)
{
+ int err = 0;
+
+ err = mlx4_cmd(dev, regid, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ return err;
+}
+
+static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index,
+ struct mlx4_cmd_mailbox *mailbox)
+{
return mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, MLX4_CMD_READ_MCG,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}
-static int mlx4_WRITE_MCG(struct mlx4_dev *dev, int index,
- struct mlx4_cmd_mailbox *mailbox)
+static int mlx4_WRITE_ENTRY(struct mlx4_dev *dev, int index,
+ struct mlx4_cmd_mailbox *mailbox)
{
return mlx4_cmd(dev, mailbox->dma, index, 0, MLX4_CMD_WRITE_MCG,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}
-static int mlx4_MGID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
- u16 *hash)
+static int mlx4_WRITE_PROMISC(struct mlx4_dev *dev, u8 port, u8 steer,
+ struct mlx4_cmd_mailbox *mailbox)
{
+ u32 in_mod;
+
+ in_mod = (u32) port << 16 | steer << 1;
+ return mlx4_cmd(dev, mailbox->dma, in_mod, 0x1,
+ MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+}
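
mlx4_WRITE_PROMISC() above reuses the WRITE_MCG opcode with op_mod 0x1 and packs its input modifier as the port number in bits 31:16 and the steer type shifted left by one bit; a standalone illustration (the steer enum value used here is an assumption):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t promisc_in_mod(uint8_t port, uint8_t steer)
    {
        return ((uint32_t)port << 16) | ((uint32_t)steer << 1);
    }

    int main(void)
    {
        /* port 1, steer type 1 (assumed enum value) -> 0x00010002 */
        printf("0x%08x\n", promisc_in_mod(1, 1));
        return 0;
    }
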
+
+static int mlx4_GID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+ u16 *hash, u8 op_mod)
+{
u64 imm;
int err;
- err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, 0, MLX4_CMD_MGID_HASH,
- MLX4_CMD_TIME_CLASS_A);
+ err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, op_mod,
+ MLX4_CMD_MGID_HASH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
if (!err)
*hash = imm;
@@ -82,7 +120,538 @@
return err;
}
+static struct mlx4_promisc_qp *get_promisc_qp(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_promisc_qp *pqp;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return NULL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) {
+ if (pqp->qpn == qpn)
+ return pqp;
+ }
+ /* not found */
+ return NULL;
+}
+
/*
+ * Add new entry to steering data structure.
+ * All promisc QPs should be added as well
+ */
+static int new_steering_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ u32 members_count;
+ struct mlx4_steer_index *new_entry;
+ struct mlx4_promisc_qp *pqp;
+ struct mlx4_promisc_qp *dqp = NULL;
+ u32 prot;
+ int err;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+ new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL);
+ if (!new_entry)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&new_entry->duplicates);
+ new_entry->index = index;
+ list_add_tail(&new_entry->list, &s_steer->steer_entries[steer]);
+
+ /* If the given qpn is also a promisc qp,
+ * it should be inserted into the duplicates list
+ */
+ pqp = get_promisc_qp(dev, port, steer, qpn);
+ if (pqp) {
+ dqp = kmalloc(sizeof *dqp, GFP_KERNEL);
+ if (!dqp) {
+ err = -ENOMEM;
+ goto out_alloc;
+ }
+ dqp->qpn = qpn;
+ list_add_tail(&dqp->list, &new_entry->duplicates);
+ }
+
+ /* if no promisc qps for this vep, we are done */
+ if (list_empty(&s_steer->promisc_qps[steer]))
+ return 0;
+
+ /* now need to add all the promisc qps to the new
+ * steering entry, as they should also receive the packets
+ * destined to this address */
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = -ENOMEM;
+ goto out_alloc;
+ }
+ mgm = mailbox->buf;
+
+ err = mlx4_READ_ENTRY(dev, index, mailbox);
+ if (err)
+ goto out_mailbox;
+
+ members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+ prot = be32_to_cpu(mgm->members_count) >> 30;
+ list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) {
+ /* don't add already existing qpn */
+ if (pqp->qpn == qpn)
+ continue;
+ if (members_count == dev->caps.num_qp_per_mgm) {
+ /* out of space */
+ err = -ENOMEM;
+ goto out_mailbox;
+ }
+
+ /* add the qpn */
+ mgm->qp[members_count++] = cpu_to_be32(pqp->qpn & MGM_QPN_MASK);
+ }
+ /* update the qps count and update the entry with all the promisc qps */
+ mgm->members_count = cpu_to_be32(members_count | (prot << 30));
+ err = mlx4_WRITE_ENTRY(dev, index, mailbox);
+
+out_mailbox:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ if (!err)
+ return 0;
+out_alloc:
+ if (dqp) {
+ list_del(&dqp->list);
+ kfree(dqp);
+ }
+ list_del(&new_entry->list);
+ kfree(new_entry);
+ return err;
+}
+
+/* update the data structures with existing steering entry */
+static int existing_steering_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_steer_index *tmp_entry, *entry = NULL;
+ struct mlx4_promisc_qp *pqp;
+ struct mlx4_promisc_qp *dqp;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ pqp = get_promisc_qp(dev, port, steer, qpn);
+ if (!pqp)
+ return 0; /* nothing to do */
+
+ list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) {
+ if (tmp_entry->index == index) {
+ entry = tmp_entry;
+ break;
+ }
+ }
+ if (unlikely(!entry)) {
+ mlx4_warn(dev, "Steering entry at index %x is not registered\n", index);
+ return -EINVAL;
+ }
+
+ /* the given qpn is listed as a promisc qpn
+ * we need to add it as a duplicate to this entry
+ * for future reference */
+ list_for_each_entry(dqp, &entry->duplicates, list) {
+ if (qpn == dqp->qpn)
+ return 0; /* qp is already duplicated */
+ }
+
+ /* add the qp as a duplicate on this index */
+ dqp = kmalloc(sizeof *dqp, GFP_KERNEL);
+ if (!dqp)
+ return -ENOMEM;
+ dqp->qpn = qpn;
+ list_add_tail(&dqp->list, &entry->duplicates);
+
+ return 0;
+}
+
+/* Check whether a qpn is a duplicate on steering entry
+ * If so, it should not be removed from mgm */
+static bool check_duplicate_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_steer_index *tmp_entry, *entry = NULL;
+ struct mlx4_promisc_qp *dqp, *tmp_dqp;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return false;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ /* if qp is not promisc, it cannot be duplicated */
+ if (!get_promisc_qp(dev, port, steer, qpn))
+ return false;
+
+ /* The qp is a promisc qp, so it is a duplicate on this index
+ * Find the index entry, and remove the duplicate */
+ list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) {
+ if (tmp_entry->index == index) {
+ entry = tmp_entry;
+ break;
+ }
+ }
+ if (unlikely(!entry)) {
+ mlx4_warn(dev, "Steering entry for index %x is not registered\n", index);
+ return false;
+ }
+ list_for_each_entry_safe(dqp, tmp_dqp, &entry->duplicates, list) {
+ if (dqp->qpn == qpn) {
+ list_del(&dqp->list);
+ kfree(dqp);
+ }
+ }
+ return true;
+}
+
+/*
+ * returns true if all the QPs != tqpn contained in this entry
+ * are Promisc QPs. return false otherwise.
+ */
+static bool promisc_steering_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 tqpn, u32 *members_count)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ u32 m_count;
+ bool ret = false;
+ int i;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return false;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return false;
+ mgm = mailbox->buf;
+
+ if (mlx4_READ_ENTRY(dev, index, mailbox))
+ goto out;
+ m_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+ if (members_count)
+ *members_count = m_count;
+
+ for (i = 0; i < m_count; i++) {
+ u32 qpn = be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK;
+ if (!get_promisc_qp(dev, port, steer, qpn) && qpn != tqpn) {
+ /* the qp is not promisc, the entry can't be removed */
+ goto out;
+ }
+ }
+ ret = true;
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return ret;
+}
+
+/* If a steering entry contains only promisc QPs, it can be removed. */
+static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 tqpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_steer_index *entry = NULL, *tmp_entry;
+ u32 members_count;
+ bool ret = false;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return false;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ if (!promisc_steering_entry(dev, port, steer, index, tqpn, &members_count))
+ goto out;
+
+ /* All the qps currently registered for this entry are promiscuous;
+ * checking for duplicates */
+ ret = true;
+ list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) {
+ if (entry->index == index) {
+ if (list_empty(&entry->duplicates) || members_count == 1) {
+ struct mlx4_promisc_qp *pqp, *tmp_pqp;
+ /*
+ * If there is only 1 entry in duplicates then
+ * this is the QP we want to delete, going over
+ * the list and deleting the entry.
+ */
+ list_del(&entry->list);
+ list_for_each_entry_safe(pqp, tmp_pqp,
+ &entry->duplicates,
+ list) {
+ list_del(&pqp->list);
+ kfree(pqp);
+ }
+ kfree(entry);
+ } else {
+ /* This entry contains duplicates so it shouldn't be removed */
+ ret = false;
+ goto out;
+ }
+ }
+ }
+
+out:
+ return ret;
+}
+
+static int add_promisc_qp(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer, u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ struct mlx4_steer_index *entry;
+ struct mlx4_promisc_qp *pqp;
+ struct mlx4_promisc_qp *dqp;
+ u32 members_count;
+ u32 prot;
+ int i;
+ bool found;
+ int err;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ mutex_lock(&priv->mcg_table.mutex);
+
+ if (get_promisc_qp(dev, port, steer, qpn)) {
+ err = 0; /* Nothing to do, already exists */
+ goto out_mutex;
+ }
+
+ pqp = kmalloc(sizeof *pqp, GFP_KERNEL);
+ if (!pqp) {
+ err = -ENOMEM;
+ goto out_mutex;
+ }
+ pqp->qpn = qpn;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = -ENOMEM;
+ goto out_alloc;
+ }
+ mgm = mailbox->buf;
+
+ if (!(mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)) {
+ /* the promisc qp needs to be added to each of the steering
+ * entries; if it is already listed in one, it is added as a
+ * duplicate for that entry */
+ list_for_each_entry(entry, &s_steer->steer_entries[steer], list) {
+ err = mlx4_READ_ENTRY(dev, entry->index, mailbox);
+ if (err)
+ goto out_mailbox;
+
+ members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+ prot = be32_to_cpu(mgm->members_count) >> 30;
+ found = false;
+ for (i = 0; i < members_count; i++) {
+ if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) {
+ /* Entry already exists, add to duplicates */
+ dqp = kmalloc(sizeof *dqp, GFP_KERNEL);
+ if (!dqp) {
+ err = -ENOMEM;
+ goto out_mailbox;
+ }
+ dqp->qpn = qpn;
+ list_add_tail(&dqp->list, &entry->duplicates);
+ found = true;
+ }
+ }
+ if (!found) {
+ /* Need to add the qpn to mgm */
+ if (members_count == dev->caps.num_qp_per_mgm) {
+ /* entry is full */
+ err = -ENOMEM;
+ goto out_mailbox;
+ }
+ mgm->qp[members_count++] = cpu_to_be32(qpn & MGM_QPN_MASK);
+ mgm->members_count = cpu_to_be32(members_count | (prot << 30));
+ err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox);
+ if (err)
+ goto out_mailbox;
+ }
+ }
+ }
+
+ /* add the new qpn to list of promisc qps */
+ list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]);
+ /* now need to add all the promisc qps to default entry */
+ memset(mgm, 0, sizeof *mgm);
+ members_count = 0;
+ list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) {
+ if (members_count == dev->caps.num_qp_per_mgm) {
+ /* entry is full */
+ err = -ENOMEM;
+ goto out_list;
+ }
+ mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK);
+ }
+ mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30);
+
+ err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox);
+ if (err)
+ goto out_list;
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ mutex_unlock(&priv->mcg_table.mutex);
+ return 0;
+
+out_list:
+ list_del(&pqp->list);
+out_mailbox:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+out_alloc:
+ kfree(pqp);
+out_mutex:
+ mutex_unlock(&priv->mcg_table.mutex);
+ return err;
+}
+
+static int remove_promisc_qp(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer, u32 qpn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_steer *s_steer;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ struct mlx4_steer_index *entry, *tmp_entry;
+ struct mlx4_promisc_qp *pqp;
+ struct mlx4_promisc_qp *dqp;
+ u32 members_count;
+ bool found;
+ bool back_to_list = false;
+ int i, loc = -1;
+ int err;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+ mutex_lock(&priv->mcg_table.mutex);
+
+ pqp = get_promisc_qp(dev, port, steer, qpn);
+ if (unlikely(!pqp)) {
+ mlx4_warn(dev, "QP %x is not promiscuous QP\n", qpn);
+ /* nothing to do */
+ err = 0;
+ goto out_mutex;
+ }
+
+ /* remove from list of promisc qps */
+ list_del(&pqp->list);
+
+ /* set the default entry not to include the removed one */
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = -ENOMEM;
+ back_to_list = true;
+ goto out_list;
+ }
+ mgm = mailbox->buf;
+ memset(mgm, 0, sizeof *mgm);
+ members_count = 0;
+ list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list)
+ mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK);
+ mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30);
+
+ err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox);
+ if (err)
+ goto out_mailbox;
+
+ if (!(mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)) {
+ /* remove the qp from all the steering entries*/
+ list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) {
+ found = false;
+ list_for_each_entry(dqp, &entry->duplicates, list) {
+ if (dqp->qpn == qpn) {
+ found = true;
+ break;
+ }
+ }
+ if (found) {
+ /* a duplicate, no need to change the mgm,
+ * only update the duplicates list */
+ list_del(&dqp->list);
+ kfree(dqp);
+ } else {
+ err = mlx4_READ_ENTRY(dev, entry->index, mailbox);
+ if (err)
+ goto out_mailbox;
+ members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+ if (!members_count) {
+ mlx4_warn(dev, "QP %06x wasn't found in entry %x mcount=0."
+ " deleting entry...\n", qpn, entry->index);
+ list_del(&entry->list);
+ kfree(entry);
+ continue;
+ }
+
+ for (i = 0; i < members_count; ++i)
+ if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) {
+ loc = i;
+ break;
+ }
+
+ if (loc < 0) {
+ mlx4_err(dev, "QP %06x wasn't found in entry %d\n",
+ qpn, entry->index);
+ err = -EINVAL;
+ goto out_mailbox;
+ }
+
+ /* copy the last QP in this MGM over removed QP */
+ mgm->qp[loc] = mgm->qp[members_count - 1];
+ mgm->qp[members_count - 1] = 0;
+ mgm->members_count = cpu_to_be32(--members_count |
+ (MLX4_PROT_ETH << 30));
+
+ err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox);
+ if (err)
+ goto out_mailbox;
+ }
+ }
+ }
+
+out_mailbox:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+out_list:
+ if (back_to_list)
+ list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]);
+ else
+ kfree(pqp);
+out_mutex:
+ mutex_unlock(&priv->mcg_table.mutex);
+ return err;
+}
+
+/*
* Caller must hold MCG table semaphore. gid and mgm parameters must
* be properly aligned for command interface.
*
@@ -97,15 +666,18 @@
* If no AMGM exists for given gid, *index = -1, *prev = index of last
* entry in hash chain and *mgm holds end of hash chain.
*/
-static int find_mgm(struct mlx4_dev *dev,
- u8 *gid, enum mlx4_mcast_prot prot,
- struct mlx4_cmd_mailbox *mgm_mailbox,
- u16 *hash, int *prev, int *index)
+static int find_entry(struct mlx4_dev *dev, u8 port,
+ u8 *gid, enum mlx4_protocol prot,
+ struct mlx4_cmd_mailbox *mgm_mailbox,
+ int *prev, int *index)
{
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mgm *mgm = mgm_mailbox->buf;
u8 *mgid;
int err;
+ u16 hash;
+ u8 op_mod = (prot == MLX4_PROT_ETH) ?
+ !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) : 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
@@ -114,24 +686,26 @@
memcpy(mgid, gid, 16);
- err = mlx4_MGID_HASH(dev, mailbox, hash);
+ err = mlx4_GID_HASH(dev, mailbox, &hash, op_mod);
mlx4_free_cmd_mailbox(dev, mailbox);
if (err)
return err;
- if (0)
- mlx4_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash);
+ if (0) {
+ mlx4_dbg(dev, "Hash for "GID_PRINT_FMT" is %04x\n",
+ GID_PRINT_ARGS(gid), hash);
+ }
- *index = *hash;
+ *index = hash;
*prev = -1;
do {
- err = mlx4_READ_MCG(dev, *index, mgm_mailbox);
+ err = mlx4_READ_ENTRY(dev, *index, mgm_mailbox);
if (err)
return err;
- if (!memcmp(mgm->gid, zero_gid, 16)) {
- if (*index != *hash) {
+ if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) {
+ if (*index != hash) {
mlx4_err(dev, "Found zero MGID in AMGM.\n");
err = -EINVAL;
}
@@ -139,7 +713,7 @@
}
if (!memcmp(mgm->gid, gid, 16) &&
- (prot == be32_to_cpu(mgm->members_count) >> 30))
+ be32_to_cpu(mgm->members_count) >> 30 == prot)
return err;
*prev = *index;
@@ -150,18 +724,300 @@
return err;
}
-int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
- int block_mcast_loopback, enum mlx4_mcast_prot prot)
+static const u8 __promisc_mode[] = {
+ [MLX4_FS_REGULAR] = 0x0,
+ [MLX4_FS_ALL_DEFAULT] = 0x1,
+ [MLX4_FS_MC_DEFAULT] = 0x3,
+ [MLX4_FS_UC_SNIFFER] = 0x4,
+ [MLX4_FS_MC_SNIFFER] = 0x5,
+};
+
+int map_sw_to_hw_steering_mode(struct mlx4_dev *dev,
+ enum mlx4_net_trans_promisc_mode flow_type)
{
+ if (flow_type >= MLX4_FS_MODE_NUM || flow_type < 0) {
+ mlx4_err(dev, "Invalid flow type. type = %d\n", flow_type);
+ return -EINVAL;
+ }
+ return __promisc_mode[flow_type];
+}
+EXPORT_SYMBOL_GPL(map_sw_to_hw_steering_mode);
+
+static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl,
+ struct mlx4_net_trans_rule_hw_ctrl *hw)
+{
+ u8 flags = 0;
+
+ flags = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0;
+ flags |= ctrl->exclusive ? (1 << 2) : 0;
+ flags |= ctrl->allow_loopback ? (1 << 3) : 0;
+
+ hw->flags = flags;
+ hw->type = __promisc_mode[ctrl->promisc_mode];
+ hw->prio = cpu_to_be16(ctrl->priority);
+ hw->port = ctrl->port;
+ hw->qpn = cpu_to_be32(ctrl->qpn);
+}
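
The control-segment flags byte assembled above carries three booleans: bit 0 for LIFO queue mode, bit 2 for exclusive, bit 3 for allow_loopback (bit 1 is left clear). A standalone sketch of the packing:

    #include <stdint.h>
    #include <stdio.h>

    struct rule_ctrl {
        int lifo;           /* queue_mode == MLX4_NET_TRANS_Q_LIFO */
        int exclusive;
        int allow_loopback;
    };

    static uint8_t ctrl_flags(const struct rule_ctrl *c)
    {
        uint8_t flags = 0;

        flags |= c->lifo ? 1 : 0;                  /* bit 0 */
        flags |= c->exclusive ? (1 << 2) : 0;      /* bit 2 */
        flags |= c->allow_loopback ? (1 << 3) : 0; /* bit 3 */
        return flags;
    }

    int main(void)
    {
        struct rule_ctrl c = { 0, 1, 1 };

        printf("flags = 0x%02x\n", ctrl_flags(&c)); /* 0x0c */
        return 0;
    }
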
+
+const u16 __sw_id_hw[] = {
+ [MLX4_NET_TRANS_RULE_ID_ETH] = 0xE001,
+ [MLX4_NET_TRANS_RULE_ID_IB] = 0xE005,
+ [MLX4_NET_TRANS_RULE_ID_IPV6] = 0xE003,
+ [MLX4_NET_TRANS_RULE_ID_IPV4] = 0xE002,
+ [MLX4_NET_TRANS_RULE_ID_TCP] = 0xE004,
+ [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006
+};
+
+int map_sw_to_hw_steering_id(struct mlx4_dev *dev,
+ enum mlx4_net_trans_rule_id id)
+{
+ if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) {
+ mlx4_err(dev, "Invalid network rule id. id = %d\n", id);
+ return -EINVAL;
+ }
+ return __sw_id_hw[id];
+}
+EXPORT_SYMBOL_GPL(map_sw_to_hw_steering_id);
+
+static const int __rule_hw_sz[] = {
+ [MLX4_NET_TRANS_RULE_ID_ETH] =
+ sizeof(struct mlx4_net_trans_rule_hw_eth),
+ [MLX4_NET_TRANS_RULE_ID_IB] =
+ sizeof(struct mlx4_net_trans_rule_hw_ib),
+ [MLX4_NET_TRANS_RULE_ID_IPV6] = 0,
+ [MLX4_NET_TRANS_RULE_ID_IPV4] =
+ sizeof(struct mlx4_net_trans_rule_hw_ipv4),
+ [MLX4_NET_TRANS_RULE_ID_TCP] =
+ sizeof(struct mlx4_net_trans_rule_hw_tcp_udp),
+ [MLX4_NET_TRANS_RULE_ID_UDP] =
+ sizeof(struct mlx4_net_trans_rule_hw_tcp_udp)
+};
+
+int hw_rule_sz(struct mlx4_dev *dev,
+ enum mlx4_net_trans_rule_id id)
+{
+ if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) {
+ mlx4_err(dev, "Invalid network rule id. id = %d\n", id);
+ return -EINVAL;
+ }
+
+ return __rule_hw_sz[id];
+}
+EXPORT_SYMBOL_GPL(hw_rule_sz);
+
+static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec,
+ struct _rule_hw *rule_hw)
+{
+ if (hw_rule_sz(dev, spec->id) < 0)
+ return -EINVAL;
+ memset(rule_hw, 0, hw_rule_sz(dev, spec->id));
+ rule_hw->id = cpu_to_be16(__sw_id_hw[spec->id]);
+ rule_hw->size = hw_rule_sz(dev, spec->id) >> 2;
+
+ switch (spec->id) {
+ case MLX4_NET_TRANS_RULE_ID_ETH:
+ memcpy(rule_hw->eth.dst_mac, spec->eth.dst_mac, ETH_ALEN);
+ memcpy(rule_hw->eth.dst_mac_msk, spec->eth.dst_mac_msk,
+ ETH_ALEN);
+ memcpy(rule_hw->eth.src_mac, spec->eth.src_mac, ETH_ALEN);
+ memcpy(rule_hw->eth.src_mac_msk, spec->eth.src_mac_msk,
+ ETH_ALEN);
+ if (spec->eth.ether_type_enable) {
+ rule_hw->eth.ether_type_enable = 1;
+ rule_hw->eth.ether_type = spec->eth.ether_type;
+ }
+ rule_hw->eth.vlan_tag = spec->eth.vlan_id;
+ rule_hw->eth.vlan_tag_msk = spec->eth.vlan_id_msk;
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IB:
+ rule_hw->ib.l3_qpn = spec->ib.l3_qpn;
+ rule_hw->ib.qpn_mask = spec->ib.qpn_msk;
+ memcpy(&rule_hw->ib.dst_gid, &spec->ib.dst_gid, 16);
+ memcpy(&rule_hw->ib.dst_gid_msk, &spec->ib.dst_gid_msk, 16);
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV6:
+ return -EOPNOTSUPP;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ rule_hw->ipv4.src_ip = spec->ipv4.src_ip;
+ rule_hw->ipv4.src_ip_msk = spec->ipv4.src_ip_msk;
+ rule_hw->ipv4.dst_ip = spec->ipv4.dst_ip;
+ rule_hw->ipv4.dst_ip_msk = spec->ipv4.dst_ip_msk;
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ rule_hw->tcp_udp.dst_port = spec->tcp_udp.dst_port;
+ rule_hw->tcp_udp.dst_port_msk = spec->tcp_udp.dst_port_msk;
+ rule_hw->tcp_udp.src_port = spec->tcp_udp.src_port;
+ rule_hw->tcp_udp.src_port_msk = spec->tcp_udp.src_port_msk;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return __rule_hw_sz[spec->id];
+}
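
parse_trans_rule() returns the spec size in bytes so the caller can advance through the mailbox, while the size field written into the hardware header is in 4-byte units (hence the >> 2). A quick standalone check of that arithmetic, with hypothetical spec sizes:

    #include <assert.h>

    int main(void)
    {
            int eth = 32, ipv4 = 16;        /* hypothetical byte sizes */

            assert((eth >> 2) == 8);        /* rule_hw->size for the ETH spec */
            assert((ipv4 >> 2) == 4);       /* rule_hw->size for the IPv4 spec */
            return 0;
    }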
+
+static void mlx4_err_rule(struct mlx4_dev *dev, char *str,
+ struct mlx4_net_trans_rule *rule)
+{
+#define BUF_SIZE 256
+ struct mlx4_spec_list *cur;
+ char buf[BUF_SIZE];
+ int len = 0;
+
+ mlx4_err(dev, "%s", str);
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "port = %d prio = 0x%x qp = 0x%x ",
+ rule->port, rule->priority, rule->qpn);
+
+ list_for_each_entry(cur, &rule->list, list) {
+ switch (cur->id) {
+ case MLX4_NET_TRANS_RULE_ID_ETH:
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dmac = %pM ", &cur->eth.dst_mac);
+ if (cur->eth.ether_type)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "ethertype = 0x%x ",
+ be16_to_cpu(cur->eth.ether_type));
+ if (cur->eth.vlan_id)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "vlan-id = %d ",
+ be16_to_cpu(cur->eth.vlan_id));
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ if (cur->ipv4.src_ip)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "src-ip = %pI4 ",
+ &cur->ipv4.src_ip);
+ if (cur->ipv4.dst_ip)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-ip = %pI4 ",
+ &cur->ipv4.dst_ip);
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ if (cur->tcp_udp.src_port)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "src-port = %d ",
+ be16_to_cpu(cur->tcp_udp.src_port));
+ if (cur->tcp_udp.dst_port)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-port = %d ",
+ be16_to_cpu(cur->tcp_udp.dst_port));
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IB:
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-gid = "GID_PRINT_FMT"\n",
+ GID_PRINT_ARGS(cur->ib.dst_gid));
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-gid-mask = "GID_PRINT_FMT"\n",
+ GID_PRINT_ARGS(cur->ib.dst_gid_msk));
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV6:
+ break;
+
+ default:
+ break;
+ }
+ }
+ len += snprintf(buf + len, BUF_SIZE - len, "\n");
+ mlx4_err(dev, "%s", buf);
+
+ if (len >= BUF_SIZE)
+ mlx4_err(dev, "Network rule error message was truncated, print buffer is too small.\n");
+}
+
+int mlx4_flow_attach(struct mlx4_dev *dev,
+ struct mlx4_net_trans_rule *rule, u64 *reg_id)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_spec_list *cur;
+ u32 size = 0;
+ int ret;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ memset(mailbox->buf, 0, sizeof(struct mlx4_net_trans_rule_hw_ctrl));
+ trans_rule_ctrl_to_hw(rule, mailbox->buf);
+
+ size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+
+ list_for_each_entry(cur, &rule->list, list) {
+ ret = parse_trans_rule(dev, cur, mailbox->buf + size);
+ if (ret < 0) {
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return -EINVAL;
+ }
+ size += ret;
+ }
+
+ ret = mlx4_QP_FLOW_STEERING_ATTACH(dev, mailbox, size >> 2, reg_id);
+ if (ret == -ENOMEM)
+ mlx4_err_rule(dev,
+ "mcg table is full. Fail to register network rule.\n",
+ rule);
+ else if (ret)
+ mlx4_err_rule(dev, "Fail to register network rule.\n", rule);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_attach);
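
The mailbox that mlx4_flow_attach() builds is a packed sequence: one mlx4_net_trans_rule_hw_ctrl header followed by each spec in list order, with the running byte offset finally handed to the firmware as size >> 2 dwords. A standalone sketch of the cursor arithmetic, reusing the hypothetical spec sizes above plus an assumed 16-byte control header:

    #include <assert.h>
    #include <stddef.h>

    int main(void)
    {
            size_t specs[] = { 32, 16 };    /* hypothetical spec byte sizes */
            size_t off = 16, i;             /* assumed sizeof(ctrl header) */

            for (i = 0; i < 2; i++)
                    off += specs[i];        /* parse_trans_rule() return value */

            assert(off == 64 && (off >> 2) == 16);  /* 16 dwords to the FW */
            return 0;
    }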
+
+int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id)
+{
+ int err;
+
+ err = mlx4_QP_FLOW_STEERING_DETACH(dev, reg_id);
+ if (err)
+ mlx4_err(dev, "Fail to detach network rule. registration id = 0x%llx\n",
+ (unsigned long long)reg_id);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_detach);
+
+int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn)
+{
+ int err;
+ u64 in_param;
+
+ in_param = ((u64) min_range_qpn) << 32;
+ in_param |= ((u64) max_range_qpn) & 0xFFFFFFFF;
+
+ err = mlx4_cmd(dev, in_param, 0, 0,
+ MLX4_FLOW_STEERING_IB_UC_QP_RANGE,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_FLOW_STEERING_IB_UC_QP_RANGE);
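
mlx4_FLOW_STEERING_IB_UC_QP_RANGE() packs both QP numbers into the single 64-bit immediate parameter, min in the high half and max in the low half. A standalone round trip of that packing:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t min_qpn = 0x100, max_qpn = 0x1ff;
            uint64_t in_param = ((uint64_t)min_qpn << 32) |
                                ((uint64_t)max_qpn & 0xffffffff);

            assert((uint32_t)(in_param >> 32) == min_qpn);
            assert((uint32_t)(in_param & 0xffffffff) == max_qpn);
            return 0;
    }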
+
+int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ int block_mcast_loopback, enum mlx4_protocol prot,
+ enum mlx4_steer_type steer)
+{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mgm *mgm;
u32 members_count;
- u16 hash;
int index, prev;
int link = 0;
int i;
int err;
+ u8 port = gid[5];
+ u8 new_entry = 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
@@ -169,14 +1025,16 @@
mgm = mailbox->buf;
mutex_lock(&priv->mcg_table.mutex);
-
- err = find_mgm(dev, gid, prot, mailbox, &hash, &prev, &index);
+ err = find_entry(dev, port, gid, prot,
+ mailbox, &prev, &index);
if (err)
goto out;
if (index != -1) {
- if (!memcmp(mgm->gid, zero_gid, 16))
+ if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) {
+ new_entry = 1;
memcpy(mgm->gid, gid, 16);
+ }
} else {
link = 1;
@@ -188,12 +1046,13 @@
}
index += dev->caps.num_mgms;
+ new_entry = 1;
memset(mgm, 0, sizeof *mgm);
memcpy(mgm->gid, gid, 16);
}
members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
- if (members_count == MLX4_QP_PER_MGM) {
+ if (members_count == dev->caps.num_qp_per_mgm) {
mlx4_err(dev, "MGM at index %x is full.\n", index);
err = -ENOMEM;
goto out;
@@ -209,25 +1068,36 @@
mgm->qp[members_count++] = cpu_to_be32((qp->qpn & MGM_QPN_MASK) |
(!!mlx4_blck_lb << MGM_BLCK_LB_BIT));
- mgm->members_count = cpu_to_be32(members_count | ((u32) prot << 30));
+ mgm->members_count = cpu_to_be32(members_count | (u32) prot << 30);
- err = mlx4_WRITE_MCG(dev, index, mailbox);
+ err = mlx4_WRITE_ENTRY(dev, index, mailbox);
if (err)
goto out;
+ /* even when not linking into the AMGM chain, still manage the steering entry below */
if (!link)
- goto out;
+ goto skip_link;
- err = mlx4_READ_MCG(dev, prev, mailbox);
+ err = mlx4_READ_ENTRY(dev, prev, mailbox);
if (err)
goto out;
mgm->next_gid_index = cpu_to_be32(index << 6);
- err = mlx4_WRITE_MCG(dev, prev, mailbox);
+ err = mlx4_WRITE_ENTRY(dev, prev, mailbox);
if (err)
goto out;
+skip_link:
+ if (prot == MLX4_PROT_ETH) {
+ /* manage the steering entry for promisc mode */
+ if (new_entry)
+ new_steering_entry(dev, port, steer, index, qp->qpn);
+ else
+ existing_steering_entry(dev, port, steer,
+ index, qp->qpn);
+ }
+
out:
if (err && link && index != -1) {
if (index < dev->caps.num_mgms)
@@ -235,7 +1105,7 @@
index, dev->caps.num_mgms);
else
mlx4_bitmap_free(&priv->mcg_table.bitmap,
- index - dev->caps.num_mgms);
+ index - dev->caps.num_mgms, MLX4_USE_RR);
}
mutex_unlock(&priv->mcg_table.mutex);
@@ -242,19 +1112,19 @@
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
-EXPORT_SYMBOL_GPL(mlx4_multicast_attach);
-int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
- enum mlx4_mcast_prot prot)
+int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ enum mlx4_protocol prot, enum mlx4_steer_type steer)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mgm *mgm;
u32 members_count;
- u16 hash;
int prev, index;
- int i, loc;
+ int i, loc = -1;
int err;
+ u8 port = gid[5];
+ bool removed_entry = false;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
@@ -263,20 +1133,33 @@
mutex_lock(&priv->mcg_table.mutex);
- err = find_mgm(dev, gid, prot, mailbox, &hash, &prev, &index);
+ err = find_entry(dev, port, gid, prot,
+ mailbox, &prev, &index);
if (err)
goto out;
if (index == -1) {
- mlx4_err(dev, "MGID %pI6 not found\n", gid);
+ mlx4_err(dev, "MGID "GID_PRINT_FMT" not found\n",
+ GID_PRINT_ARGS(gid));
err = -EINVAL;
goto out;
}
+ /*
+ * If this QP is also a promisc QP, it should not be removed
+ * unless at least one non-promisc QP is also attached to this MCG.
+ */
+ if (prot == MLX4_PROT_ETH &&
+ check_duplicate_entry(dev, port, steer, index, qp->qpn) &&
+ !promisc_steering_entry(dev, port, steer, index, qp->qpn, NULL))
+ goto out;
+
members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
- for (loc = -1, i = 0; i < members_count; ++i)
- if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn)
+ for (i = 0; i < members_count; ++i)
+ if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) {
loc = i;
+ break;
+ }
if (loc == -1) {
mlx4_err(dev, "QP %06x not found in MGM\n", qp->qpn);
@@ -284,27 +1167,33 @@
goto out;
}
+ /* copy the last QP in this MGM over the removed QP */
+ mgm->qp[loc] = mgm->qp[members_count - 1];
+ mgm->qp[members_count - 1] = 0;
+ mgm->members_count = cpu_to_be32(--members_count | (u32) prot << 30);
- mgm->members_count = cpu_to_be32(--members_count | ((u32) prot << 30));
- mgm->qp[loc] = mgm->qp[i - 1];
- mgm->qp[i - 1] = 0;
-
- if (i != 1) {
- err = mlx4_WRITE_MCG(dev, index, mailbox);
+ if (prot == MLX4_PROT_ETH)
+ removed_entry = can_remove_steering_entry(dev, port, steer,
+ index, qp->qpn);
+ if (members_count && (prot != MLX4_PROT_ETH || !removed_entry)) {
+ err = mlx4_WRITE_ENTRY(dev, index, mailbox);
goto out;
}
+ /* We are going to delete the entry; the members count should be 0 */
+ mgm->members_count = cpu_to_be32((u32) prot << 30);
+
if (prev == -1) {
/* Remove entry from MGM */
int amgm_index = be32_to_cpu(mgm->next_gid_index) >> 6;
if (amgm_index) {
- err = mlx4_READ_MCG(dev, amgm_index, mailbox);
+ err = mlx4_READ_ENTRY(dev, amgm_index, mailbox);
if (err)
goto out;
} else
memset(mgm->gid, 0, 16);
- err = mlx4_WRITE_MCG(dev, index, mailbox);
+ err = mlx4_WRITE_ENTRY(dev, index, mailbox);
if (err)
goto out;
@@ -314,18 +1203,18 @@
index, amgm_index, dev->caps.num_mgms);
else
mlx4_bitmap_free(&priv->mcg_table.bitmap,
- amgm_index - dev->caps.num_mgms);
+ amgm_index - dev->caps.num_mgms, MLX4_USE_RR);
}
} else {
/* Remove entry from AMGM */
int cur_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
- err = mlx4_READ_MCG(dev, prev, mailbox);
+ err = mlx4_READ_ENTRY(dev, prev, mailbox);
if (err)
goto out;
mgm->next_gid_index = cpu_to_be32(cur_next_index << 6);
- err = mlx4_WRITE_MCG(dev, prev, mailbox);
+ err = mlx4_WRITE_ENTRY(dev, prev, mailbox);
if (err)
goto out;
@@ -334,7 +1223,7 @@
prev, index, dev->caps.num_mgms);
else
mlx4_bitmap_free(&priv->mcg_table.bitmap,
- index - dev->caps.num_mgms);
+ index - dev->caps.num_mgms, MLX4_USE_RR);
}
out:
@@ -343,13 +1232,299 @@
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
+
+static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp,
+ u8 gid[16], u8 attach, u8 block_loopback,
+ enum mlx4_protocol prot)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err = 0;
+ int qpn;
+
+ if (!mlx4_is_mfunc(dev))
+ return -EBADF;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ memcpy(mailbox->buf, gid, 16);
+ qpn = qp->qpn;
+ qpn |= (prot << 28);
+ if (attach && block_loopback)
+ qpn |= (1 << 31);
+
+ err = mlx4_cmd(dev, mailbox->dma, qpn, attach,
+ MLX4_CMD_QP_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
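
mlx4_QP_ATTACH() overloads its 32-bit modifier word: the QP number in the low bits, the protocol at bit 28, and bit 31 set for block-loopback on attach. A standalone decode sketch; the 24-bit QPN mask is an assumption based on MGM_QPN_MASK elsewhere in the driver:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t qpn = 0x00ab12, prot = 1, block_lb = 1;
            uint32_t word = qpn | (prot << 28) | (block_lb << 31);

            assert((word & 0x00ffffff) == qpn);     /* assumed 24-bit QPN */
            assert(((word >> 28) & 0x7) == prot);
            assert((word >> 31) == block_lb);
            return 0;
    }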
+
+int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp,
+ u8 gid[16], u8 port,
+ int block_mcast_loopback,
+ enum mlx4_protocol prot, u64 *reg_id)
+{
+ struct mlx4_spec_list spec = { {NULL} };
+ __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ struct mlx4_net_trans_rule rule = {
+ .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ .promisc_mode = MLX4_FS_REGULAR,
+ .priority = MLX4_DOMAIN_NIC,
+ };
+
+ rule.allow_loopback = !block_mcast_loopback;
+ rule.port = port;
+ rule.qpn = qp->qpn;
+ INIT_LIST_HEAD(&rule.list);
+
+ switch (prot) {
+ case MLX4_PROT_ETH:
+ spec.id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec.eth.dst_mac, &gid[10], ETH_ALEN);
+ memcpy(spec.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+ break;
+
+ case MLX4_PROT_IB_IPV6:
+ spec.id = MLX4_NET_TRANS_RULE_ID_IB;
+ memcpy(spec.ib.dst_gid, gid, 16);
+ memset(&spec.ib.dst_gid_msk, 0xff, 16);
+ break;
+ default:
+ return -EINVAL;
+ }
+ list_add_tail(&spec.list, &rule.list);
+
+ return mlx4_flow_attach(dev, &rule, reg_id);
+}
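
For Ethernet rules the 16-byte gid argument is not a real IB GID: mlx4_trans_to_dmfs_attach() reads the destination MAC out of bytes 10..15, and mlx4_qp_attach_common() above reads the port out of byte 5. A standalone sketch of that layout (byte positions from the code, sample values invented):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
            uint8_t gid[16] = { 0 };
            const uint8_t mac[6] = { 0x00, 0x02, 0xc9, 0x11, 0x22, 0x33 };

            gid[5] = 1;                     /* port, read back as gid[5] */
            memcpy(&gid[10], mac, 6);       /* dst MAC, read back at &gid[10] */

            assert(gid[5] == 1 && memcmp(&gid[10], mac, 6) == 0);
            return 0;
    }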
+
+int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ u8 port, int block_mcast_loopback,
+ enum mlx4_protocol prot, u64 *reg_id)
+{
+ enum mlx4_steer_type steer;
+ steer = (is_valid_ether_addr(&gid[10])) ? MLX4_UC_STEER : MLX4_MC_STEER;
+
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_A0:
+ if (prot == MLX4_PROT_ETH)
+ return 0;
+
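+	/* for non-ETH protocols, fall through to the B0 handling below */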
+ case MLX4_STEERING_MODE_B0:
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (steer << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 1,
+ block_mcast_loopback, prot);
+ return mlx4_qp_attach_common(dev, qp, gid,
+ block_mcast_loopback, prot,
+ MLX4_MC_STEER);
+
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ return mlx4_trans_to_dmfs_attach(dev, qp, gid, port,
+ block_mcast_loopback,
+ prot, reg_id);
+ default:
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_attach);
+
+int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ enum mlx4_protocol prot, u64 reg_id)
+{
+ enum mlx4_steer_type steer;
+ steer = (is_valid_ether_addr(&gid[10])) ? MLX4_UC_STEER : MLX4_MC_STEER;
+
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_A0:
+ if (prot == MLX4_PROT_ETH)
+ return 0;
+
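+	/* for non-ETH protocols, fall through to the B0 handling below */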
+ case MLX4_STEERING_MODE_B0:
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (steer << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot);
+
+ return mlx4_qp_detach_common(dev, qp, gid, prot,
+ MLX4_MC_STEER);
+
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ return mlx4_flow_detach(dev, reg_id);
+
+ default:
+ return -EINVAL;
+ }
+}
EXPORT_SYMBOL_GPL(mlx4_multicast_detach);
+int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port,
+ u32 qpn, enum mlx4_net_trans_promisc_mode mode)
+{
+ struct mlx4_net_trans_rule rule;
+ u64 *regid_p;
+
+ switch (mode) {
+ case MLX4_FS_ALL_DEFAULT:
+ regid_p = &dev->regid_promisc_array[port];
+ break;
+ case MLX4_FS_MC_DEFAULT:
+ regid_p = &dev->regid_allmulti_array[port];
+ break;
+ default:
+ return -1;
+ }
+
+ if (*regid_p != 0)
+ return -1;
+
+ rule.promisc_mode = mode;
+ rule.port = port;
+ rule.qpn = qpn;
+ INIT_LIST_HEAD(&rule.list);
+ mlx4_err(dev, "going promisc on %x\n", port);
+
+ return mlx4_flow_attach(dev, &rule, regid_p);
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_add);
+
+int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port,
+ enum mlx4_net_trans_promisc_mode mode)
+{
+ int ret;
+ u64 *regid_p;
+
+ switch (mode) {
+ case MLX4_FS_ALL_DEFAULT:
+ regid_p = &dev->regid_promisc_array[port];
+ break;
+ case MLX4_FS_MC_DEFAULT:
+ regid_p = &dev->regid_allmulti_array[port];
+ break;
+ default:
+ return -1;
+ }
+
+ if (*regid_p == 0)
+ return -1;
+
+ ret = mlx4_flow_detach(dev, *regid_p);
+ if (ret == 0)
+ *regid_p = 0;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_remove);
+
+int mlx4_unicast_attach(struct mlx4_dev *dev,
+ struct mlx4_qp *qp, u8 gid[16],
+ int block_mcast_loopback, enum mlx4_protocol prot)
+{
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (MLX4_UC_STEER << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 1,
+ block_mcast_loopback, prot);
+
+ return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback,
+ prot, MLX4_UC_STEER);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_attach);
+
+int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp,
+ u8 gid[16], enum mlx4_protocol prot)
+{
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (MLX4_UC_STEER << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot);
+
+ return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_UC_STEER);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_detach);
+
+int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ u32 qpn = (u32) vhcr->in_param & 0xffffffff;
+ u8 port = vhcr->in_param >> 62;
+ enum mlx4_steer_type steer = vhcr->in_modifier;
+
+ /* Promiscuous unicast is not allowed in mfunc for VFs */
+ if ((slave != dev->caps.function) && (steer == MLX4_UC_STEER))
+ return 0;
+
+ if (vhcr->op_modifier)
+ return add_promisc_qp(dev, port, steer, qpn);
+ else
+ return remove_promisc_qp(dev, port, steer, qpn);
+}
+
+static int mlx4_PROMISC(struct mlx4_dev *dev, u32 qpn,
+ enum mlx4_steer_type steer, u8 add, u8 port)
+{
+ return mlx4_cmd(dev, (u64) qpn | (u64) port << 62, (u32) steer, add,
+ MLX4_CMD_PROMISC, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+}
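
mlx4_PROMISC() and mlx4_PROMISC_wrapper() above agree on the in_param split: QPN in the low 32 bits, port in the top two bits. A standalone round trip:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t qpn = 0x40;
            uint8_t port = 2;
            uint64_t in_param = (uint64_t)qpn | ((uint64_t)port << 62);

            assert((uint32_t)(in_param & 0xffffffff) == qpn);
            assert((uint8_t)(in_param >> 62) == port);
            return 0;
    }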
+
+int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+ if (mlx4_is_mfunc(dev))
+ return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 1, port);
+
+ return add_promisc_qp(dev, port, MLX4_MC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_add);
+
+int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+ if (mlx4_is_mfunc(dev))
+ return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 0, port);
+
+ return remove_promisc_qp(dev, port, MLX4_MC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_remove);
+
+int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+ if (mlx4_is_mfunc(dev))
+ return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 1, port);
+
+ return add_promisc_qp(dev, port, MLX4_UC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_add);
+
+int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+ if (mlx4_is_mfunc(dev))
+ return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 0, port);
+
+ return remove_promisc_qp(dev, port, MLX4_UC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_remove);
+
int mlx4_init_mcg_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
+ /* No need for the mcg_table bitmap when the FW manages the MCG table */
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return 0;
err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms,
dev->caps.num_amgms - 1, 0, 0);
if (err)
@@ -362,5 +1537,7 @@
void mlx4_cleanup_mcg_table(struct mlx4_dev *dev)
{
- mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap);
+ if (dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap);
}
Modified: trunk/sys/ofed/drivers/net/mlx4/mlx4.h
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/mlx4.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/mlx4.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,7 +2,7 @@
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
- * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -39,28 +39,60 @@
#include <linux/mutex.h>
#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
#include <linux/timer.h>
+#include <linux/semaphore.h>
#include <linux/workqueue.h>
-
+#include <linux/device.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/doorbell.h>
+#include <linux/mlx4/cmd.h>
#define DRV_NAME "mlx4_core"
#define PFX DRV_NAME ": "
-#define DRV_VERSION "1.0-ofed1.5.2"
-#define DRV_RELDATE "August 4, 2010"
+#define DRV_VERSION "2.1.6"
+#define DRV_RELDATE __DATE__
+#define DRV_STACK_NAME "Linux-MLNX_OFED"
+#define DRV_STACK_VERSION "2.1"
+#define DRV_NAME_FOR_FW DRV_STACK_NAME","DRV_STACK_VERSION
+
+#define MLX4_FS_UDP_UC_EN (1 << 1)
+#define MLX4_FS_TCP_UC_EN (1 << 2)
+#define MLX4_FS_NUM_OF_L2_ADDR 8
+#define MLX4_FS_MGM_LOG_ENTRY_SIZE 7
+#define MLX4_FS_NUM_MCG (1 << 17)
+
+struct mlx4_set_port_prio2tc_context {
+ u8 prio2tc[4];
+};
+
+struct mlx4_port_scheduler_tc_cfg_be {
+ __be16 pg;
+ __be16 bw_precentage;
+ __be16 max_bw_units; /* 3-100Mbps, 4-1Gbps, other values - reserved */
+ __be16 max_bw_value;
+};
+
+struct mlx4_set_port_scheduler_context {
+ struct mlx4_port_scheduler_tc_cfg_be tc[MLX4_NUM_TC];
+};
+
enum {
MLX4_HCR_BASE = 0x80680,
MLX4_HCR_SIZE = 0x0001c,
- MLX4_CLR_INT_SIZE = 0x00008
+ MLX4_CLR_INT_SIZE = 0x00008,
+ MLX4_SLAVE_COMM_BASE = 0x0,
+ MLX4_COMM_PAGESIZE = 0x1000,
+ MLX4_CLOCK_SIZE = 0x00008
};
enum {
- MLX4_MGM_ENTRY_SIZE = 0x100,
- MLX4_QP_PER_MGM = 4 * (MLX4_MGM_ENTRY_SIZE / 16 - 2),
- MLX4_MTT_ENTRY_PER_SEG = 8
+ MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE = 10,
+ MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7,
+ MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12,
+ MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE)/16 - 2),
};
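
The MLX4_MAX_QP_PER_MGM formula follows from the MGM entry layout defined further down in this header: 16-byte rows, a 32-byte header (two rows), and four 32-bit QPNs per remaining row. Worked out for the maximum log entry size of 12:

    #include <assert.h>

    int main(void)
    {
            int entry = 1 << 12;                    /* 4096-byte MGM entry */
            int qp_per_mgm = 4 * (entry / 16 - 2);  /* (256 - 2) rows * 4 QPNs */

            assert(qp_per_mgm == 1016);
            return 0;
    }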
enum {
@@ -80,6 +112,107 @@
MLX4_NUM_CMPTS = MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT
};
+enum mlx4_mpt_state {
+ MLX4_MPT_DISABLED = 0,
+ MLX4_MPT_EN_HW,
+ MLX4_MPT_EN_SW
+};
+
+#define MLX4_COMM_TIME 10000
+enum {
+ MLX4_COMM_CMD_RESET,
+ MLX4_COMM_CMD_VHCR0,
+ MLX4_COMM_CMD_VHCR1,
+ MLX4_COMM_CMD_VHCR2,
+ MLX4_COMM_CMD_VHCR_EN,
+ MLX4_COMM_CMD_VHCR_POST,
+ MLX4_COMM_CMD_FLR = 254
+};
+
+/* The flag indicates that the slave should delay the RESET cmd */
+#define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb
+/* indicates how many retries will be done if we are in the middle of FLR */
+#define NUM_OF_RESET_RETRIES 10
+#define SLEEP_TIME_IN_RESET (2 * 1000)
+enum mlx4_resource {
+ RES_QP,
+ RES_CQ,
+ RES_SRQ,
+ RES_XRCD,
+ RES_MPT,
+ RES_MTT,
+ RES_MAC,
+ RES_VLAN,
+ RES_NPORT_ID,
+ RES_COUNTER,
+ RES_FS_RULE,
+ RES_EQ,
+ MLX4_NUM_OF_RESOURCE_TYPE
+};
+
+enum mlx4_alloc_mode {
+ RES_OP_RESERVE,
+ RES_OP_RESERVE_AND_MAP,
+ RES_OP_MAP_ICM,
+};
+
+enum mlx4_res_tracker_free_type {
+ RES_TR_FREE_ALL,
+ RES_TR_FREE_SLAVES_ONLY,
+ RES_TR_FREE_STRUCTS_ONLY,
+};
+
+/*
+ * Virtual HCR structures.
+ * mlx4_vhcr is the SW representation, in machine endianness.
+ *
+ * mlx4_vhcr_cmd is the formalized structure, the one that is passed
+ * to the FW through the communication channel.
+ * It is big endian, and has the same layout as the physical HCR
+ * used by the command interface.
+ */
+struct mlx4_vhcr {
+ u64 in_param;
+ u64 out_param;
+ u32 in_modifier;
+ u32 errno;
+ u16 op;
+ u16 token;
+ u8 op_modifier;
+ u8 e_bit;
+};
+
+struct mlx4_vhcr_cmd {
+ __be64 in_param;
+ __be32 in_modifier;
+ u32 reserved1;
+ __be64 out_param;
+ __be16 token;
+ u16 reserved;
+ u8 status;
+ u8 flags;
+ __be16 opcode;
+} __packed;
+
+struct mlx4_cmd_info {
+ u16 opcode;
+ bool has_inbox;
+ bool has_outbox;
+ bool out_is_imm;
+ bool encode_slave_id;
+ bool skip_err_print;
+ int (*verify)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox);
+ int (*wrapper)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+};
+
+enum {
+ MLX4_DEBUG_MASK_CMD_TIME = 0x100,
+};
+
#ifdef CONFIG_MLX4_DEBUG
extern int mlx4_debug_level;
#else /* CONFIG_MLX4_DEBUG */
@@ -87,20 +220,26 @@
#endif /* CONFIG_MLX4_DEBUG */
#define mlx4_dbg(mdev, format, arg...) \
- do { \
- if (mlx4_debug_level) \
- dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ## arg); \
- } while (0)
+do { \
+ if (mlx4_debug_level) \
+ dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ##arg); \
+} while (0)
#define mlx4_err(mdev, format, arg...) \
- dev_err(&mdev->pdev->dev, format, ## arg)
+ dev_err(&mdev->pdev->dev, format, ##arg)
#define mlx4_info(mdev, format, arg...) \
- dev_info(&mdev->pdev->dev, format, ## arg)
+ dev_info(&mdev->pdev->dev, format, ##arg)
#define mlx4_warn(mdev, format, arg...) \
- dev_warn(&mdev->pdev->dev, format, ## arg)
+ dev_warn(&mdev->pdev->dev, format, ##arg)
+extern int mlx4_log_num_mgm_entry_size;
+extern int log_mtts_per_seg;
extern int mlx4_blck_lb;
+extern int mlx4_set_4k_mtu;
+#define MLX4_MAX_NUM_SLAVES (MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF)
+#define ALL_SLAVES 0xff
+
struct mlx4_bitmap {
u32 last;
u32 top;
@@ -115,7 +254,7 @@
struct mlx4_buddy {
unsigned long **bits;
unsigned int *num_free;
- int max_order;
+ u32 max_order;
spinlock_t lock;
};
@@ -124,7 +263,7 @@
struct mlx4_icm_table {
u64 virt;
int num_icm;
- int num_obj;
+ u32 num_obj;
int obj_size;
int lowmem;
int coherent;
@@ -132,6 +271,107 @@
struct mlx4_icm **icm;
};
+#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28)
+#define MLX4_MPT_FLAG_FREE (0x3UL << 28)
+#define MLX4_MPT_FLAG_MIO (1 << 17)
+#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15)
+#define MLX4_MPT_FLAG_PHYSICAL (1 << 9)
+#define MLX4_MPT_FLAG_REGION (1 << 8)
+
+#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27)
+#define MLX4_MPT_PD_FLAG_RAE (1 << 28)
+#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24)
+
+#define MLX4_MPT_QP_FLAG_BOUND_QP (1 << 7)
+
+#define MLX4_MPT_STATUS_SW 0xF0
+#define MLX4_MPT_STATUS_HW 0x00
+
+/*
+ * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
+ */
+struct mlx4_mpt_entry {
+ __be32 flags;
+ __be32 qpn;
+ __be32 key;
+ __be32 pd_flags;
+ __be64 start;
+ __be64 length;
+ __be32 lkey;
+ __be32 win_cnt;
+ u8 reserved1[3];
+ u8 mtt_rep;
+ __be64 mtt_addr;
+ __be32 mtt_sz;
+ __be32 entity_size;
+ __be32 first_byte_offset;
+} __packed;
+
+/*
+ * Must be packed because start is 64 bits but only aligned to 32 bits.
+ */
+struct mlx4_eq_context {
+ __be32 flags;
+ u16 reserved1[3];
+ __be16 page_offset;
+ u8 log_eq_size;
+ u8 reserved2[4];
+ u8 eq_period;
+ u8 reserved3;
+ u8 eq_max_count;
+ u8 reserved4[3];
+ u8 intr;
+ u8 log_page_size;
+ u8 reserved5[2];
+ u8 mtt_base_addr_h;
+ __be32 mtt_base_addr_l;
+ u32 reserved6[2];
+ __be32 consumer_index;
+ __be32 producer_index;
+ u32 reserved7[4];
+};
+
+struct mlx4_cq_context {
+ __be32 flags;
+ u16 reserved1[3];
+ __be16 page_offset;
+ __be32 logsize_usrpage;
+ __be16 cq_period;
+ __be16 cq_max_count;
+ u8 reserved2[3];
+ u8 comp_eqn;
+ u8 log_page_size;
+ u8 reserved3[2];
+ u8 mtt_base_addr_h;
+ __be32 mtt_base_addr_l;
+ __be32 last_notified_index;
+ __be32 solicit_producer_index;
+ __be32 consumer_index;
+ __be32 producer_index;
+ u32 reserved4[2];
+ __be64 db_rec_addr;
+};
+
+struct mlx4_srq_context {
+ __be32 state_logsize_srqn;
+ u8 logstride;
+ u8 reserved1;
+ __be16 xrcd;
+ __be32 pg_offset_cqn;
+ u32 reserved2;
+ u8 log_page_size;
+ u8 reserved3[2];
+ u8 mtt_base_addr_h;
+ __be32 mtt_base_addr_l;
+ __be32 pd;
+ __be16 limit_watermark;
+ __be16 wqe_cnt;
+ u16 reserved4;
+ __be16 wqe_counter;
+ u32 reserved5;
+ __be64 db_rec_addr;
+};
+
struct mlx4_eq {
struct mlx4_dev *dev;
void __iomem *doorbell;
@@ -140,11 +380,21 @@
u16 irq;
u16 have_irq;
int nent;
- int load;
struct mlx4_buf_list *page_list;
struct mlx4_mtt mtt;
};
+struct mlx4_slave_eqe {
+ u8 type;
+ u8 port;
+ u32 param;
+};
+
+struct mlx4_slave_event_eq_info {
+ int eqn;
+ u16 token;
+};
+
struct mlx4_profile {
int num_qp;
int rdmarc_per_qp;
@@ -152,12 +402,14 @@
int num_cq;
int num_mcg;
int num_mpt;
- int num_mtt;
+ unsigned num_mtt_segs;
};
struct mlx4_fw {
u64 clr_int_base;
u64 catas_offset;
+ u64 comm_base;
+ u64 clock_offset;
struct mlx4_icm *fw_icm;
struct mlx4_icm *aux_icm;
u32 catas_size;
@@ -164,12 +416,184 @@
u16 fw_pages;
u8 clr_int_bar;
u8 catas_bar;
+ u8 comm_bar;
+ u8 clock_bar;
};
+struct mlx4_comm {
+ u32 slave_write;
+ u32 slave_read;
+};
+
+enum {
+ MLX4_MCAST_CONFIG = 0,
+ MLX4_MCAST_DISABLE = 1,
+ MLX4_MCAST_ENABLE = 2,
+};
+
+#define VLAN_FLTR_SIZE 128
+
+struct mlx4_vlan_fltr {
+ __be32 entry[VLAN_FLTR_SIZE];
+};
+
+struct mlx4_mcast_entry {
+ struct list_head list;
+ u64 addr;
+};
+
+struct mlx4_promisc_qp {
+ struct list_head list;
+ u32 qpn;
+};
+
+struct mlx4_steer_index {
+ struct list_head list;
+ unsigned int index;
+ struct list_head duplicates;
+};
+
+#define MLX4_EVENT_TYPES_NUM 64
+
+struct mlx4_slave_state {
+ u8 comm_toggle;
+ u8 last_cmd;
+ u8 init_port_mask;
+ bool active;
+ bool old_vlan_api;
+ u8 function;
+ dma_addr_t vhcr_dma;
+ u16 mtu[MLX4_MAX_PORTS + 1];
+ __be32 ib_cap_mask[MLX4_MAX_PORTS + 1];
+ struct mlx4_slave_eqe eq[MLX4_MFUNC_MAX_EQES];
+ struct list_head mcast_filters[MLX4_MAX_PORTS + 1];
+ struct mlx4_vlan_fltr *vlan_filter[MLX4_MAX_PORTS + 1];
+ /* event type to eq number lookup */
+ struct mlx4_slave_event_eq_info event_eq[MLX4_EVENT_TYPES_NUM];
+ u16 eq_pi;
+ u16 eq_ci;
+ spinlock_t lock;
+ /* initialized via kzalloc */
+ u8 is_slave_going_down;
+ u32 cookie;
+ enum slave_port_state port_state[MLX4_MAX_PORTS + 1];
+};
+
+#define MLX4_VGT 4095
+#define NO_INDX (-1)
+
+
+struct mlx4_vport_state {
+ u64 mac;
+ u16 default_vlan;
+ u8 default_qos;
+ u32 tx_rate;
+ bool spoofchk;
+ u32 link_state;
+};
+
+struct mlx4_vf_admin_state {
+ struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1];
+};
+
+struct mlx4_vport_oper_state {
+ struct mlx4_vport_state state;
+ int mac_idx;
+ int vlan_idx;
+};
+struct mlx4_vf_oper_state {
+ struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1];
+};
+
+struct slave_list {
+ struct mutex mutex;
+ struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE];
+};
+
+struct resource_allocator {
+ spinlock_t alloc_lock;
+ union {
+ int res_reserved;
+ int res_port_rsvd[MLX4_MAX_PORTS];
+ };
+ union {
+ int res_free;
+ int res_port_free[MLX4_MAX_PORTS];
+ };
+ int *quota;
+ int *allocated;
+ int *guaranteed;
+};
+
+struct mlx4_resource_tracker {
+ spinlock_t lock;
+ /* one tree per resource type */
+ struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE];
+ /* per-slave resource lists, one per slave */
+ struct slave_list *slave_list;
+ struct resource_allocator res_alloc[MLX4_NUM_OF_RESOURCE_TYPE];
+};
+
+#define SLAVE_EVENT_EQ_SIZE 128
+struct mlx4_slave_event_eq {
+ u32 eqn;
+ u32 cons;
+ u32 prod;
+ spinlock_t event_lock;
+ struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE];
+};
+
+struct mlx4_master_qp0_state {
+ int proxy_qp0_active;
+ int qp0_active;
+ int port_active;
+};
+
+struct mlx4_mfunc_master_ctx {
+ struct mlx4_slave_state *slave_state;
+ struct mlx4_vf_admin_state *vf_admin;
+ struct mlx4_vf_oper_state *vf_oper;
+ struct mlx4_master_qp0_state qp0_state[MLX4_MAX_PORTS + 1];
+ int init_port_ref[MLX4_MAX_PORTS + 1];
+ u16 max_mtu[MLX4_MAX_PORTS + 1];
+ int disable_mcast_ref[MLX4_MAX_PORTS + 1];
+ struct mlx4_resource_tracker res_tracker;
+ struct workqueue_struct *comm_wq;
+ struct work_struct comm_work;
+ struct work_struct arm_comm_work;
+ struct work_struct slave_event_work;
+ struct work_struct slave_flr_event_work;
+ spinlock_t slave_state_lock;
+ __be32 comm_arm_bit_vector[4];
+ struct mlx4_eqe cmd_eqe;
+ struct mlx4_slave_event_eq slave_eq;
+ struct mutex gen_eqe_mutex[MLX4_MFUNC_MAX];
+};
+
+struct mlx4_mfunc {
+ struct mlx4_comm __iomem *comm;
+ struct mlx4_vhcr_cmd *vhcr;
+ dma_addr_t vhcr_dma;
+
+ struct mlx4_mfunc_master_ctx master;
+};
+
+#define MGM_QPN_MASK 0x00FFFFFF
+#define MGM_BLCK_LB_BIT 30
+
+struct mlx4_mgm {
+ __be32 next_gid_index;
+ __be32 members_count;
+ u32 reserved[2];
+ u8 gid[16];
+ __be32 qp[MLX4_MAX_QP_PER_MGM];
+};
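
Both multiplexed fields of struct mlx4_mgm are decoded repeatedly in the mcg.c hunks above: the low 24 bits of members_count hold the count and the top two bits the protocol, while the AMGM successor index is stored shifted left by 6. A standalone decoding sketch:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t members_count = (2u << 30) | 5;   /* prot 2, 5 members */
            uint32_t next_gid_index = 17u << 6;        /* AMGM index 17 */

            assert((members_count & 0xffffff) == 5);
            assert((members_count >> 30) == 2);
            assert((next_gid_index >> 6) == 17);
            return 0;
    }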
+
struct mlx4_cmd {
struct pci_pool *pool;
void __iomem *hcr;
struct mutex hcr_mutex;
+ struct mutex slave_cmd_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
int max_cmds;
@@ -179,8 +603,27 @@
u16 token_mask;
u8 use_events;
u8 toggle;
+ u8 comm_toggle;
};
+enum {
+ MLX4_VF_IMMED_VLAN_FLAG_VLAN = 1 << 0,
+ MLX4_VF_IMMED_VLAN_FLAG_QOS = 1 << 1,
+};
+struct mlx4_vf_immed_vlan_work {
+ struct work_struct work;
+ struct mlx4_priv *priv;
+ int flags;
+ int slave;
+ int vlan_ix;
+ int orig_vlan_ix;
+ u8 port;
+ u8 qos;
+ u16 vlan_id;
+ u16 orig_vlan_id;
+};
+
+
struct mlx4_uar_table {
struct mlx4_bitmap bitmap;
};
@@ -197,6 +640,7 @@
struct mlx4_cq_table {
struct mlx4_bitmap bitmap;
spinlock_t lock;
+ rwlock_t cq_table_lock;
struct radix_tree_root tree;
struct mlx4_icm_table table;
struct mlx4_icm_table cmpt_table;
@@ -218,6 +662,7 @@
struct mlx4_srq_table {
struct mlx4_bitmap bitmap;
spinlock_t lock;
+ struct radix_tree_root tree;
struct mlx4_icm_table table;
struct mlx4_icm_table cmpt_table;
};
@@ -268,6 +713,55 @@
int max;
};
+#define SET_PORT_GEN_ALL_VALID 0x7
+#define SET_PORT_PROMISC_SHIFT 31
+#define SET_PORT_MC_PROMISC_SHIFT 30
+
+enum {
+ MCAST_DIRECT_ONLY = 0,
+ MCAST_DIRECT = 1,
+ MCAST_DEFAULT = 2
+};
+
+
+struct mlx4_set_port_general_context {
+ u8 reserved[3];
+ u8 flags;
+ u16 reserved2;
+ __be16 mtu;
+ u8 pptx;
+ u8 pfctx;
+ u16 reserved3;
+ u8 pprx;
+ u8 pfcrx;
+ u16 reserved4;
+};
+
+struct mlx4_set_port_rqp_calc_context {
+ __be32 base_qpn;
+ u8 rererved;
+ u8 n_mac;
+ u8 n_vlan;
+ u8 n_prio;
+ u8 reserved2[3];
+ u8 mac_miss;
+ u8 intra_no_vlan;
+ u8 no_vlan;
+ u8 intra_vlan_miss;
+ u8 vlan_miss;
+ u8 reserved3[3];
+ u8 no_vlan_prio;
+ __be32 promisc;
+ __be32 mcast;
+};
+
+struct mlx4_hca_info {
+ struct mlx4_dev *dev;
+ struct device_attribute firmware_attr;
+ struct device_attribute hca_attr;
+ struct device_attribute board_attr;
+};
+
struct mlx4_port_info {
struct mlx4_dev *dev;
int port;
@@ -274,8 +768,11 @@
char dev_name[16];
struct device_attribute port_attr;
enum mlx4_port_type tmp_type;
+ char dev_mtu_name[16];
+ struct device_attribute port_mtu_attr;
struct mlx4_mac_table mac_table;
struct mlx4_vlan_table vlan_table;
+ int base_qpn;
};
struct mlx4_sense {
@@ -283,12 +780,44 @@
u8 do_sense_port[MLX4_MAX_PORTS + 1];
u8 sense_allowed[MLX4_MAX_PORTS + 1];
struct delayed_work sense_poll;
- struct workqueue_struct *sense_wq;
- u32 resched;
};
-extern struct mutex drv_mutex;
+struct mlx4_msix_ctl {
+ u64 pool_bm;
+ struct mutex pool_lock;
+};
+struct mlx4_steer {
+ struct list_head promisc_qps[MLX4_NUM_STEERS];
+ struct list_head steer_entries[MLX4_NUM_STEERS];
+};
+
+enum {
+ MLX4_PCI_DEV_IS_VF = 1 << 0,
+ MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1,
+};
+
+struct mlx4_roce_gid_entry {
+ u8 raw[16];
+};
+
+struct counter_index {
+ struct list_head list;
+ u32 index;
+};
+
+struct mlx4_counters {
+ struct mlx4_bitmap bitmap;
+ struct list_head global_port_list[MLX4_MAX_PORTS];
+ struct list_head vf_list[MLX4_MAX_NUM_VF][MLX4_MAX_PORTS];
+ struct mutex mutex;
+};
+
+enum {
+ MLX4_NO_RR = 0,
+ MLX4_USE_RR = 1,
+};
+
struct mlx4_priv {
struct mlx4_dev dev;
@@ -296,11 +825,14 @@
struct list_head ctx_list;
spinlock_t ctx_lock;
+ int pci_dev_data;
+
struct list_head pgdir_list;
struct mutex pgdir_mutex;
struct mlx4_fw fw;
struct mlx4_cmd cmd;
+ struct mlx4_mfunc mfunc;
struct mlx4_bitmap pd_bitmap;
struct mlx4_bitmap xrcd_bitmap;
@@ -311,9 +843,7 @@
struct mlx4_srq_table srq_table;
struct mlx4_qp_table qp_table;
struct mlx4_mcg_table mcg_table;
- struct mlx4_bitmap counters_bitmap;
- struct list_head bf_list;
- struct mutex bf_mutex;
+ struct mlx4_counters counters_table;
struct mlx4_catas_err catas_err;
@@ -322,13 +852,22 @@
struct mlx4_uar driver_uar;
void __iomem *kar;
struct mlx4_port_info port[MLX4_MAX_PORTS + 1];
- struct device_attribute trigger_attr;
- int trig;
- int changed_ports;
+ struct mlx4_hca_info hca_info;
struct mlx4_sense sense;
struct mutex port_mutex;
- int iboe_counter_index[MLX4_MAX_PORTS];
- struct io_mapping *bf_mapping;
+ struct mlx4_msix_ctl msix_ctl;
+ struct mlx4_steer *steer;
+ struct list_head bf_list;
+ struct mutex bf_mutex;
+ struct io_mapping *bf_mapping;
+ void __iomem *clock_mapping;
+ int reserved_mtts;
+ int fs_hash_mode;
+ u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+ __be64 slave_node_guids[MLX4_MFUNC_MAX];
+ struct mlx4_roce_gid_entry roce_gids[MLX4_MAX_PORTS][128];
+ atomic_t opreq_count;
+ struct work_struct opreq_task;
};
static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
@@ -341,9 +880,11 @@
extern struct workqueue_struct *mlx4_wq;
u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap);
-void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj);
-u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align);
-void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt);
+void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr);
+u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt,
+ int align, u32 skip_mask);
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt,
+ int use_rr);
u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap);
int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
u32 reserved_bot, u32 reserved_top);
@@ -365,6 +906,7 @@
int mlx4_init_mcg_table(struct mlx4_dev *dev);
void mlx4_cleanup_pd_table(struct mlx4_dev *dev);
+void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev);
void mlx4_cleanup_uar_table(struct mlx4_dev *dev);
void mlx4_cleanup_mr_table(struct mlx4_dev *dev);
void mlx4_cleanup_eq_table(struct mlx4_dev *dev);
@@ -372,8 +914,72 @@
void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
-void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev);
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn);
+void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn);
+int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn);
+void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn);
+int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn);
+void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn);
+int __mlx4_mpt_reserve(struct mlx4_dev *dev);
+void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index);
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index);
+void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index);
+u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order);
+void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order);
+int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SYNC_TPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
+ int *base, u8 flags);
+void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
+int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ int start_index, int npages, u64 *page_list);
+int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx);
+void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx);
+
+int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave);
+int __mlx4_clear_if_stat(struct mlx4_dev *dev,
+ u8 counter_index);
+u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port);
+
+int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn);
+void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
+
void mlx4_start_catas_poll(struct mlx4_dev *dev);
void mlx4_stop_catas_poll(struct mlx4_dev *dev);
void mlx4_catas_init(void);
@@ -380,8 +986,8 @@
int mlx4_restart_one(struct pci_dev *pdev);
int mlx4_register_device(struct mlx4_dev *dev);
void mlx4_unregister_device(struct mlx4_dev *dev);
-void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port);
-void *mlx4_find_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int port);
+void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
+ unsigned long param);
struct mlx4_dev_cap;
struct mlx4_init_hca_param;
@@ -390,13 +996,159 @@
struct mlx4_profile *request,
struct mlx4_dev_cap *dev_cap,
struct mlx4_init_hca_param *init_hca);
+void mlx4_master_comm_channel(struct work_struct *work);
+void mlx4_master_arm_comm_channel(struct work_struct *work);
+void mlx4_gen_slave_eqe(struct work_struct *work);
+void mlx4_master_handle_slave_flr(struct work_struct *work);
+int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_COMM_INT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_2ERR_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_RTS2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+
+int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe);
+
int mlx4_cmd_init(struct mlx4_dev *dev);
void mlx4_cmd_cleanup(struct mlx4_dev *dev);
+int mlx4_multi_func_init(struct mlx4_dev *dev);
+void mlx4_multi_func_cleanup(struct mlx4_dev *dev);
void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
int mlx4_cmd_use_events(struct mlx4_dev *dev);
void mlx4_cmd_use_polling(struct mlx4_dev *dev);
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
+ unsigned long timeout);
+
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);
@@ -406,13 +1158,14 @@
void mlx4_handle_catas_err(struct mlx4_dev *dev);
+int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
+ enum mlx4_port_type *type);
void mlx4_do_sense_ports(struct mlx4_dev *dev,
enum mlx4_port_type *stype,
enum mlx4_port_type *defaults);
void mlx4_start_sense(struct mlx4_dev *dev);
void mlx4_stop_sense(struct mlx4_dev *dev);
-int mlx4_sense_init(struct mlx4_dev *dev);
-void mlx4_sense_cleanup(struct mlx4_dev *dev);
+void mlx4_sense_init(struct mlx4_dev *dev);
int mlx4_check_port_params(struct mlx4_dev *dev,
enum mlx4_port_type *port_type);
int mlx4_change_port_types(struct mlx4_dev *dev,
@@ -420,8 +1173,156 @@
void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table);
void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table);
+void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
+int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port);
+int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz);
+/* resource tracker functions*/
+int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev,
+ enum mlx4_resource resource_type,
+ u64 resource_id, int *slave);
+void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave_id);
+int mlx4_init_resource_tracker(struct mlx4_dev *dev);
+
+void mlx4_free_resource_tracker(struct mlx4_dev *dev,
+ enum mlx4_res_tracker_free_type type);
+
+int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps);
+int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port,
+ int *gid_tbl_len, int *pkey_tbl_len);
+
+int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+
+int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ enum mlx4_protocol prot, enum mlx4_steer_type steer);
+int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ int block_mcast_loopback, enum mlx4_protocol prot,
+ enum mlx4_steer_type steer);
+int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp,
+ u8 gid[16], u8 port,
+ int block_mcast_loopback,
+ enum mlx4_protocol prot, u64 *reg_id);
+int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
+int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function,
+ int port, void *buf);
+int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_PKEY_TABLE_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_MOD_STAT_CFG_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+
+int mlx4_get_mgm_entry_size(struct mlx4_dev *dev);
+int mlx4_get_qp_per_mgm(struct mlx4_dev *dev);
+
+static inline void set_param_l(u64 *arg, u32 val)
+{
+ *arg = (*arg & 0xffffffff00000000ULL) | (u64) val;
+}
+
+static inline void set_param_h(u64 *arg, u32 val)
+{
+ *arg = (*arg & 0xffffffff) | ((u64) val << 32);
+}
+
+static inline u32 get_param_l(u64 *arg)
+{
+ return (u32) (*arg & 0xffffffff);
+}
+
+static inline u32 get_param_h(u64 *arg)
+{
+ return (u32)(*arg >> 32);
+}
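
The set_param_l/h and get_param_l/h helpers above are how the driver splits one 64-bit command parameter into two 32-bit halves; a standalone round trip with userspace types standing in for the kernel's u32/u64:

    #include <assert.h>
    #include <stdint.h>

    static void set_l(uint64_t *arg, uint32_t val)
    {
            *arg = (*arg & 0xffffffff00000000ULL) | val;
    }

    static void set_h(uint64_t *arg, uint32_t val)
    {
            *arg = (*arg & 0xffffffffULL) | ((uint64_t)val << 32);
    }

    int main(void)
    {
            uint64_t arg = 0;

            set_l(&arg, 0xdeadbeef);
            set_h(&arg, 0x12345678);
            assert((uint32_t)(arg & 0xffffffff) == 0xdeadbeef);
            assert((uint32_t)(arg >> 32) == 0x12345678);
            return 0;
    }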
+
+static inline spinlock_t *mlx4_tlock(struct mlx4_dev *dev)
+{
+ return &mlx4_priv(dev)->mfunc.master.res_tracker.lock;
+}
+
+#define NOT_MASKED_PD_BITS 17
+
+void sys_tune_init(void);
+void sys_tune_fini(void);
+
+void mlx4_init_quotas(struct mlx4_dev *dev);
+
+int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave);
+int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave);
+void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work);
+
#endif /* MLX4_H */
Property changes on: trunk/sys/ofed/drivers/net/mlx4/mlx4.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/net/mlx4/mlx4_en.h
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/mlx4_en.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/mlx4_en.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -34,13 +34,17 @@
#ifndef _MLX4_EN_H_
#define _MLX4_EN_H_
-#include <sys/cdefs.h>
-
-#include <linux/types.h>
+#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/mutex.h>
+#include <linux/kobject.h>
#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+#ifdef CONFIG_MLX4_EN_DCB
+#include <linux/dcbnl.h>
+#endif
#include <linux/mlx4/device.h>
#include <linux/mlx4/qp.h>
@@ -49,56 +53,15 @@
#include <linux/mlx4/doorbell.h>
#include <linux/mlx4/cmd.h>
-#include <net/if_media.h>
#include <netinet/tcp_lro.h>
#include "en_port.h"
+#include "mlx4_stats.h"
#define DRV_NAME "mlx4_en"
-#define DRV_VERSION "1.5.2"
-#define DRV_RELDATE "July 2010"
-/* XXX */
-#define NETIF_MSG_LINK 0x1
-#define NETIF_MSG_IFDOWN 0x2
-#define NETIF_MSG_HW 0x4
-#define NETIF_MSG_DRV 0x8
-#define NETIF_MSG_INTR 0x10
-#define NETIF_MSG_RX_ERR 0x20
-
#define MLX4_EN_MSG_LEVEL (NETIF_MSG_LINK | NETIF_MSG_IFDOWN)
-#define en_print(level, priv, format, arg...) \
- { \
- if ((priv)->registered) \
- printk(level "%s: %s: " format, DRV_NAME, \
- (priv->dev)->if_xname, ## arg); \
- else \
- printk(level "%s: %s: Port %d: " format, \
- DRV_NAME, dev_name(&priv->mdev->pdev->dev), \
- (priv)->port, ## arg); \
- }
-
-#define en_dbg(mlevel, priv, format, arg...) \
- if (NETIF_MSG_##mlevel & priv->msg_enable) \
- en_print(KERN_DEBUG, priv, format, ## arg)
-#define en_warn(priv, format, arg...) \
- en_print(KERN_WARNING, priv, format, ## arg)
-#define en_err(priv, format, arg...) \
- en_print(KERN_ERR, priv, format, ## arg)
-#define en_info(priv, format, arg...) \
- en_print(KERN_INFO, priv, format, ## arg)
-
-#define mlx4_err(mdev, format, arg...) \
- printk(KERN_ERR "%s %s: " format , DRV_NAME ,\
- dev_name(&mdev->pdev->dev) , ## arg)
-#define mlx4_info(mdev, format, arg...) \
- printk(KERN_INFO "%s %s: " format , DRV_NAME ,\
- dev_name(&mdev->pdev->dev) , ## arg)
-#define mlx4_warn(mdev, format, arg...) \
- printk(KERN_WARNING "%s %s: " format , DRV_NAME ,\
- dev_name(&mdev->pdev->dev) , ## arg)
-
/*
* Device constants
*/
@@ -106,8 +69,10 @@
#define MLX4_EN_PAGE_SHIFT 12
#define MLX4_EN_PAGE_SIZE (1 << MLX4_EN_PAGE_SHIFT)
-#define MAX_TX_RINGS (MLX4_EN_NUM_HASH_RINGS + 1 + MLX4_EN_NUM_PPP_RINGS)
-#define MAX_RX_RINGS 16
+#define MLX4_NET_IP_ALIGN 2 /* bytes */
+#define DEF_RX_RINGS 16
+#define MAX_RX_RINGS 128
+#define MIN_RX_RINGS 4
#define TXBB_SIZE 64
#define HEADROOM (2048 / TXBB_SIZE + 1)
#define STAMP_STRIDE 64
@@ -115,11 +80,20 @@
#define STAMP_SHIFT 31
#define STAMP_VAL 0x7fffffff
#define STATS_DELAY (HZ / 4)
+#define SERVICE_TASK_DELAY (HZ / 4)
+#define MAX_NUM_OF_FS_RULES 256
-/* Typical TSO descriptor with 16 gather entries is 352 bytes... */
-#define MAX_DESC_SIZE 512
-#define MAX_DESC_TXBBS (MAX_DESC_SIZE / TXBB_SIZE)
+#define MLX4_EN_FILTER_HASH_SHIFT 4
+#define MLX4_EN_FILTER_EXPIRY_QUOTA 60
+#ifdef CONFIG_NET_RX_BUSY_POLL
+#define LL_EXTENDED_STATS
+#endif
+
+/* vlan valid range */
+#define VLAN_MIN_VALUE 1
+#define VLAN_MAX_VALUE 4094
+
/*
* OS related constants and tunables
*/
@@ -126,52 +100,46 @@
#define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ)
-#define MLX4_EN_MAX_LRO_DESCRIPTORS 32
-#define MLX4_EN_NUM_IPFRAG_SESSIONS 16
+#define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(PAGE_SIZE)
+#define MLX4_EN_ALLOC_ORDER get_order(MLX4_EN_ALLOC_SIZE)
-/* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU
- * and 4K allocations) */
-#if MJUMPAGESIZE == 4096
-enum {
- FRAG_SZ0 = MCLBYTES,
- FRAG_SZ1 = MJUMPAGESIZE,
- FRAG_SZ2 = MJUMPAGESIZE,
+enum mlx4_en_alloc_type {
+ MLX4_EN_ALLOC_NEW = 0,
+ MLX4_EN_ALLOC_REPLACEMENT = 1,
};
-#define MLX4_EN_MAX_RX_FRAGS 3
-#elif MJUMPAGESIZE == 8192
-enum {
- FRAG_SZ0 = MCLBYTES,
- FRAG_SZ1 = MJUMPAGESIZE,
-};
-#define MLX4_EN_MAX_RX_FRAGS 2
-#elif MJUMPAGESIZE == 8192
-#else
-#error "Unknown PAGE_SIZE"
-#endif
/* Maximum ring sizes */
+#define MLX4_EN_DEF_TX_QUEUE_SIZE 4096
+
+/* Minimum packet number till arming the CQ */
+#define MLX4_EN_MIN_RX_ARM 2048
+#define MLX4_EN_MIN_TX_ARM 2048
+
+/* Maximum ring sizes */
#define MLX4_EN_MAX_TX_SIZE 8192
#define MLX4_EN_MAX_RX_SIZE 8192
-#define MLX4_EN_MIN_RX_SIZE (128)
+/* Minimum ring sizes */
+#define MLX4_EN_MIN_RX_SIZE (4096 / TXBB_SIZE)
#define MLX4_EN_MIN_TX_SIZE (4096 / TXBB_SIZE)
#define MLX4_EN_SMALL_PKT_SIZE 64
-#define MLX4_EN_TX_HASH_SIZE 256
-#define MLX4_EN_TX_HASH_MASK (MLX4_EN_TX_HASH_SIZE - 1)
-#define MLX4_EN_NUM_HASH_RINGS 4
-#define MLX4_EN_NUM_PPP_RINGS 8
-#define MLX4_EN_DEF_TX_RING_SIZE 512
-#define MLX4_EN_DEF_TX_QUEUE_SIZE 4096
+
+#define MLX4_EN_MAX_TX_RING_P_UP 32
+#define MLX4_EN_NUM_UP 1
+
+#define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \
+ MLX4_EN_NUM_UP)
+
+#define MLX4_EN_DEF_TX_RING_SIZE 1024
#define MLX4_EN_DEF_RX_RING_SIZE 1024
-#define MLX4_EN_MAX_RX_POLL 1024
/* Target number of bytes to coalesce with interrupt moderation */
#define MLX4_EN_RX_COAL_TARGET 0x20000
#define MLX4_EN_RX_COAL_TIME 0x10
-#define MLX4_EN_TX_COAL_PKTS 5
-#define MLX4_EN_TX_COAL_TIME 0x80
+#define MLX4_EN_TX_COAL_PKTS 64
+#define MLX4_EN_TX_COAL_TIME 64
#define MLX4_EN_RX_RATE_LOW 400000
#define MLX4_EN_RX_COAL_TIME_LOW 0
@@ -187,14 +155,13 @@
#define MLX4_EN_DEF_RX_PAUSE 1
#define MLX4_EN_DEF_TX_PAUSE 1
-/* Interval between sucessive polls in the Tx routine when polling is used
+/* Interval between successive polls in the Tx routine when polling is used
instead of interrupts (in per-core Tx rings) - should be power of 2 */
#define MLX4_EN_TX_POLL_MODER 16
#define MLX4_EN_TX_POLL_TIMEOUT (HZ / 4)
-#define ETH_LLC_SNAP_SIZE 8
-
-#define SMALL_PACKET_SIZE (MHLEN)
+#define MLX4_EN_64_ALIGN (64 - NET_SKB_PAD)
+#define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN)
#define HEADER_COPY_SIZE (128)
#define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETHER_HDR_LEN)
@@ -208,7 +175,6 @@
/* Number of samples to 'average' */
#define AVG_SIZE 128
#define AVG_FACTOR 1024
-#define NUM_PERF_STATS NUM_PERF_COUNTERS
#define INC_PERF_COUNTER(cnt) (++(cnt))
#define ADD_PERF_COUNTER(cnt, add) ((cnt) += (add))
@@ -219,7 +185,6 @@
#else
-#define NUM_PERF_STATS 0
#define INC_PERF_COUNTER(cnt) do {} while (0)
#define ADD_PERF_COUNTER(cnt, add) do {} while (0)
#define AVG_PERF_COUNTER(cnt, sample) do {} while (0)
@@ -244,13 +209,11 @@
#define XNOR(x, y) (!(x) == !(y))
#define ILLEGAL_MAC(addr) (addr == 0xffffffffffffULL || addr == 0x0)
-
struct mlx4_en_tx_info {
- struct mbuf *mb;
- u32 nr_txbb;
- u8 nr_segs;
- u8 data_offset;
- u8 inl;
+ bus_dmamap_t dma_map;
+ struct mbuf *mb;
+ u32 nr_txbb;
+ u32 nr_bytes;
};
@@ -271,8 +234,22 @@
#define MLX4_EN_USE_SRQ 0x01000000
+#define MLX4_EN_RX_BUDGET 64
+
+#define MLX4_EN_TX_MAX_DESC_SIZE 512 /* bytes */
+#define MLX4_EN_TX_MAX_MBUF_SIZE 65536 /* bytes */
+#define MLX4_EN_TX_MAX_PAYLOAD_SIZE 65536 /* bytes */
+#define MLX4_EN_TX_MAX_MBUF_FRAGS \
+ ((MLX4_EN_TX_MAX_DESC_SIZE - 128) / DS_SIZE_ALIGNMENT) /* units */
+#define MLX4_EN_TX_WQE_MAX_WQEBBS \
+ (MLX4_EN_TX_MAX_DESC_SIZE / TXBB_SIZE) /* units */
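
[Note: taking the usual 16-byte mlx4_wqe_data_seg as DS_SIZE_ALIGNMENT (an
assumption; that define lives elsewhere in the driver), these limits work out
to (512 - 128) / 16 = 24 gather entries per WQE and 512 / 64 = 8 TXBBs per
WQE, i.e. a single TX descriptor never spans more than eight 64-byte basic
blocks.]
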
+
+#define MLX4_EN_CX3_LOW_ID 0x1000
+#define MLX4_EN_CX3_HIGH_ID 0x1005
+
struct mlx4_en_tx_ring {
- spinlock_t tx_lock;
+ spinlock_t tx_lock;
+ bus_dma_tag_t dma_tag;
struct mlx4_hwq_resources wqres;
u32 size ; /* number of TXBBs */
u32 size_mask;
@@ -282,12 +259,13 @@
u32 cons;
u32 buf_size;
u32 doorbell_qpn;
- void *buf;
+ u8 *buf;
u16 poll_cnt;
int blocked;
+ struct mlx4_en_tx_info *tx_info;
+ u8 queue_index;
+ cpuset_t affinity_mask;
struct buf_ring *br;
- struct mlx4_en_tx_info *tx_info;
- u8 *bounce_buf;
u32 last_nr_txbb;
struct mlx4_qp qp;
struct mlx4_qp_context context;
@@ -296,31 +274,38 @@
struct mlx4_srq dummy;
unsigned long bytes;
unsigned long packets;
- unsigned long errors;
- spinlock_t comp_lock;
+ unsigned long tx_csum;
+ unsigned long queue_stopped;
+ unsigned long oversized_packets;
+ unsigned long wake_queue;
struct mlx4_bf bf;
bool bf_enabled;
+ int hwtstamp_tx_type;
+ spinlock_t comp_lock;
+ int inline_thold;
u64 watchdog_time;
};
-struct mlx4_en_ipfrag {
- struct mbuf *fragments;
- struct mbuf *last;
- __be32 saddr;
- __be32 daddr;
- __be16 id;
- u8 protocol;
- int total_len;
- u16 offset;
-};
-
struct mlx4_en_rx_desc {
/* actual number of entries depends on rx ring stride */
struct mlx4_wqe_data_seg data[0];
};
+struct mlx4_en_rx_mbuf {
+ bus_dmamap_t dma_map;
+ struct mbuf *mbuf;
+};
+
+struct mlx4_en_rx_spare {
+ bus_dmamap_t dma_map;
+ struct mbuf *mbuf;
+ u64 paddr_be;
+};
+
struct mlx4_en_rx_ring {
struct mlx4_hwq_resources wqres;
+ bus_dma_tag_t dma_tag;
+ struct mlx4_en_rx_spare spare;
u32 size ; /* number of Rx descs*/
u32 actual_size;
u32 size_mask;
@@ -330,28 +315,48 @@
u32 prod;
u32 cons;
u32 buf_size;
- void *buf;
- void *rx_info;
+ u8 fcs_del;
+ u32 rx_mb_size;
+ int qpn;
+ u8 *buf;
+ struct mlx4_en_rx_mbuf *mbuf;
+ unsigned long errors;
unsigned long bytes;
unsigned long packets;
- unsigned long errors;
+#ifdef LL_EXTENDED_STATS
+ unsigned long yields;
+ unsigned long misses;
+ unsigned long cleaned;
+#endif
+ unsigned long csum_ok;
+ unsigned long csum_none;
+ int hwtstamp_rx_filter;
+ int numa_node;
struct lro_ctrl lro;
- struct mlx4_en_ipfrag ipfrag[MLX4_EN_NUM_IPFRAG_SESSIONS];
};
-
static inline int mlx4_en_can_lro(__be16 status)
{
- return (status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
- MLX4_CQE_STATUS_IPV4F |
- MLX4_CQE_STATUS_IPV6 |
- MLX4_CQE_STATUS_IPV4OPT |
- MLX4_CQE_STATUS_TCP |
- MLX4_CQE_STATUS_UDP |
- MLX4_CQE_STATUS_IPOK)) ==
- cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
- MLX4_CQE_STATUS_IPOK |
- MLX4_CQE_STATUS_TCP);
+ const __be16 status_all = cpu_to_be16(
+ MLX4_CQE_STATUS_IPV4 |
+ MLX4_CQE_STATUS_IPV4F |
+ MLX4_CQE_STATUS_IPV6 |
+ MLX4_CQE_STATUS_IPV4OPT |
+ MLX4_CQE_STATUS_TCP |
+ MLX4_CQE_STATUS_UDP |
+ MLX4_CQE_STATUS_IPOK);
+ const __be16 status_ipv4_ipok_tcp = cpu_to_be16(
+ MLX4_CQE_STATUS_IPV4 |
+ MLX4_CQE_STATUS_IPOK |
+ MLX4_CQE_STATUS_TCP);
+ const __be16 status_ipv6_ipok_tcp = cpu_to_be16(
+ MLX4_CQE_STATUS_IPV6 |
+ MLX4_CQE_STATUS_IPOK |
+ MLX4_CQE_STATUS_TCP);
+
+ status &= status_all;
+ return (status == status_ipv4_ipok_tcp ||
+ status == status_ipv6_ipok_tcp);
}
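
[Note: the rewrite widens LRO eligibility to IPv6 while keeping the same
mask-and-compare shape: the CQE status is masked down to the protocol bits,
then compared against the two exact "clean TCP" patterns, so any fragment,
UDP, or option bit disqualifies the completion. A minimal standalone sketch
of the idiom, with illustrative flag values rather than the hardware's:

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative status bits; the real values come from mlx4_cqe. */
    #define ST_IPV4    0x01
    #define ST_IPV4F   0x02   /* IPv4 fragment */
    #define ST_IPV6    0x04
    #define ST_IPV4OPT 0x08
    #define ST_TCP     0x10
    #define ST_UDP     0x20
    #define ST_IPOK    0x40

    static bool can_lro(uint16_t status)
    {
        const uint16_t all = ST_IPV4 | ST_IPV4F | ST_IPV6 |
                             ST_IPV4OPT | ST_TCP | ST_UDP | ST_IPOK;

        status &= all;  /* ignore unrelated status bits */
        return status == (ST_IPV4 | ST_IPOK | ST_TCP) ||
               status == (ST_IPV6 | ST_IPOK | ST_TCP);
    }
]
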
struct mlx4_en_cq {
@@ -360,8 +365,8 @@
int ring;
spinlock_t lock;
struct net_device *dev;
- /* Per-core Tx cq processing support */
- struct timer_list timer;
+ /* Per-core Tx cq processing support */
+ struct timer_list timer;
int size;
int buf_size;
unsigned vector;
@@ -373,6 +378,21 @@
struct taskqueue *tq;
#define MLX4_EN_OPCODE_ERROR 0x1e
u32 tot_rx;
+ u32 tot_tx;
+ u32 curr_poll_rx_cpu_id;
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ unsigned int state;
+#define MLX4_EN_CQ_STATEIDLE 0
+#define MLX4_EN_CQ_STATENAPI 1 /* NAPI owns this CQ */
+#define MLX4_EN_CQ_STATEPOLL 2 /* poll owns this CQ */
+#define MLX4_CQ_LOCKED (MLX4_EN_CQ_STATENAPI | MLX4_EN_CQ_STATEPOLL)
+#define MLX4_EN_CQ_STATENAPI_YIELD 4 /* NAPI yielded this CQ */
+#define MLX4_EN_CQ_STATEPOLL_YIELD 8 /* poll yielded this CQ */
+#define CQ_YIELD (MLX4_EN_CQ_STATENAPI_YIELD | MLX4_EN_CQ_STATEPOLL_YIELD)
+#define CQ_USER_PEND (MLX4_EN_CQ_STATEPOLL | MLX4_EN_CQ_STATEPOLL_YIELD)
+ spinlock_t poll_lock; /* protects from LLS/napi conflicts */
+#endif /* CONFIG_NET_RX_BUSY_POLL */
};
struct mlx4_en_port_profile {
@@ -382,41 +402,42 @@
u32 tx_ring_size;
u32 rx_ring_size;
u8 rx_pause;
+ u8 rx_ppp;
u8 tx_pause;
- u32 rx_ppp;
- u32 tx_ppp;
+ u8 tx_ppp;
+ int rss_rings;
};
struct mlx4_en_profile {
int rss_xor;
- int num_lro;
- int ip_reasm;
- int tcp_rss;
int udp_rss;
u8 rss_mask;
u32 active_ports;
u32 small_pkt_int;
u8 no_reset;
+ u8 num_tx_rings_p_up;
struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1];
};
struct mlx4_en_dev {
- struct mlx4_dev *dev;
+ struct mlx4_dev *dev;
struct pci_dev *pdev;
struct mutex state_lock;
- struct net_device *pndev[MLX4_MAX_PORTS + 1];
- u32 port_cnt;
+ struct net_device *pndev[MLX4_MAX_PORTS + 1];
+ u32 port_cnt;
bool device_up;
- struct mlx4_en_profile profile;
+ struct mlx4_en_profile profile;
u32 LSO_support;
struct workqueue_struct *workqueue;
- struct device *dma_device;
- void __iomem *uar_map;
- struct mlx4_uar priv_uar;
+ struct device *dma_device;
+ void __iomem *uar_map;
+ struct mlx4_uar priv_uar;
struct mlx4_mr mr;
- u32 priv_pdn;
- spinlock_t uar_lock;
+ u32 priv_pdn;
+ spinlock_t uar_lock;
u8 mac_removed[MLX4_MAX_PORTS + 1];
+ unsigned long last_overflow_check;
+ unsigned long overflow_period;
};
@@ -428,58 +449,59 @@
enum mlx4_qp_state indir_state;
};
-struct mlx4_en_rss_context {
- __be32 base_qpn;
- __be32 default_qpn;
- u16 reserved;
- u8 hash_fn;
- u8 flags;
- __be32 rss_key[10];
- __be32 base_qpn_udp;
-};
-
struct mlx4_en_port_state {
int link_state;
int link_speed;
int transciver;
+ int autoneg;
};
-struct mlx4_en_pkt_stats {
- unsigned long broadcast;
- unsigned long rx_prio[8];
- unsigned long tx_prio[8];
-#define NUM_PKT_STATS 17
+enum mlx4_en_mclist_act {
+ MCLIST_NONE,
+ MCLIST_REM,
+ MCLIST_ADD,
};
-struct mlx4_en_port_stats {
- unsigned long tso_packets;
- unsigned long queue_stopped;
- unsigned long wake_queue;
- unsigned long tx_timeout;
- unsigned long rx_alloc_failed;
- unsigned long rx_chksum_good;
- unsigned long rx_chksum_none;
- unsigned long tx_chksum_offload;
+struct mlx4_en_mc_list {
+ struct list_head list;
+ enum mlx4_en_mclist_act action;
+ u8 addr[ETH_ALEN];
+ u64 reg_id;
};
-struct mlx4_en_perf_stats {
- u32 tx_poll;
- u64 tx_pktsz_avg;
- u32 inflight_avg;
- u32 tx_coal_avg;
- u32 rx_coal_avg;
-};
+#ifdef CONFIG_MLX4_EN_DCB
+/* Minimal TC BW - setting to 0 will block traffic */
+#define MLX4_EN_BW_MIN 1
+#define MLX4_EN_BW_MAX 100 /* Utilize 100% of the line */
-struct mlx4_en_frag_info {
- u16 frag_size;
- u16 frag_prefix_size;
+#define MLX4_EN_TC_ETS 7
+
+#endif
+
+
+enum {
+ MLX4_EN_FLAG_PROMISC = (1 << 0),
+ MLX4_EN_FLAG_MC_PROMISC = (1 << 1),
+ /* whether we need to enable hardware loopback by putting dmac
+ * in Tx WQE
+ */
+ MLX4_EN_FLAG_ENABLE_HW_LOOPBACK = (1 << 2),
+ /* whether we need to drop packets that hardware loopback-ed */
+ MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3),
+ MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4),
+#ifdef CONFIG_MLX4_EN_DCB
+ MLX4_EN_FLAG_DCB_ENABLED = (1 << 5)
+#endif
};
-struct mlx4_en_tx_hash_entry {
- u8 cnt;
- unsigned int small_pkts;
- unsigned int big_pkts;
- unsigned int ring;
+#define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE)
+#define MLX4_EN_MAC_HASH_IDX 5
+
+struct en_port {
+ struct kobject kobj;
+ struct mlx4_dev *dev;
+ u8 port_num;
+ u8 vport_num;
};
struct mlx4_en_priv {
@@ -486,29 +508,27 @@
struct mlx4_en_dev *mdev;
struct mlx4_en_port_profile *prof;
struct net_device *dev;
- bool vlgrp_modified;
- u32 vlan_register[VLAN_FLTR_SIZE];
- u32 vlan_unregister[VLAN_FLTR_SIZE];
- u32 vlans[VLAN_FLTR_SIZE];
- spinlock_t vlan_lock;
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
struct mlx4_en_port_state port_state;
spinlock_t stats_lock;
+ /* To allow rules removal while port is going down */
+ struct list_head ethtool_list;
- unsigned long last_moder_packets;
+ unsigned long last_moder_packets[MAX_RX_RINGS];
unsigned long last_moder_tx_packets;
- unsigned long last_moder_bytes;
+ unsigned long last_moder_bytes[MAX_RX_RINGS];
unsigned long last_moder_jiffies;
- int last_moder_time;
+ int last_moder_time[MAX_RX_RINGS];
u16 rx_usecs;
u16 rx_frames;
u16 tx_usecs;
u16 tx_frames;
u32 pkt_rate_low;
- u16 rx_usecs_low;
+ u32 rx_usecs_low;
u32 pkt_rate_high;
- u16 rx_usecs_high;
- u16 sample_interval;
- u16 adaptive_rx_coal;
+ u32 rx_usecs_high;
+ u32 sample_interval;
+ u32 adaptive_rx_coal;
u32 msg_enable;
u32 loopback_ok;
u32 validate_loopback;
@@ -520,61 +540,199 @@
int port;
int registered;
int allocated;
- int rx_csum;
- u64 mac;
+ int stride;
+ unsigned char current_mac[ETH_ALEN + 2];
+ u64 mac;
int mac_index;
unsigned max_mtu;
int base_qpn;
+ int cqe_factor;
struct mlx4_en_rss_map rss_map;
- u16 tx_prio_map[8];
u32 flags;
-#define MLX4_EN_FLAG_PROMISC 0x1
+ u8 num_tx_rings_p_up;
u32 tx_ring_num;
u32 rx_ring_num;
u32 rx_mb_size;
- struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
- u16 num_frags;
- u16 log_rx_info;
- int ip_reasm;
- bool wol;
- struct mlx4_en_tx_ring tx_ring[MAX_TX_RINGS];
- struct mlx4_en_rx_ring rx_ring[MAX_RX_RINGS];
- struct mlx4_en_cq tx_cq[MAX_TX_RINGS];
- struct mlx4_en_cq rx_cq[MAX_RX_RINGS];
- struct mlx4_en_tx_hash_entry tx_hash[MLX4_EN_TX_HASH_SIZE];
- struct work_struct mcast_task;
+ struct mlx4_en_tx_ring **tx_ring;
+ struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS];
+ struct mlx4_en_cq **tx_cq;
+ struct mlx4_en_cq *rx_cq[MAX_RX_RINGS];
+ struct mlx4_qp drop_qp;
+ struct work_struct rx_mode_task;
struct work_struct watchdog_task;
struct work_struct linkstate_task;
struct delayed_work stats_task;
+ struct delayed_work service_task;
struct mlx4_en_perf_stats pstats;
struct mlx4_en_pkt_stats pkstats;
+ struct mlx4_en_flow_stats flowstats[MLX4_NUM_PRIORITIES];
struct mlx4_en_port_stats port_stats;
+ struct mlx4_en_vport_stats vport_stats;
+ struct mlx4_en_vf_stats vf_stats;
+ DECLARE_BITMAP(stats_bitmap, NUM_ALL_STATS);
+ struct list_head mc_list;
+ struct list_head curr_list;
+ u64 broadcast_id;
struct mlx4_en_stat_out_mbox hw_stats;
- struct ifmedia media;
+ int vids[128];
+ bool wol;
+ struct device *ddev;
+ struct dentry *dev_root;
+ u32 counter_index;
eventhandler_tag vlan_attach;
eventhandler_tag vlan_detach;
struct callout watchdog_timer;
+ struct ifmedia media;
volatile int blocked;
struct sysctl_oid *sysctl;
struct sysctl_ctx_list conf_ctx;
struct sysctl_ctx_list stat_ctx;
+#define MLX4_EN_MAC_HASH_IDX 5
+ struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE];
+
+#ifdef CONFIG_MLX4_EN_DCB
+ struct ieee_ets ets;
+ u16 maxrate[IEEE_8021QAZ_MAX_TCS];
+ u8 dcbx_cap;
+#endif
+#ifdef CONFIG_RFS_ACCEL
+ spinlock_t filters_lock;
+ int last_filter_id;
+ struct list_head filters;
+ struct hlist_head filter_hash[1 << MLX4_EN_FILTER_HASH_SHIFT];
+#endif
+ struct en_port *vf_ports[MLX4_MAX_NUM_VF];
+ unsigned long last_ifq_jiffies;
+ u64 if_counters_rx_errors;
+ u64 if_counters_rx_no_buffer;
};
enum mlx4_en_wol {
MLX4_EN_WOL_MAGIC = (1ULL << 61),
MLX4_EN_WOL_ENABLED = (1ULL << 62),
- MLX4_EN_WOL_DO_MODIFY = (1ULL << 63),
};
-int mlx4_en_transmit(struct net_device *dev, struct mbuf *mb);
-void mlx4_en_qflush(struct net_device *dev);
+struct mlx4_mac_entry {
+ struct hlist_node hlist;
+ unsigned char mac[ETH_ALEN + 2];
+ u64 reg_id;
+};
-int mlx4_en_rx_frags(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring,
- struct mbuf *mb, struct mlx4_cqe *cqe);
-void mlx4_en_flush_frags(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_ring *ring);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq)
+{
+ spin_lock_init(&cq->poll_lock);
+ cq->state = MLX4_EN_CQ_STATEIDLE;
+}
+
+/* called from the device poll routine to get ownership of a cq */
+static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq)
+{
+ int rc = true;
+ spin_lock(&cq->poll_lock);
+ if (cq->state & MLX4_CQ_LOCKED) {
+ WARN_ON(cq->state & MLX4_EN_CQ_STATENAPI);
+ cq->state |= MLX4_EN_CQ_STATENAPI_YIELD;
+ rc = false;
+ } else
+ /* we don't care if someone yielded */
+ cq->state = MLX4_EN_CQ_STATENAPI;
+ spin_unlock(&cq->poll_lock);
+ return rc;
+}
+
+/* returns true if someone tried to get the cq while napi had it */
+static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq)
+{
+ int rc = false;
+ spin_lock(&cq->poll_lock);
+ WARN_ON(cq->state & (MLX4_EN_CQ_STATEPOLL |
+ MLX4_EN_CQ_STATENAPI_YIELD));
+
+ if (cq->state & MLX4_EN_CQ_STATEPOLL_YIELD)
+ rc = true;
+ cq->state = MLX4_EN_CQ_STATEIDLE;
+ spin_unlock(&cq->poll_lock);
+ return rc;
+}
+
+/* called from mlx4_en_low_latency_poll() */
+static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq)
+{
+ int rc = true;
+ spin_lock_bh(&cq->poll_lock);
+ if ((cq->state & MLX4_CQ_LOCKED)) {
+ struct net_device *dev = cq->dev;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring];
+
+ cq->state |= MLX4_EN_CQ_STATEPOLL_YIELD;
+ rc = false;
+#ifdef LL_EXTENDED_STATS
+ rx_ring->yields++;
+#endif
+ } else
+ /* preserve yield marks */
+ cq->state |= MLX4_EN_CQ_STATEPOLL;
+ spin_unlock_bh(&cq->poll_lock);
+ return rc;
+}
+
+/* returns true if someone tried to get the cq while it was locked */
+static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq)
+{
+ int rc = false;
+ spin_lock_bh(&cq->poll_lock);
+ WARN_ON(cq->state & (MLX4_EN_CQ_STATENAPI));
+
+ if (cq->state & MLX4_EN_CQ_STATEPOLL_YIELD)
+ rc = true;
+ cq->state = MLX4_EN_CQ_STATEIDLE;
+ spin_unlock_bh(&cq->poll_lock);
+ return rc;
+}
+
+/* true if a socket is polling, even if it did not get the lock */
+static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq)
+{
+ WARN_ON(!(cq->state & MLX4_CQ_LOCKED));
+ return cq->state & CQ_USER_PEND;
+}
+#else
+static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq)
+{
+}
+
+static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq)
+{
+ return true;
+}
+
+static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq)
+{
+ return false;
+}
+
+static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq)
+{
+ return false;
+}
+
+static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq)
+{
+ return false;
+}
+
+static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq)
+{
+ return false;
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
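
[Note: these inline helpers implement a small ownership protocol between NAPI
and busy-polling sockets: STATENAPI and STATEPOLL mark the current owner, and
a contender that loses the race records a *_YIELD bit so the owner learns, on
unlock, that someone wanted the CQ. With CONFIG_NET_RX_BUSY_POLL off, the
stubs make NAPI the unconditional owner. A hedged sketch of the NAPI-side
calling pattern (assumes this header; the processing call is elided):

    /* Illustrative poll-routine skeleton, not driver code. */
    static int rx_poll_sketch(struct mlx4_en_cq *cq, int budget)
    {
        int done = 0;

        if (!mlx4_en_cq_lock_napi(cq))
            return 0;               /* a busy-polling socket owns it */

        /* done = mlx4_en_process_rx_cq(cq->dev, cq, budget); */

        if (mlx4_en_cq_unlock_napi(cq))
            done = budget;          /* contention: ask to be polled again */

        return done;
    }
]
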
+
+#define MLX4_EN_WOL_DO_MODIFY (1ULL << 63)
+
void mlx4_en_destroy_netdev(struct net_device *dev);
int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
struct mlx4_en_port_profile *prof);
@@ -585,31 +743,40 @@
void mlx4_en_free_resources(struct mlx4_en_priv *priv);
int mlx4_en_alloc_resources(struct mlx4_en_priv *priv);
-int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
- int entries, int ring, enum cq_type mode);
-void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
-int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
+int mlx4_en_pre_config(struct mlx4_en_priv *priv);
+int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq,
+ int entries, int ring, enum cq_type mode, int node);
+void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq);
+int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
+ int cq_idx);
void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
-void mlx4_en_poll_tx_cq(unsigned long data);
void mlx4_en_tx_irq(struct mlx4_cq *mcq);
u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb);
-int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring,
- u32 size, u16 stride);
-void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring);
+int mlx4_en_transmit(struct ifnet *dev, struct mbuf *m);
+int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring **pring,
+ u32 size, u16 stride, int node, int queue_idx);
+void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring **pring);
int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring,
- int cq);
+ int cq, int user_prio);
void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring);
+void mlx4_en_qflush(struct ifnet *dev);
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_ring *ring, u32 size);
+ struct mlx4_en_rx_ring **pring,
+ u32 size, int node);
void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_ring *ring);
+ struct mlx4_en_rx_ring **pring,
+ u32 size, u16 stride);
+void mlx4_en_tx_que(void *context, int pending);
+void mlx4_en_rx_que(void *context, int pending);
int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv);
void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring *ring);
@@ -616,41 +783,129 @@
int mlx4_en_process_rx_cq(struct net_device *dev,
struct mlx4_en_cq *cq,
int budget);
-int mlx4_en_process_rx_cq_mb(struct net_device *dev,
- struct mlx4_en_cq *cq,
- int budget);
-void mlx4_en_tx_que(void *context, int pending);
-void mlx4_en_rx_que(void *context, int pending);
+void mlx4_en_poll_tx_cq(unsigned long data);
void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
- int is_tx, int rss, int qpn, int cqn,
- struct mlx4_qp_context *context);
+ int is_tx, int rss, int qpn, int cqn, int user_prio,
+ struct mlx4_qp_context *context);
void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event);
int mlx4_en_map_buffer(struct mlx4_buf *buf);
void mlx4_en_unmap_buffer(struct mlx4_buf *buf);
+void mlx4_en_calc_rx_buf(struct net_device *dev);
-void mlx4_en_calc_rx_buf(struct net_device *dev);
-void mlx4_en_set_prio_map(struct mlx4_en_priv *priv, u16 *prio_map, u32 ring_num);
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv);
void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);
+int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv);
+void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv);
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring);
void mlx4_en_rx_irq(struct mlx4_cq *mcq);
int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
-int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, u32 *vlans);
-int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
- u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
-int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
- u8 promisc);
+int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv);
int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset);
int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port);
+int mlx4_en_get_vport_stats(struct mlx4_en_dev *mdev, u8 port);
+void mlx4_en_create_debug_files(struct mlx4_en_priv *priv);
+void mlx4_en_delete_debug_files(struct mlx4_en_priv *priv);
+int mlx4_en_register_debugfs(void);
+void mlx4_en_unregister_debugfs(void);
+#ifdef CONFIG_MLX4_EN_DCB
+extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops;
+extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops;
+#endif
+
+int mlx4_en_setup_tc(struct net_device *dev, u8 up);
+
+#ifdef CONFIG_RFS_ACCEL
+void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *rx_ring);
+#endif
+
#define MLX4_EN_NUM_SELF_TEST 5
void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf);
-u64 mlx4_en_mac_to_u64(u8 *addr);
+void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev);
/*
+ * Functions for time stamping
+ */
+#define SKBTX_HW_TSTAMP (1 << 0)
+#define SKBTX_IN_PROGRESS (1 << 2)
+
+u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe);
+
+/* Functions for caching and restoring statistics */
+int mlx4_en_get_sset_count(struct net_device *dev, int sset);
+void mlx4_en_restore_ethtool_stats(struct mlx4_en_priv *priv,
+ u64 *data);
+
+/*
* Globals
*/
extern const struct ethtool_ops mlx4_en_ethtool_ops;
+
+/*
+ * Defines for link speed - needed by selftest
+ */
+#define MLX4_EN_LINK_SPEED_1G 1000
+#define MLX4_EN_LINK_SPEED_10G 10000
+#define MLX4_EN_LINK_SPEED_40G 40000
+
+enum {
+ NETIF_MSG_DRV = 0x0001,
+ NETIF_MSG_PROBE = 0x0002,
+ NETIF_MSG_LINK = 0x0004,
+ NETIF_MSG_TIMER = 0x0008,
+ NETIF_MSG_IFDOWN = 0x0010,
+ NETIF_MSG_IFUP = 0x0020,
+ NETIF_MSG_RX_ERR = 0x0040,
+ NETIF_MSG_TX_ERR = 0x0080,
+ NETIF_MSG_TX_QUEUED = 0x0100,
+ NETIF_MSG_INTR = 0x0200,
+ NETIF_MSG_TX_DONE = 0x0400,
+ NETIF_MSG_RX_STATUS = 0x0800,
+ NETIF_MSG_PKTDATA = 0x1000,
+ NETIF_MSG_HW = 0x2000,
+ NETIF_MSG_WOL = 0x4000,
+};
+
+
+/*
+ * printk / logging functions
+ */
+
+#define en_print(level, priv, format, arg...) \
+ { \
+ if ((priv)->registered) \
+ printk(level "%s: %s: " format, DRV_NAME, \
+ (priv->dev)->if_xname, ## arg); \
+ else \
+ printk(level "%s: %s: Port %d: " format, \
+ DRV_NAME, dev_name(&priv->mdev->pdev->dev), \
+ (priv)->port, ## arg); \
+ }
+
+
+#define en_dbg(mlevel, priv, format, arg...) \
+do { \
+ if (NETIF_MSG_##mlevel & priv->msg_enable) \
+ en_print(KERN_DEBUG, priv, format, ##arg); \
+} while (0)
+#define en_warn(priv, format, arg...) \
+ en_print(KERN_WARNING, priv, format, ##arg)
+#define en_err(priv, format, arg...) \
+ en_print(KERN_ERR, priv, format, ##arg)
+#define en_info(priv, format, arg...) \
+ en_print(KERN_INFO, priv, format, ## arg)
+
+#define mlx4_err(mdev, format, arg...) \
+ pr_err("%s %s: " format, DRV_NAME, \
+ dev_name(&mdev->pdev->dev), ##arg)
+#define mlx4_info(mdev, format, arg...) \
+ pr_info("%s %s: " format, DRV_NAME, \
+ dev_name(&mdev->pdev->dev), ##arg)
+#define mlx4_warn(mdev, format, arg...) \
+ pr_warning("%s %s: " format, DRV_NAME, \
+ dev_name(&mdev->pdev->dev), ##arg)
+
#endif
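
[Note: the relocated logging macros keep their old behavior: en_dbg() is
gated per category by bits in priv->msg_enable, while en_warn()/en_err()/
en_info() always print, prefixed with the driver name and either the
interface name (once registered) or the PCI device and port. A brief usage
fragment, assuming a function with a valid priv and illustrative variables
up and ring:

    priv->msg_enable = MLX4_EN_MSG_LEVEL;  /* NETIF_MSG_LINK | NETIF_MSG_IFDOWN */
    en_dbg(LINK, priv, "link is %s\n", up ? "up" : "down");
    en_err(priv, "failed to allocate rx ring %d\n", ring);
]
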
Property changes on: trunk/sys/ofed/drivers/net/mlx4/mlx4_en.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Added: trunk/sys/ofed/drivers/net/mlx4/mlx4_stats.h
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/mlx4_stats.h (rev 0)
+++ trunk/sys/ofed/drivers/net/mlx4/mlx4_stats.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX4_STATS_
+#define _MLX4_STATS_
+
+
+#ifdef MLX4_EN_PERF_STAT
+#define NUM_PERF_STATS NUM_PERF_COUNTERS
+#else
+#define NUM_PERF_STATS 0
+#endif
+
+#define NUM_PRIORITIES 9
+#define NUM_PRIORITY_STATS 2
+
+struct mlx4_en_pkt_stats {
+ unsigned long rx_packets;
+ unsigned long rx_bytes;
+ unsigned long rx_multicast_packets;
+ unsigned long rx_broadcast_packets;
+ unsigned long rx_errors;
+ unsigned long rx_dropped;
+ unsigned long rx_length_errors;
+ unsigned long rx_over_errors;
+ unsigned long rx_crc_errors;
+ unsigned long rx_jabbers;
+ unsigned long rx_in_range_length_error;
+ unsigned long rx_out_range_length_error;
+ unsigned long rx_lt_64_bytes_packets;
+ unsigned long rx_127_bytes_packets;
+ unsigned long rx_255_bytes_packets;
+ unsigned long rx_511_bytes_packets;
+ unsigned long rx_1023_bytes_packets;
+ unsigned long rx_1518_bytes_packets;
+ unsigned long rx_1522_bytes_packets;
+ unsigned long rx_1548_bytes_packets;
+ unsigned long rx_gt_1548_bytes_packets;
+ unsigned long tx_packets;
+ unsigned long tx_bytes;
+ unsigned long tx_multicast_packets;
+ unsigned long tx_broadcast_packets;
+ unsigned long tx_errors;
+ unsigned long tx_dropped;
+ unsigned long tx_lt_64_bytes_packets;
+ unsigned long tx_127_bytes_packets;
+ unsigned long tx_255_bytes_packets;
+ unsigned long tx_511_bytes_packets;
+ unsigned long tx_1023_bytes_packets;
+ unsigned long tx_1518_bytes_packets;
+ unsigned long tx_1522_bytes_packets;
+ unsigned long tx_1548_bytes_packets;
+ unsigned long tx_gt_1548_bytes_packets;
+ unsigned long rx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS];
+ unsigned long tx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS];
+#define NUM_PKT_STATS 72
+};
+
+struct mlx4_en_vf_stats {
+ unsigned long rx_packets;
+ unsigned long rx_bytes;
+ unsigned long rx_multicast_packets;
+ unsigned long rx_broadcast_packets;
+ unsigned long rx_errors;
+ unsigned long rx_dropped;
+ unsigned long tx_packets;
+ unsigned long tx_bytes;
+ unsigned long tx_multicast_packets;
+ unsigned long tx_broadcast_packets;
+ unsigned long tx_errors;
+#define NUM_VF_STATS 11
+};
+
+struct mlx4_en_vport_stats {
+ unsigned long rx_unicast_packets;
+ unsigned long rx_unicast_bytes;
+ unsigned long rx_multicast_packets;
+ unsigned long rx_multicast_bytes;
+ unsigned long rx_broadcast_packets;
+ unsigned long rx_broadcast_bytes;
+ unsigned long rx_dropped;
+ unsigned long rx_errors;
+ unsigned long tx_unicast_packets;
+ unsigned long tx_unicast_bytes;
+ unsigned long tx_multicast_packets;
+ unsigned long tx_multicast_bytes;
+ unsigned long tx_broadcast_packets;
+ unsigned long tx_broadcast_bytes;
+ unsigned long tx_errors;
+#define NUM_VPORT_STATS 15
+};
+
+struct mlx4_en_port_stats {
+ unsigned long tso_packets;
+ unsigned long queue_stopped;
+ unsigned long wake_queue;
+ unsigned long tx_timeout;
+ unsigned long oversized_packets;
+ unsigned long rx_alloc_failed;
+ unsigned long rx_chksum_good;
+ unsigned long rx_chksum_none;
+ unsigned long tx_chksum_offload;
+#define NUM_PORT_STATS 8
+};
+
+struct mlx4_en_perf_stats {
+ u32 tx_poll;
+ u64 tx_pktsz_avg;
+ u32 inflight_avg;
+ u16 tx_coal_avg;
+ u16 rx_coal_avg;
+ u32 napi_quota;
+#define NUM_PERF_COUNTERS 6
+};
+
+struct mlx4_en_flow_stats {
+ u64 rx_pause;
+ u64 rx_pause_duration;
+ u64 rx_pause_transition;
+ u64 tx_pause;
+ u64 tx_pause_duration;
+ u64 tx_pause_transition;
+};
+#define MLX4_NUM_PRIORITIES 8
+#define NUM_FLOW_PRIORITY_STATS 6
+#define NUM_FLOW_STATS (NUM_FLOW_PRIORITY_STATS*MLX4_NUM_PRIORITIES)
+
+
+struct mlx4_en_stat_out_flow_control_mbox {
+ /* Total number of PAUSE frames received from the far-end port */
+ __be64 rx_pause;
+ /* Total number of microseconds that far-end port requested to pause
+ * transmission of packets
+ */
+ __be64 rx_pause_duration;
+ /* Number of receiver transitions from XOFF state to XON state */

+ __be64 rx_pause_transition;
+ /* Total number of PAUSE frames sent from the far-end port */
+ __be64 tx_pause;
+ /* Total time in microseconds that transmission of packets has been
+ * paused
+ */
+ __be64 tx_pause_duration;
+ /* Number of transmitter transitions from XOFF state to XON state */
+ __be64 tx_pause_transition;
+ /* Reserved */
+ __be64 reserved[2];
+};
+
+int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port,
+ struct mlx4_en_vport_stats *vport_stats,
+ int reset);
+
+#define NUM_ALL_STATS (NUM_PKT_STATS + NUM_FLOW_STATS + NUM_VPORT_STATS + \
+ NUM_VF_STATS + NUM_PORT_STATS + NUM_PERF_STATS)
+#endif
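
[Note: taken together, the per-group counts above give NUM_FLOW_STATS =
6 * 8 = 48, so with performance counters compiled out NUM_ALL_STATS =
72 + 48 + 15 + 11 + 8 + 0 = 154 ethtool entries, and 160 when
MLX4_EN_PERF_STAT adds the six perf counters.]
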
Property changes on: trunk/sys/ofed/drivers/net/mlx4/mlx4_stats.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/ofed/drivers/net/mlx4/mr.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/mr.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/mr.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
- * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -32,52 +32,20 @@
* SOFTWARE.
*/
-#include <linux/init.h>
+#include <linux/err.h>
#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
#include <linux/mlx4/cmd.h>
+#include <linux/math64.h>
+
#include "mlx4.h"
#include "icm.h"
-/*
- * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
- */
-struct mlx4_mpt_entry {
- __be32 flags;
- __be32 qpn;
- __be32 key;
- __be32 pd_flags;
- __be64 start;
- __be64 length;
- __be32 lkey;
- __be32 win_cnt;
- u8 reserved1;
- u8 flags2;
- u8 reserved2;
- u8 mtt_rep;
- __be64 mtt_seg;
- __be32 mtt_sz;
- __be32 entity_size;
- __be32 first_byte_offset;
-} __attribute__((packed));
-
-#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28)
-#define MLX4_MPT_FLAG_FREE (0x3UL << 28)
-#define MLX4_MPT_FLAG_MIO (1 << 17)
-#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15)
-#define MLX4_MPT_FLAG_PHYSICAL (1 << 9)
-#define MLX4_MPT_FLAG_REGION (1 << 8)
-
-#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27)
-#define MLX4_MPT_PD_FLAG_RAE (1 << 28)
-#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24)
-
-#define MLX4_MPT_FLAG2_FBO_EN (1 << 7)
-
-#define MLX4_MPT_STATUS_SW 0xF0
-#define MLX4_MPT_STATUS_HW 0x00
-
static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order)
{
int o;
@@ -141,9 +109,9 @@
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
- buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
+ buddy->bits = kcalloc(buddy->max_order + 1, sizeof (long *),
GFP_KERNEL);
- buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
+ buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
GFP_KERNEL);
if (!buddy->bits || !buddy->num_free)
goto err_out;
@@ -150,10 +118,9 @@
for (i = 0; i <= buddy->max_order; ++i) {
s = BITS_TO_LONGS(1 << (buddy->max_order - i));
- buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
- if (!buddy->bits[i])
+ buddy->bits[i] = kcalloc(s, sizeof (long), GFP_KERNEL | __GFP_NOWARN);
+ if (!buddy->bits[i])
goto err_out_free;
- bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
}
set_bit(0, buddy->bits[buddy->max_order]);
@@ -183,24 +150,50 @@
kfree(buddy->num_free);
}
-static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
+u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
u32 seg;
+ int seg_order;
+ u32 offset;
- seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, order);
+ seg_order = max_t(int, order - log_mtts_per_seg, 0);
+
+ seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, seg_order);
if (seg == -1)
return -1;
- if (mlx4_table_get_range(dev, &mr_table->mtt_table, seg,
- seg + (1 << order) - 1)) {
- mlx4_buddy_free(&mr_table->mtt_buddy, seg, order);
+ offset = seg * (1 << log_mtts_per_seg);
+
+ if (mlx4_table_get_range(dev, &mr_table->mtt_table, offset,
+ offset + (1 << order) - 1)) {
+ mlx4_buddy_free(&mr_table->mtt_buddy, seg, seg_order);
return -1;
}
- return seg;
+ return offset;
}
+static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
+{
+ u64 in_param = 0;
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, order);
+ err = mlx4_cmd_imm(dev, in_param, &out_param, RES_MTT,
+ RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ return -1;
+ return get_param_l(&out_param);
+ }
+ return __mlx4_alloc_mtt_range(dev, order);
+}
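
[Note: the buddy allocator now hands out segments of 2^log_mtts_per_seg MTT
entries, so an order-n request is translated to a buddy order of
max(n - log_mtts_per_seg, 0) and the result scaled back to an entry offset.
For example, with log_mtts_per_seg = 3 (an illustrative setting), a request
for order 5 (32 MTTs) becomes a buddy allocation of order 2 (4 segments);
if the buddy returns seg 10, the function reserves ICM for entries 80..111
and returns offset 10 * 8 = 80.]
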
+
int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
struct mlx4_mtt *mtt)
{
@@ -213,33 +206,66 @@
} else
mtt->page_shift = page_shift;
- for (mtt->order = 0, i = dev->caps.mtts_per_seg; i < npages; i <<= 1)
+ for (mtt->order = 0, i = 1; i < npages; i <<= 1)
++mtt->order;
- mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order);
- if (mtt->first_seg == -1)
+ mtt->offset = mlx4_alloc_mtt_range(dev, mtt->order);
+ if (mtt->offset == -1) {
+ mlx4_err(dev, "Failed to allocate mtts for %d pages(order %d)\n",
+ npages, mtt->order);
return -ENOMEM;
+ }
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mtt_init);
-void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
+void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
{
+ u32 first_seg;
+ int seg_order;
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+ seg_order = max_t(int, order - log_mtts_per_seg, 0);
+ first_seg = offset / (1 << log_mtts_per_seg);
+
+ mlx4_buddy_free(&mr_table->mtt_buddy, first_seg, seg_order);
+ mlx4_table_put_range(dev, &mr_table->mtt_table, offset,
+ offset + (1 << order) - 1);
+}
+
+static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
+{
+ u64 in_param = 0;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, offset);
+ set_param_h(&in_param, order);
+ err = mlx4_cmd(dev, in_param, RES_MTT, RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ mlx4_warn(dev, "Failed to free mtt range at:"
+ "%d order:%d\n", offset, order);
+ return;
+ }
+ __mlx4_free_mtt_range(dev, offset, order);
+}
+
+void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
+{
if (mtt->order < 0)
return;
- mlx4_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order);
- mlx4_table_put_range(dev, &mr_table->mtt_table, mtt->first_seg,
- mtt->first_seg + (1 << mtt->order) - 1);
+ mlx4_free_mtt_range(dev, mtt->offset, mtt->order);
}
EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup);
u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
{
- return (u64) mtt->first_seg * dev->caps.mtt_entry_sz;
+ return (u64) mtt->offset * dev->caps.mtt_entry_sz;
}
EXPORT_SYMBOL_GPL(mlx4_mtt_addr);
@@ -256,8 +282,9 @@
static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int mpt_index)
{
- return mlx4_cmd(dev, mailbox->dma, mpt_index, 0, MLX4_CMD_SW2HW_MPT,
- MLX4_CMD_TIME_CLASS_B);
+ return mlx4_cmd(dev, mailbox->dma, mpt_index,
+ 0, MLX4_CMD_SW2HW_MPT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
}
static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -264,54 +291,127 @@
int mpt_index)
{
return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
- !mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B);
+ !mailbox, MLX4_CMD_HW2SW_MPT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
}
-int mlx4_mr_reserve_range(struct mlx4_dev *dev, int cnt, int align, u32 *base_mridx)
+static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
+ u64 iova, u64 size, u32 access, int npages,
+ int page_shift, struct mlx4_mr *mr)
{
+ mr->iova = iova;
+ mr->size = size;
+ mr->pd = pd;
+ mr->access = access;
+ mr->enabled = MLX4_MPT_DISABLED;
+ mr->key = hw_index_to_key(mridx);
+
+ return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+}
+
+static int mlx4_WRITE_MTT(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox,
+ int num_entries)
+{
+ return mlx4_cmd(dev, mailbox->dma, num_entries, 0, MLX4_CMD_WRITE_MTT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+}
+
+int __mlx4_mpt_reserve(struct mlx4_dev *dev)
+{
struct mlx4_priv *priv = mlx4_priv(dev);
- u32 mridx;
- mridx = mlx4_bitmap_alloc_range(&priv->mr_table.mpt_bitmap, cnt, align);
- if (mridx == -1)
- return -ENOMEM;
+ return mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
+}
- *base_mridx = mridx;
- return 0;
+static int mlx4_mpt_reserve(struct mlx4_dev *dev)
+{
+ u64 out_param;
+ if (mlx4_is_mfunc(dev)) {
+ if (mlx4_cmd_imm(dev, 0, &out_param, RES_MPT, RES_OP_RESERVE,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
+ return -1;
+ return get_param_l(&out_param);
+ }
+ return __mlx4_mpt_reserve(dev);
}
-EXPORT_SYMBOL_GPL(mlx4_mr_reserve_range);
-void mlx4_mr_release_range(struct mlx4_dev *dev, u32 base_mridx, int cnt)
+void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- mlx4_bitmap_free_range(&priv->mr_table.mpt_bitmap, base_mridx, cnt);
+
+ mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index, MLX4_NO_RR);
}
-EXPORT_SYMBOL_GPL(mlx4_mr_release_range);
-int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
- u64 iova, u64 size, u32 access, int npages,
- int page_shift, struct mlx4_mr *mr)
+static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
{
- mr->iova = iova;
- mr->size = size;
- mr->pd = pd;
- mr->access = access;
- mr->enabled = 0;
- mr->key = hw_index_to_key(mridx);
+ u64 in_param = 0;
- return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, index);
+ if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_RESERVE,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
+ mlx4_warn(dev, "Failed to release mr index:%d\n",
+ index);
+ return;
+ }
+ __mlx4_mpt_release(dev, index);
}
-EXPORT_SYMBOL_GPL(mlx4_mr_alloc_reserved);
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
+{
+ struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+
+ return mlx4_table_get(dev, &mr_table->dmpt_table, index);
+}
+
+static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
+{
+ u64 param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&param, index);
+ return mlx4_cmd_imm(dev, param, &param, RES_MPT, RES_OP_MAP_ICM,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ }
+ return __mlx4_mpt_alloc_icm(dev, index);
+}
+
+void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
+{
+ struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+
+ mlx4_table_put(dev, &mr_table->dmpt_table, index);
+}
+
+static void mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
+{
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, index);
+ if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_MAP_ICM,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED))
+ mlx4_warn(dev, "Failed to free icm of mr index:%d\n",
+ index);
+ return;
+ }
+ return __mlx4_mpt_free_icm(dev, index);
+}
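
[Note: this pairing repeats throughout the file: the double-underscore
variants (__mlx4_mpt_reserve(), __mlx4_mpt_alloc_icm(), ...) touch the
bitmaps and ICM directly and run only where the resources live, while the
static wrappers first check mlx4_is_mfunc() and, on a virtual function,
forward the request to the master through the RES_MPT command interface
instead. That split is what lets the same mr.c serve both the
physical-function driver and SR-IOV slaves.]
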
+
int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
int npages, int page_shift, struct mlx4_mr *mr)
{
- struct mlx4_priv *priv = mlx4_priv(dev);
u32 index;
int err;
- index = mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
+ index = mlx4_mpt_reserve(dev);
if (index == -1)
return -ENOMEM;
@@ -318,44 +418,55 @@
err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size,
access, npages, page_shift, mr);
if (err)
- mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index);
+ mlx4_mpt_release(dev, index);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_alloc);
-void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
+static int mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
int err;
- if (mr->enabled) {
+ if (mr->enabled == MLX4_MPT_EN_HW) {
err = mlx4_HW2SW_MPT(dev, NULL,
key_to_hw_index(mr->key) &
(dev->caps.num_mpts - 1));
- if (err)
- mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err);
+ if (err) {
+ mlx4_warn(dev, "HW2SW_MPT failed (%d).", err);
+ mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n");
+ return err;
+ }
+
+ mr->enabled = MLX4_MPT_EN_SW;
}
+ mlx4_mtt_cleanup(dev, &mr->mtt);
- mlx4_mtt_cleanup(dev, &mr->mtt);
+ return 0;
}
-EXPORT_SYMBOL_GPL(mlx4_mr_free_reserved);
-void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
+int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
- struct mlx4_priv *priv = mlx4_priv(dev);
- mlx4_mr_free_reserved(dev, mr);
- mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, key_to_hw_index(mr->key));
+ int ret;
+
+ ret = mlx4_mr_free_reserved(dev, mr);
+ if (ret)
+ return ret;
+ if (mr->enabled)
+ mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key));
+ mlx4_mpt_release(dev, key_to_hw_index(mr->key));
+
+ return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mr_free);
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
- struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mpt_entry *mpt_entry;
int err;
- err = mlx4_table_get(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key));
+ err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key));
if (err)
return err;
@@ -380,9 +491,10 @@
if (mr->mtt.order < 0) {
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
- mpt_entry->mtt_seg = 0;
+ mpt_entry->mtt_addr = 0;
} else {
- mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt));
+ mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
+ &mr->mtt));
}
if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
@@ -390,8 +502,7 @@
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
MLX4_MPT_PD_FLAG_RAE);
- mpt_entry->mtt_sz = cpu_to_be32((1 << mr->mtt.order) *
- dev->caps.mtts_per_seg);
+ mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order);
} else {
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
}
@@ -402,9 +513,8 @@
mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
goto err_cmd;
}
+ mr->enabled = MLX4_MPT_EN_HW;
- mr->enabled = 1;
-
mlx4_free_cmd_mailbox(dev, mailbox);
return 0;
@@ -413,7 +523,7 @@
mlx4_free_cmd_mailbox(dev, mailbox);
err_table:
- mlx4_table_put(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key));
+ mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key));
return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_enable);
@@ -425,50 +535,94 @@
__be64 *mtts;
dma_addr_t dma_handle;
int i;
- int s = start_index * sizeof (u64);
- /* All MTTs must fit in the same page */
- if (start_index / (PAGE_SIZE / sizeof (u64)) !=
- (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64)))
- return -EINVAL;
+ mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->offset +
+ start_index, &dma_handle);
- if (start_index & (dev->caps.mtts_per_seg - 1))
- return -EINVAL;
-
- mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
- s / dev->caps.mtt_entry_sz, &dma_handle);
if (!mtts)
return -ENOMEM;
+ dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
+ npages * sizeof (u64), DMA_TO_DEVICE);
+
for (i = 0; i < npages; ++i)
mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
- dma_sync_single(&dev->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE);
+ dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
+ npages * sizeof (u64), DMA_TO_DEVICE);
return 0;
}
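
[Note: replacing the one-shot dma_sync_single() with a for_cpu/for_device
pair is the portable streaming-DMA idiom: sync-for-cpu hands the mapped MTT
page to the CPU before it writes the entries, and sync-for-device flushes
them back so the HCA never reads stale data on non-cache-coherent platforms;
on coherent ones both calls are effectively free.]
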
-int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
- int start_index, int npages, u64 *page_list)
+int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ int start_index, int npages, u64 *page_list)
{
+ int err = 0;
int chunk;
- int err;
+ int mtts_per_page;
+ int max_mtts_first_page;
- if (mtt->order < 0)
- return -EINVAL;
+ /* compute how many MTTs fit in the first page */
+ mtts_per_page = PAGE_SIZE / sizeof(u64);
+ max_mtts_first_page = mtts_per_page - (mtt->offset + start_index)
+ % mtts_per_page;
+ chunk = min_t(int, max_mtts_first_page, npages);
+
while (npages > 0) {
- chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
if (err)
return err;
-
npages -= chunk;
start_index += chunk;
page_list += chunk;
+
+ chunk = min_t(int, mtts_per_page, npages);
}
+ return err;
+}
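
[Note: the first chunk is sized so every later write stays within one page
of the MTT table. On 4 KiB pages, mtts_per_page = 4096 / 8 = 512; if
mtt->offset + start_index = 1000, the opening chunk is 512 - (1000 % 512) =
24 entries, which lands the cursor exactly on a page boundary, and every
following chunk is a full 512 entries (or whatever remains).]
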
- return 0;
+int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ int start_index, int npages, u64 *page_list)
+{
+ struct mlx4_cmd_mailbox *mailbox = NULL;
+ __be64 *inbox = NULL;
+ int chunk;
+ int err = 0;
+ int i;
+
+ if (mtt->order < 0)
+ return -EINVAL;
+
+ if (mlx4_is_mfunc(dev)) {
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ while (npages > 0) {
+ chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) - 2,
+ npages);
+ inbox[0] = cpu_to_be64(mtt->offset + start_index);
+ inbox[1] = 0;
+ for (i = 0; i < chunk; ++i)
+ inbox[i + 2] = cpu_to_be64(page_list[i] |
+ MLX4_MTT_FLAG_PRESENT);
+ err = mlx4_WRITE_MTT(dev, mailbox, chunk);
+ if (err) {
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+ }
+
+ npages -= chunk;
+ start_index += chunk;
+ page_list += chunk;
+ }
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+ }
+
+ return __mlx4_write_mtt(dev, mtt, start_index, npages, page_list);
}
EXPORT_SYMBOL_GPL(mlx4_write_mtt);
@@ -484,7 +638,7 @@
return -ENOMEM;
for (i = 0; i < buf->npages; ++i)
- if (buf->direct.map)
+ if (buf->nbufs == 1)
page_list[i] = buf->direct.map + (i << buf->page_shift);
else
page_list[i] = buf->page_list[i].map;
@@ -496,11 +650,106 @@
}
EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt);
+int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type,
+ struct mlx4_mw *mw)
+{
+ u32 index;
+
+ index = mlx4_mpt_reserve(dev);
+ if (index == -1)
+ return -ENOMEM;
+
+ mw->key = hw_index_to_key(index);
+ mw->pd = pd;
+ mw->type = type;
+ mw->enabled = MLX4_MPT_DISABLED;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mw_alloc);
+
+int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mpt_entry *mpt_entry;
+ int err;
+
+ err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key));
+ if (err)
+ return err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto err_table;
+ }
+ mpt_entry = mailbox->buf;
+
+ memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+ /* Note that the MLX4_MPT_FLAG_REGION bit in mpt_entry->flags is turned
+ * off, thus creating a memory window and not a memory region.
+ */
+ mpt_entry->key = cpu_to_be32(key_to_hw_index(mw->key));
+ mpt_entry->pd_flags = cpu_to_be32(mw->pd);
+ if (mw->type == MLX4_MW_TYPE_2) {
+ mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
+ mpt_entry->qpn = cpu_to_be32(MLX4_MPT_QP_FLAG_BOUND_QP);
+ mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_EN_INV);
+ }
+
+ err = mlx4_SW2HW_MPT(dev, mailbox,
+ key_to_hw_index(mw->key) &
+ (dev->caps.num_mpts - 1));
+ if (err) {
+ mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
+ goto err_cmd;
+ }
+ mw->enabled = MLX4_MPT_EN_HW;
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return 0;
+
+err_cmd:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+err_table:
+ mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key));
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mw_enable);
+
+void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw)
+{
+ int err;
+
+ if (mw->enabled == MLX4_MPT_EN_HW) {
+ err = mlx4_HW2SW_MPT(dev, NULL,
+ key_to_hw_index(mw->key) &
+ (dev->caps.num_mpts - 1));
+ if (err)
+ mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err);
+
+ mw->enabled = MLX4_MPT_EN_SW;
+ }
+ if (mw->enabled)
+ mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key));
+ mlx4_mpt_release(dev, key_to_hw_index(mw->key));
+}
+EXPORT_SYMBOL_GPL(mlx4_mw_free);
+
int mlx4_init_mr_table(struct mlx4_dev *dev)
{
- struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_mr_table *mr_table = &priv->mr_table;
int err;
+ /* Nothing to do for slaves - all MR handling is forwarded
+ * to the master */
+ if (mlx4_is_slave(dev))
+ return 0;
+
if (!is_power_of_2(dev->caps.num_mpts))
return -EINVAL;
@@ -510,13 +759,17 @@
return err;
err = mlx4_buddy_init(&mr_table->mtt_buddy,
- ilog2(dev->caps.num_mtt_segs));
+ ilog2(div_u64(dev->caps.num_mtts,
+ (1 << log_mtts_per_seg))));
if (err)
goto err_buddy;
if (dev->caps.reserved_mtts) {
- if (mlx4_alloc_mtt_range(dev, fls(dev->caps.reserved_mtts - 1)) == -1) {
- mlx4_warn(dev, "MTT table of order %d is too small.\n",
+ priv->reserved_mtts =
+ mlx4_alloc_mtt_range(dev,
+ fls(dev->caps.reserved_mtts - 1));
+ if (priv->reserved_mtts < 0) {
+ mlx4_warn(dev, "MTT table of order %u is too small.\n",
mr_table->mtt_buddy.max_order);
err = -ENOMEM;
goto err_reserve_mtts;
@@ -536,8 +789,14 @@
void mlx4_cleanup_mr_table(struct mlx4_dev *dev)
{
- struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_mr_table *mr_table = &priv->mr_table;
+ if (mlx4_is_slave(dev))
+ return;
+ if (priv->reserved_mtts >= 0)
+ mlx4_free_mtt_range(dev, priv->reserved_mtts,
+ fls(dev->caps.reserved_mtts - 1));
mlx4_buddy_cleanup(&mr_table->mtt_buddy);
mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
}
@@ -569,9 +828,8 @@
return 0;
}
-int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
- u64 *page_list, int npages, u64 iova, u32 fbo,
- u32 len, u32 *lkey, u32 *rkey, int same_key)
+int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
+ int npages, u64 iova, u32 *lkey, u32 *rkey)
{
u32 key;
int i, err;
@@ -583,8 +841,7 @@
++fmr->maps;
key = key_to_hw_index(fmr->mr.key);
- if (!same_key)
- key += dev->caps.num_mpts;
+ key += dev->caps.num_mpts;
*lkey = *rkey = fmr->mr.key = hw_index_to_key(key);
*(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
@@ -592,18 +849,19 @@
/* Make sure MPT status is visible before writing MTT entries */
wmb();
+ dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle,
+ npages * sizeof(u64), DMA_TO_DEVICE);
+
for (i = 0; i < npages; ++i)
fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
- dma_sync_single(&dev->pdev->dev, fmr->dma_handle,
- npages * sizeof(u64), DMA_TO_DEVICE);
+ dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle,
+ npages * sizeof(u64), DMA_TO_DEVICE);
fmr->mpt->key = cpu_to_be32(key);
fmr->mpt->lkey = cpu_to_be32(key);
- fmr->mpt->length = cpu_to_be64(len);
+ fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift));
fmr->mpt->start = cpu_to_be64(iova);
- fmr->mpt->first_byte_offset = cpu_to_be32(fbo & 0x001fffff);
- fmr->mpt->flags2 = (fbo ? MLX4_MPT_FLAG2_FBO_EN : 0);
 /* Make sure MTT entries are visible before setting MPT status */
wmb();
@@ -615,16 +873,6 @@
return 0;
}
-EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr_fbo);
-
-int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
- int npages, u64 iova, u32 *lkey, u32 *rkey)
-{
- u32 len = npages * (1ull << fmr->page_shift);
-
- return mlx4_map_phys_fmr_fbo(dev, fmr, page_list, npages, iova, 0,
- len, lkey, rkey, 0);
-}
EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr);
int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
@@ -631,9 +879,11 @@
int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- u64 mtt_seg;
- int err = -ENOMEM;
+ int err = -ENOMEM, ret;
+ if (max_maps > dev->caps.max_fmr_maps)
+ return -EINVAL;
+
if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
return -EINVAL;
@@ -651,11 +901,10 @@
if (err)
return err;
- mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;
-
fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
- fmr->mr.mtt.first_seg,
+ fmr->mr.mtt.offset,
&fmr->dma_handle);
+
if (!fmr->mtts) {
err = -ENOMEM;
goto err_free;
@@ -664,54 +913,13 @@
return 0;
err_free:
- mlx4_mr_free(dev, &fmr->mr);
+ ret = mlx4_mr_free(dev, &fmr->mr);
+ if (ret)
+ mlx4_err(dev, "Error deregistering MR. The system may have become unstable.");
return err;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_alloc);
-int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx,
- u32 pd, u32 access, int max_pages,
- int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
-{
- struct mlx4_priv *priv = mlx4_priv(dev);
- u64 mtt_seg;
- int err = -ENOMEM;
-
- if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
- return -EINVAL;
-
- /* All MTTs must fit in the same page */
- if (max_pages * sizeof *fmr->mtts > PAGE_SIZE)
- return -EINVAL;
-
- fmr->page_shift = page_shift;
- fmr->max_pages = max_pages;
- fmr->max_maps = max_maps;
- fmr->maps = 0;
-
- err = mlx4_mr_alloc_reserved(dev, mridx, pd, 0, 0, access, max_pages,
- page_shift, &fmr->mr);
- if (err)
- return err;
-
- mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;
-
- fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
- fmr->mr.mtt.first_seg,
- &fmr->dma_handle);
- if (!fmr->mtts) {
- err = -ENOMEM;
- goto err_free;
- }
-
- return 0;
-
-err_free:
- mlx4_mr_free_reserved(dev, &fmr->mr);
- return err;
-}
-EXPORT_SYMBOL_GPL(mlx4_fmr_alloc_reserved);
-
int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -733,41 +941,56 @@
void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
u32 *lkey, u32 *rkey)
{
+ u32 key;
+
if (!fmr->maps)
return;
+ key = key_to_hw_index(fmr->mr.key) & (dev->caps.num_mpts - 1);
+
+ *(u8 *)fmr->mpt = MLX4_MPT_STATUS_SW;
+
+ /* Make sure MPT status is visible before changing MPT fields */
+ wmb();
+
+ fmr->mr.key = hw_index_to_key(key);
+
+ fmr->mpt->key = cpu_to_be32(key);
+ fmr->mpt->lkey = cpu_to_be32(key);
+ fmr->mpt->length = 0;
+ fmr->mpt->start = 0;
+
+ /* Make sure MPT data is visible before changing MPT status */
+ wmb();
+
+ *(u8 *)fmr->mpt = MLX4_MPT_STATUS_HW;
+
+ /* Make sure MPT status is visible */
+ wmb();
+
fmr->maps = 0;
-
- *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_unmap);
int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
{
+ int ret;
+
if (fmr->maps)
return -EBUSY;
- fmr->mr.enabled = 0;
- mlx4_mr_free(dev, &fmr->mr);
+ ret = mlx4_mr_free(dev, &fmr->mr);
+ if (ret)
+ return ret;
+ fmr->mr.enabled = MLX4_MPT_DISABLED;
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_free);
-int mlx4_fmr_free_reserved(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
-{
- if (fmr->maps)
- return -EBUSY;
-
- fmr->mr.enabled = 0;
- mlx4_mr_free_reserved(dev, &fmr->mr);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(mlx4_fmr_free_reserved);
-
int mlx4_SYNC_TPT(struct mlx4_dev *dev)
{
- return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000);
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000,
+ MLX4_CMD_NATIVE);
}
EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT);
Modified: trunk/sys/ofed/drivers/net/mlx4/pd.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/pd.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/pd.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -31,11 +31,11 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/module.h>
#include <linux/io-mapping.h>
-#include <asm/page.h>
+#include <linux/page.h>
#include "mlx4.h"
#include "icm.h"
@@ -58,16 +58,70 @@
void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn)
{
- mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn);
+ mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn, MLX4_USE_RR);
}
EXPORT_SYMBOL_GPL(mlx4_pd_free);
+int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ *xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap);
+ if (*xrcdn == -1)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn)
+{
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param,
+ RES_XRCD, RES_OP_RESERVE,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ return err;
+
+ *xrcdn = get_param_l(&out_param);
+ return 0;
+ }
+ return __mlx4_xrcd_alloc(dev, xrcdn);
+}
+EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc);
+
+void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
+{
+ mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn, MLX4_USE_RR);
+}
+
+void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
+{
+ u64 in_param = 0;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, xrcdn);
+ err = mlx4_cmd(dev, in_param, RES_XRCD,
+ RES_OP_RESERVE, MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ mlx4_warn(dev, "Failed to release xrcdn %d\n", xrcdn);
+ } else
+ __mlx4_xrcd_free(dev, xrcdn);
+}
+EXPORT_SYMBOL_GPL(mlx4_xrcd_free);
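The wrapped-command paths above return the allocated xrcdn in the low 32 bits of out_param and read it back with get_param_l(). Below is a toy model of that u64 packing; the helper definitions are assumptions modeled on the usual mlx4 ones, not copied from this tree.

#include <stdint.h>
#include <stdio.h>

static void set_param_l(uint64_t *arg, uint32_t val)
{
	*arg = (*arg & 0xffffffff00000000ull) | val;	/* low 32 bits */
}

static void set_param_h(uint64_t *arg, uint32_t val)
{
	*arg = (*arg & 0xffffffffull) | ((uint64_t)val << 32);	/* high 32 bits */
}

static uint32_t get_param_l(uint64_t *arg)
{
	return (uint32_t)(*arg & 0xffffffff);
}

int main(void)
{
	uint64_t out_param = 0;

	set_param_l(&out_param, 17);	/* e.g. an allocated xrcdn */
	set_param_h(&out_param, 0xabcd);	/* unrelated high half survives */
	printf("xrcdn = %u (param 0x%016llx)\n", get_param_l(&out_param),
	       (unsigned long long)out_param);
	return 0;
}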
+
int mlx4_init_pd_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds,
- (1 << 24) - 1, dev->caps.reserved_pds, 0);
+ (1 << NOT_MASKED_PD_BITS) - 1,
+ dev->caps.reserved_pds, 0);
}
void mlx4_cleanup_pd_table(struct mlx4_dev *dev)
@@ -75,16 +129,34 @@
mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap);
}
+int mlx4_init_xrcd_table(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16),
+ (1 << 16) - 1, dev->caps.reserved_xrcds + 1, 0);
+}
+
+void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev)
+{
+ mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap);
+}
+
int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
{
+ int offset;
+
uar->index = mlx4_bitmap_alloc(&mlx4_priv(dev)->uar_table.bitmap);
if (uar->index == -1)
return -ENOMEM;
- uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
+ if (mlx4_is_slave(dev))
+ offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) /
+ dev->caps.uar_page_size);
+ else
+ offset = uar->index;
+ uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset;
uar->map = NULL;
-
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_uar_alloc);
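A quick arithmetic check of the slave UAR offset computed above: the slave's index is reduced modulo the number of UAR pages that fit in BAR 2, so the resulting pfn always lands inside the mapped region. BAR length and page size below are hypothetical example values.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t bar2_len = 8ull << 20;	/* 8 MiB BAR, example only */
	uint32_t uar_page_size = 4096;
	uint32_t pages_in_bar = (uint32_t)(bar2_len / uar_page_size);	/* 2048 */
	uint32_t index = 5000;	/* index handed to a slave */

	printf("offset for slave index %u: %u (of %u pages)\n",
	       (unsigned)index, (unsigned)(index % pages_in_bar),
	       (unsigned)pages_in_bar);
	return 0;
}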
@@ -91,11 +163,12 @@
void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar)
{
- mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index);
+ mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index, MLX4_USE_RR);
}
EXPORT_SYMBOL_GPL(mlx4_uar_free);
-int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf)
+#ifndef CONFIG_PPC
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_uar *uar;
@@ -113,10 +186,13 @@
err = -ENOMEM;
goto out;
}
- uar = kmalloc(sizeof *uar, GFP_KERNEL);
+ uar = kmalloc_node(sizeof *uar, GFP_KERNEL, node);
if (!uar) {
- err = -ENOMEM;
- goto out;
+ uar = kmalloc(sizeof *uar, GFP_KERNEL);
+ if (!uar) {
+ err = -ENOMEM;
+ goto out;
+ }
}
err = mlx4_uar_alloc(dev, uar);
if (err)
@@ -191,6 +267,21 @@
}
EXPORT_SYMBOL_GPL(mlx4_bf_free);
+#else
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node)
+{
+ memset(bf, 0, sizeof *bf);
+ return -ENOSYS;
+}
+EXPORT_SYMBOL_GPL(mlx4_bf_alloc);
+
+void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf)
+{
+ return;
+}
+EXPORT_SYMBOL_GPL(mlx4_bf_free);
+#endif
+
int mlx4_init_uar_table(struct mlx4_dev *dev)
{
if (dev->caps.num_uars <= 128) {
@@ -202,7 +293,7 @@
return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap,
dev->caps.num_uars, dev->caps.num_uars - 1,
- max(128, dev->caps.reserved_uars), 0);
+ dev->caps.reserved_uars, 0);
}
void mlx4_cleanup_uar_table(struct mlx4_dev *dev)
Modified: trunk/sys/ofed/drivers/net/mlx4/port.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/port.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/port.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -32,17 +32,22 @@
#include <linux/errno.h>
#include <linux/if_ether.h>
+#include <linux/module.h>
+#include <linux/err.h>
#include <linux/mlx4/cmd.h>
-
+#include <linux/moduleparam.h>
#include "mlx4.h"
+#include "mlx4_stats.h"
-int mlx4_ib_set_4k_mtu = 0;
-module_param_named(set_4k_mtu, mlx4_ib_set_4k_mtu, int, 0444);
-MODULE_PARM_DESC(set_4k_mtu, "attempt to set 4K MTU to all ConnectX ports");
+int mlx4_set_4k_mtu = -1;
+module_param_named(set_4k_mtu, mlx4_set_4k_mtu, int, 0444);
+MODULE_PARM_DESC(set_4k_mtu,
+ "(Obsolete) attempt to set 4K MTU to all ConnectX ports");
+
+
#define MLX4_MAC_VALID (1ull << 63)
-#define MLX4_MAC_MASK 0xffffffffffffULL
#define MLX4_VLAN_VALID (1u << 31)
#define MLX4_VLAN_MASK 0xfff
@@ -69,10 +74,36 @@
table->entries[i] = 0;
table->refs[i] = 0;
}
- table->max = 1 << dev->caps.log_num_vlans;
+ table->max = (1 << dev->caps.log_num_vlans) - MLX4_VLAN_REGULAR;
table->total = 0;
}
+static int validate_index(struct mlx4_dev *dev,
+ struct mlx4_mac_table *table, int index)
+{
+ int err = 0;
+
+ if (index < 0 || index >= table->max || !table->refs[index]) {
+ mlx4_warn(dev, "No valid MAC entry for the given index\n");
+ err = -EINVAL;
+ }
+ return err;
+}
+
+static int find_index(struct mlx4_dev *dev,
+ struct mlx4_mac_table *table, u64 mac)
+{
+ int i;
+
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+ if ((mac & MLX4_MAC_MASK) ==
+ (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))
+ return i;
+ }
+ /* MAC not found */
+ return -EINVAL;
+}
+
static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
__be64 *entries)
{
@@ -87,40 +118,40 @@
memcpy(mailbox->buf, entries, MLX4_MAC_TABLE_SIZE);
in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
+
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
-int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index)
+int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
{
- struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table;
+ struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+ struct mlx4_mac_table *table = &info->mac_table;
int i, err = 0;
int free = -1;
- mlx4_dbg(dev, "Registering MAC: 0x%llx\n", (unsigned long long) mac);
+ mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d\n",
+ (unsigned long long) mac, port);
+
mutex_lock(&table->mutex);
- for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) {
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
if (free < 0 && !table->refs[i]) {
free = i;
continue;
}
- if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
- /* MAC already registered, increase refernce count */
- *index = i;
+ if ((mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) &&
+ table->refs[i]) {
+ /* MAC already registered, Must not have duplicates */
+ err = i;
++table->refs[i];
goto out;
}
}
- if (free < 0) {
- err = -ENOMEM;
- goto out;
- }
-
mlx4_dbg(dev, "Free MAC index is %d\n", free);
if (table->total == table->max) {
@@ -130,39 +161,87 @@
}
/* Register new MAC */
- table->refs[free] = 1;
table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID);
err = mlx4_set_port_mac_table(dev, port, table->entries);
if (unlikely(err)) {
- mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) mac);
- table->refs[free] = 0;
+ mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
+ (unsigned long long) mac);
table->entries[free] = 0;
goto out;
}
+ table->refs[free] = 1;
- *index = free;
+ err = free;
++table->total;
out:
mutex_unlock(&table->mutex);
return err;
}
+EXPORT_SYMBOL_GPL(__mlx4_register_mac);
+
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+ u64 out_param = 0;
+ int err = -EINVAL;
+
+ if (mlx4_is_mfunc(dev)) {
+ if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) {
+ err = mlx4_cmd_imm(dev, mac, &out_param,
+ ((u32) port) << 8 | (u32) RES_MAC,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ }
+ if (err && err == -EINVAL && mlx4_is_slave(dev)) {
+ /* retry using old REG_MAC format */
+ set_param_l(&out_param, port);
+ err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (!err)
+ dev->flags |= MLX4_FLAG_OLD_REG_MAC;
+ }
+ if (err)
+ return err;
+
+ return get_param_l(&out_param);
+ }
+ return __mlx4_register_mac(dev, port, mac);
+}
EXPORT_SYMBOL_GPL(mlx4_register_mac);
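A toy model of the MAC table semantics introduced above: the first registration takes a free slot, a duplicate just bumps that slot's refcount, and the non-negative index doubles as the return value (replacing the old *index out-parameter). Table size and addresses are illustrative.

#include <stdint.h>
#include <stdio.h>

#define TBL_SZ 8

static uint64_t entries[TBL_SZ];
static int refs[TBL_SZ];

static int register_mac(uint64_t mac)
{
	int i, free_slot = -1;

	for (i = 0; i < TBL_SZ; i++) {
		if (free_slot < 0 && !refs[i]) {
			free_slot = i;	/* remember first free slot */
			continue;
		}
		if (refs[i] && entries[i] == mac) {
			++refs[i];	/* duplicate: just take a reference */
			return i;
		}
	}
	if (free_slot < 0)
		return -1;	/* table full, would be -ENOMEM */
	entries[free_slot] = mac;
	refs[free_slot] = 1;
	return free_slot;	/* index doubles as the return value */
}

int main(void)
{
	printf("%d\n", register_mac(0x001122334455ull));	/* 0 */
	printf("%d\n", register_mac(0x001122334455ull));	/* 0 again, refs now 2 */
	printf("%d\n", register_mac(0xaabbccddeeffull));	/* 1 */
	return 0;
}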
-void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index)
+int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port)
{
- struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table;
+ return dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
+ (port - 1) * (1 << dev->caps.log_num_macs);
+}
+EXPORT_SYMBOL_GPL(mlx4_get_base_qpn);
+void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+ struct mlx4_port_info *info;
+ struct mlx4_mac_table *table;
+ int index;
+
+ if (port < 1 || port > dev->caps.num_ports) {
+ mlx4_warn(dev, "invalid port number (%d), aborting...\n", port);
+ return;
+ }
+ info = &mlx4_priv(dev)->port[port];
+ table = &info->mac_table;
mutex_lock(&table->mutex);
- if (!table->refs[index]) {
- mlx4_warn(dev, "No MAC entry for index %d\n", index);
+
+ index = find_index(dev, table, mac);
+
+ if (validate_index(dev, table, index))
goto out;
- }
+
if (--table->refs[index]) {
- mlx4_warn(dev, "Have more references for index %d,"
- "no need to modify MAC table\n", index);
+ mlx4_dbg(dev, "Have more references for index %d, "
+ "no need to modify mac table\n", index);
goto out;
}
+
table->entries[index] = 0;
mlx4_set_port_mac_table(dev, port, table->entries);
--table->total;
@@ -169,8 +248,60 @@
out:
mutex_unlock(&table->mutex);
}
+EXPORT_SYMBOL_GPL(__mlx4_unregister_mac);
+
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+ u64 out_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) {
+ (void) mlx4_cmd_imm(dev, mac, &out_param,
+ ((u32) port) << 8 | (u32) RES_MAC,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ } else {
+ /* use old unregister mac format */
+ set_param_l(&out_param, port);
+ (void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ }
+ return;
+ }
+ __mlx4_unregister_mac(dev, port, mac);
+ return;
+}
EXPORT_SYMBOL_GPL(mlx4_unregister_mac);
+int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
+{
+ struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+ struct mlx4_mac_table *table = &info->mac_table;
+ int index = qpn - info->base_qpn;
+ int err = 0;
+
+ /* CX1 doesn't support multi-functions */
+ mutex_lock(&table->mutex);
+
+ err = validate_index(dev, table, index);
+ if (err)
+ goto out;
+
+ table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID);
+
+ err = mlx4_set_port_mac_table(dev, port, table->entries);
+ if (unlikely(err)) {
+ mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
+ (unsigned long long) new_mac);
+ table->entries[index] = 0;
+ }
+out:
+ mutex_unlock(&table->mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(__mlx4_replace_mac);
+
static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
__be32 *entries)
{
@@ -185,7 +316,7 @@
memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE);
in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
@@ -201,7 +332,7 @@
if (table->refs[i] &&
(vid == (MLX4_VLAN_MASK &
be32_to_cpu(table->entries[i])))) {
- /* Vlan already registered, increase refernce count */
+ /* VLAN already registered, increase reference count */
*idx = i;
return 0;
}
@@ -211,7 +342,8 @@
}
EXPORT_SYMBOL_GPL(mlx4_find_cached_vlan);
-int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
+int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan,
+ int *index)
{
struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
int i, err = 0;
@@ -218,6 +350,13 @@
int free = -1;
mutex_lock(&table->mutex);
+
+ if (table->total == table->max) {
+ /* No free vlan entries */
+ err = -ENOSPC;
+ goto out;
+ }
+
for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) {
if (free < 0 && (table->refs[i] == 0)) {
free = i;
@@ -227,7 +366,7 @@
if (table->refs[i] &&
(vlan == (MLX4_VLAN_MASK &
be32_to_cpu(table->entries[i])))) {
- /* Vlan already registered, increase refernce count */
+ /* Vlan already registered, increase reference count */
*index = i;
++table->refs[i];
goto out;
@@ -239,13 +378,7 @@
goto out;
}
- if (table->total == table->max) {
- /* No free vlan entries */
- err = -ENOSPC;
- goto out;
- }
-
- /* Register new MAC */
+ /* Register new VLAN */
table->refs[free] = 1;
table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID);
@@ -263,25 +396,49 @@
mutex_unlock(&table->mutex);
return err;
}
+
+int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
+{
+ u64 out_param = 0;
+ int err;
+
+ if (vlan > 4095)
+ return -EINVAL;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, vlan, &out_param,
+ ((u32) port) << 8 | (u32) RES_VLAN,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (!err)
+ *index = get_param_l(&out_param);
+
+ return err;
+ }
+ return __mlx4_register_vlan(dev, port, vlan, index);
+}
EXPORT_SYMBOL_GPL(mlx4_register_vlan);
-void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
+void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
{
struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
+ int index;
+ mutex_lock(&table->mutex);
+ if (mlx4_find_cached_vlan(dev, port, vlan, &index)) {
+ mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan);
+ goto out;
+ }
+
if (index < MLX4_VLAN_REGULAR) {
mlx4_warn(dev, "Trying to free special vlan index %d\n", index);
- return;
+ goto out;
}
- mutex_lock(&table->mutex);
- if (!table->refs[index]) {
- mlx4_warn(dev, "No vlan entry for index %d\n", index);
- goto out;
- }
if (--table->refs[index]) {
- mlx4_dbg(dev, "Have more references for index %d,"
- "no need to modify vlan table\n", index);
+ mlx4_dbg(dev, "Have %d more references for index %d, "
+ "no need to modify vlan table\n", table->refs[index],
+ index);
goto out;
}
table->entries[index] = 0;
@@ -290,6 +447,21 @@
out:
mutex_unlock(&table->mutex);
}
+
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
+{
+ u64 out_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ (void) mlx4_cmd_imm(dev, vlan, &out_param,
+ ((u32) port) << 8 | (u32) RES_VLAN,
+ RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ return;
+ }
+ __mlx4_unregister_vlan(dev, port, vlan);
+}
EXPORT_SYMBOL_GPL(mlx4_unregister_vlan);
int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
@@ -320,7 +492,8 @@
*(__be32 *) (&inbuf[20]) = cpu_to_be32(port);
err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3,
- MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
if (!err)
*caps = *(__be32 *) (outbuf + 84);
mlx4_free_cmd_mailbox(dev, inmailbox);
@@ -327,13 +500,263 @@
mlx4_free_cmd_mailbox(dev, outmailbox);
return err;
}
+static struct mlx4_roce_gid_entry zgid_entry;
-int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
+int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave)
{
- struct mlx4_cmd_mailbox *mailbox;
+ if (slave == 0)
+ return MLX4_ROCE_PF_GIDS;
+ if (slave <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % dev->num_vfs))
+ return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs) + 1;
+ return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs;
+}
+
+int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave)
+{
+ int gids;
+ int vfs;
+
+ gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
+ vfs = dev->num_vfs;
+
+ if (slave == 0)
+ return 0;
+ if (slave <= gids % vfs)
+ return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1);
+
+ return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1));
+}
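Worked example of the GID carve-up implemented by the two helpers above: the (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) VF entries are split across num_vfs slaves, with the first (gids % vfs) slaves receiving one extra entry. The totals below (128 GIDs, 16 PF GIDs, 5 VFs) are purely illustrative.

#include <stdio.h>

int main(void)
{
	int max_gids = 128, pf_gids = 16, vfs = 5;
	int gids = max_gids - pf_gids;	/* 112 entries left for the VFs */
	int slave;

	for (slave = 0; slave <= vfs; slave++) {
		int num, base;

		if (slave == 0) {	/* PF keeps its reserved block */
			num = pf_gids;
			base = 0;
		} else if (slave <= gids % vfs) {	/* early slaves get one extra */
			num = gids / vfs + 1;
			base = pf_gids + (gids / vfs + 1) * (slave - 1);
		} else {
			num = gids / vfs;
			base = pf_gids + gids % vfs + (gids / vfs) * (slave - 1);
		}
		printf("slave %d: %d gids at base %d\n", slave, num, base);
	}
	return 0;
}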
+
+static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
+ u8 op_mod, struct mlx4_cmd_mailbox *inbox)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_port_info *port_info;
+ struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master;
+ struct mlx4_slave_state *slave_st = &master->slave_state[slave];
+ struct mlx4_set_port_rqp_calc_context *qpn_context;
+ struct mlx4_set_port_general_context *gen_context;
+ struct mlx4_roce_gid_entry *gid_entry_tbl, *gid_entry_mbox, *gid_entry_mb1;
+ int reset_qkey_viols;
+ int port;
+ int is_eth;
+ int num_gids;
+ int base;
+ u32 in_modifier;
+ u32 promisc;
+ u16 mtu, prev_mtu;
int err;
+ int i, j;
+ int offset;
+ __be32 agg_cap_mask;
+ __be32 slave_cap_mask;
+ __be32 new_cap_mask;
- if (dev->caps.port_type[port] != MLX4_PORT_TYPE_IB)
+ port = in_mod & 0xff;
+ in_modifier = (in_mod >> 8) & 0xff;
+ is_eth = op_mod;
+ port_info = &priv->port[port];
+
+ if (op_mod > 1)
+ return -EINVAL;
+
+ /* Slaves cannot perform SET_PORT operations except changing MTU */
+ if (is_eth) {
+ if (slave != dev->caps.function &&
+ in_modifier != MLX4_SET_PORT_GENERAL &&
+ in_modifier != MLX4_SET_PORT_GID_TABLE) {
+ mlx4_warn(dev, "denying SET_PORT for slave:%d, "
+ "port %d, config_select 0x%x\n",
+ slave, port, in_modifier);
+ return -EINVAL;
+ }
+ switch (in_modifier) {
+ case MLX4_SET_PORT_RQP_CALC:
+ qpn_context = inbox->buf;
+ qpn_context->base_qpn =
+ cpu_to_be32(port_info->base_qpn);
+ qpn_context->n_mac = 0x7;
+ promisc = be32_to_cpu(qpn_context->promisc) >>
+ SET_PORT_PROMISC_SHIFT;
+ qpn_context->promisc = cpu_to_be32(
+ promisc << SET_PORT_PROMISC_SHIFT |
+ port_info->base_qpn);
+ promisc = be32_to_cpu(qpn_context->mcast) >>
+ SET_PORT_MC_PROMISC_SHIFT;
+ qpn_context->mcast = cpu_to_be32(
+ promisc << SET_PORT_MC_PROMISC_SHIFT |
+ port_info->base_qpn);
+ break;
+ case MLX4_SET_PORT_GENERAL:
+ gen_context = inbox->buf;
+ /* Mtu is configured as the max MTU among all the
+ * the functions on the port. */
+ mtu = be16_to_cpu(gen_context->mtu);
+ mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port] +
+ ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
+ prev_mtu = slave_st->mtu[port];
+ slave_st->mtu[port] = mtu;
+ if (mtu > master->max_mtu[port])
+ master->max_mtu[port] = mtu;
+ if (mtu < prev_mtu && prev_mtu ==
+ master->max_mtu[port]) {
+ slave_st->mtu[port] = mtu;
+ master->max_mtu[port] = mtu;
+ for (i = 0; i < dev->num_slaves; i++) {
+ master->max_mtu[port] =
+ max(master->max_mtu[port],
+ master->slave_state[i].mtu[port]);
+ }
+ }
+
+ gen_context->mtu = cpu_to_be16(master->max_mtu[port]);
+ break;
+ case MLX4_SET_PORT_GID_TABLE:
+ /* Change to MULTIPLE entries: loop over the number of
+ * GIDs the guest has.
+ * 1. Check for duplicates among the GIDs passed by the slave
+ */
+ num_gids = mlx4_get_slave_num_gids(dev, slave);
+ base = mlx4_get_base_gid_ix(dev, slave);
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *) (inbox->buf);
+ for (i = 0; i < num_gids; gid_entry_mbox++, i++) {
+ if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw,
+ sizeof(zgid_entry)))
+ continue;
+ gid_entry_mb1 = gid_entry_mbox + 1;
+ for (j = i + 1; j < num_gids; gid_entry_mb1++, j++) {
+ if (!memcmp(gid_entry_mb1->raw,
+ zgid_entry.raw, sizeof(zgid_entry)))
+ continue;
+ if (!memcmp(gid_entry_mb1->raw, gid_entry_mbox->raw,
+ sizeof(gid_entry_mbox->raw))) {
+ /* found duplicate */
+ return -EINVAL;
+ }
+ }
+ }
+
+ /* 2. Check that there are no duplicates in OTHER
+ * entries in the port GID table
+ */
+ for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
+ if (i >= base && i < base + num_gids)
+ continue; /* don't compare to slave's current gids */
+ gid_entry_tbl = &priv->roce_gids[port - 1][i];
+ if (!memcmp(gid_entry_tbl->raw, zgid_entry.raw, sizeof(zgid_entry)))
+ continue;
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *) (inbox->buf);
+ for (j = 0; j < num_gids; gid_entry_mbox++, j++) {
+ if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw,
+ sizeof(zgid_entry)))
+ continue;
+ if (!memcmp(gid_entry_mbox->raw, gid_entry_tbl->raw,
+ sizeof(gid_entry_tbl->raw))) {
+ /* found duplicate */
+ mlx4_warn(dev, "requested gid entry for slave:%d "
+ "is a duplicate of gid at index %d\n",
+ slave, i);
+ return -EINVAL;
+ }
+ }
+ }
+
+ /* insert slave GIDs with memcpy, starting at slave's base index */
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *) (inbox->buf);
+ for (i = 0, offset = base; i < num_gids; gid_entry_mbox++, offset++, i++)
+ memcpy(priv->roce_gids[port - 1][offset].raw, gid_entry_mbox->raw, 16);
+
+ /* Now, copy roce port gids table to current mailbox for passing to FW */
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *) (inbox->buf);
+ for (i = 0; i < MLX4_ROCE_MAX_GIDS; gid_entry_mbox++, i++)
+ memcpy(gid_entry_mbox->raw, priv->roce_gids[port - 1][i].raw, 16);
+
+ break;
+ }
+ return mlx4_cmd(dev, inbox->dma, in_mod & 0xffff, op_mod,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+ }
+
+ /* For IB, we only consider:
+ * - The capability mask, which is set to the aggregate of all
+ * slave function capabilities
+ * - The QKey violation counter - reset according to each request.
+ */
+
+ if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+ reset_qkey_viols = (*(u8 *) inbox->buf) & 0x40;
+ new_cap_mask = ((__be32 *) inbox->buf)[2];
+ } else {
+ reset_qkey_viols = ((u8 *) inbox->buf)[3] & 0x1;
+ new_cap_mask = ((__be32 *) inbox->buf)[1];
+ }
+
+ /* slave may not set the IS_SM capability for the port */
+ if (slave != mlx4_master_func_num(dev) &&
+ (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_IS_SM))
+ return -EINVAL;
+
+ /* No DEV_MGMT in multifunc mode */
+ if (mlx4_is_mfunc(dev) &&
+ (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_DEV_MGMT_SUP))
+ return -EINVAL;
+
+ agg_cap_mask = 0;
+ slave_cap_mask =
+ priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
+ priv->mfunc.master.slave_state[slave].ib_cap_mask[port] = new_cap_mask;
+ for (i = 0; i < dev->num_slaves; i++)
+ agg_cap_mask |=
+ priv->mfunc.master.slave_state[i].ib_cap_mask[port];
+
+ /* only clear mailbox for guests. Master may be setting
+ * MTU or PKEY table size
+ */
+ if (slave != dev->caps.function)
+ memset(inbox->buf, 0, 256);
+ if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+ *(u8 *) inbox->buf |= !!reset_qkey_viols << 6;
+ ((__be32 *) inbox->buf)[2] = agg_cap_mask;
+ } else {
+ ((u8 *) inbox->buf)[3] |= !!reset_qkey_viols;
+ ((__be32 *) inbox->buf)[1] = agg_cap_mask;
+ }
+
+ err = mlx4_cmd(dev, inbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ if (err)
+ priv->mfunc.master.slave_state[slave].ib_cap_mask[port] =
+ slave_cap_mask;
+ return err;
+}
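A minimal model of the shared-MTU rule in the MLX4_SET_PORT_GENERAL branch above: the port runs at the maximum MTU over all functions, and a shrink by the current maximum holder forces a rescan across all slaves. Values are illustrative.

#include <stdio.h>

#define SLAVES 3

static int mtu[SLAVES];
static int max_mtu;

static void set_mtu(int slave, int new_mtu)
{
	int prev = mtu[slave], i;

	mtu[slave] = new_mtu;
	if (new_mtu > max_mtu)
		max_mtu = new_mtu;
	if (new_mtu < prev && prev == max_mtu) {
		max_mtu = 0;	/* the max holder shrank: rescan everyone */
		for (i = 0; i < SLAVES; i++)
			if (mtu[i] > max_mtu)
				max_mtu = mtu[i];
	}
	printf("slave %d -> %d, port mtu %d\n", slave, new_mtu, max_mtu);
}

int main(void)
{
	set_mtu(0, 1500);	/* port mtu 1500 */
	set_mtu(1, 9000);	/* port mtu 9000 */
	set_mtu(1, 1500);	/* holder shrank: port mtu back to 1500 */
	return 0;
}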
+
+int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ return mlx4_common_set_port(dev, slave, vhcr->in_modifier,
+ vhcr->op_modifier, inbox);
+}
+
+/* bit locations for set port command with zero op modifier */
+enum {
+ MLX4_SET_PORT_VL_CAP = 4, /* bits 7:4 */
+ MLX4_SET_PORT_MTU_CAP = 12, /* bits 15:12 */
+ MLX4_CHANGE_PORT_PKEY_TBL_SZ = 20,
+ MLX4_CHANGE_PORT_VL_CAP = 21,
+ MLX4_CHANGE_PORT_MTU_CAP = 22,
+};
+
+int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err = -EINVAL, vl_cap, pkey_tbl_flag = 0;
+ u32 in_mod;
+
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_NONE)
return 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -342,13 +765,458 @@
memset(mailbox->buf, 0, 256);
- if (mlx4_ib_set_4k_mtu)
- ((__be32 *) mailbox->buf)[0] |= cpu_to_be32((1 << 22) | (1 << 21) | (5 << 12) | (2 << 4));
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
+ in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ } else {
+ ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port];
- ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port];
- err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ if (pkey_tbl_sz >= 0 && mlx4_is_master(dev)) {
+ pkey_tbl_flag = 1;
+ ((__be16 *) mailbox->buf)[20] = cpu_to_be16(pkey_tbl_sz);
+ }
+ /* IB VL CAP enum isn't used by the firmware, just numerical values */
+ for (vl_cap = dev->caps.vl_cap[port];
+ vl_cap >= 1; vl_cap >>= 1) {
+ ((__be32 *) mailbox->buf)[0] = cpu_to_be32(
+ (1 << MLX4_CHANGE_PORT_MTU_CAP) |
+ (1 << MLX4_CHANGE_PORT_VL_CAP) |
+ (pkey_tbl_flag << MLX4_CHANGE_PORT_PKEY_TBL_SZ) |
+ (dev->caps.port_ib_mtu[port] << MLX4_SET_PORT_MTU_CAP) |
+ (vl_cap << MLX4_SET_PORT_VL_CAP));
+ err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+ if (err != -ENOMEM)
+ break;
+ }
+ }
+
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
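A quick check of how the IB branch above packs the first mailbox dword, using the bit positions from the set-port enum (VL cap in bits 7:4, MTU cap in bits 15:12, change flags in bits 20-22). The vl_cap and MTU values are illustrative; 5 is the standard IB encoding for a 4K MTU.

#include <stdint.h>
#include <stdio.h>

enum {
	SET_PORT_VL_CAP         = 4,	/* bits 7:4 */
	SET_PORT_MTU_CAP        = 12,	/* bits 15:12 */
	CHANGE_PORT_PKEY_TBL_SZ = 20,
	CHANGE_PORT_VL_CAP      = 21,
	CHANGE_PORT_MTU_CAP     = 22,
};

int main(void)
{
	uint32_t vl_cap = 8, ib_mtu = 5, pkey_flag = 1;
	uint32_t word = (1u << CHANGE_PORT_MTU_CAP) |
			(1u << CHANGE_PORT_VL_CAP) |
			(pkey_flag << CHANGE_PORT_PKEY_TBL_SZ) |
			(ib_mtu << SET_PORT_MTU_CAP) |
			(vl_cap << SET_PORT_VL_CAP);

	printf("mailbox dword 0 = 0x%08x\n", (unsigned)word);
	return 0;
}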
+
+int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
+ u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_general_context *context;
+ int err;
+ u32 in_mod;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ memset(context, 0, sizeof *context);
+
+ context->flags = SET_PORT_GEN_ALL_VALID;
+ context->mtu = cpu_to_be16(mtu);
+ context->pptx = (pptx * (!pfctx)) << 7;
+ context->pfctx = pfctx;
+ context->pprx = (pprx * (!pfcrx)) << 7;
+ context->pfcrx = pfcrx;
+
+ in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_general);
+
+int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
+ u8 promisc)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_rqp_calc_context *context;
+ int err;
+ u32 in_mod;
+ u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ?
+ MCAST_DIRECT : MCAST_DEFAULT;
+
+ if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0)
+ return 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ memset(context, 0, sizeof *context);
+
+ context->base_qpn = cpu_to_be32(base_qpn);
+ context->n_mac = dev->caps.log_num_macs;
+ context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT |
+ base_qpn);
+ context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT |
+ base_qpn);
+ context->intra_no_vlan = 0;
+ context->no_vlan = MLX4_NO_VLAN_IDX;
+ context->intra_vlan_miss = 0;
+ context->vlan_miss = MLX4_VLAN_MISS_IDX;
+
+ in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc);
+
+int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_prio2tc_context *context;
+ int err;
+ u32 in_mod;
+ int i;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ memset(context, 0, sizeof *context);
+
+ for (i = 0; i < MLX4_NUM_UP; i += 2)
+ context->prio2tc[i >> 1] = prio2tc[i] << 4 | prio2tc[i + 1];
+
+ in_mod = MLX4_SET_PORT_PRIO2TC << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_PRIO2TC);
+
+int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
+ u8 *pg, u16 *ratelimit)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_scheduler_context *context;
+ int err;
+ u32 in_mod;
+ int i;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ memset(context, 0, sizeof *context);
+
+ for (i = 0; i < MLX4_NUM_TC; i++) {
+ struct mlx4_port_scheduler_tc_cfg_be *tc = &context->tc[i];
+ u16 r;
+ if (ratelimit && ratelimit[i]) {
+ if (ratelimit[i] <= MLX4_MAX_100M_UNITS_VAL) {
+ r = ratelimit[i];
+ tc->max_bw_units =
+ htons(MLX4_RATELIMIT_100M_UNITS);
+ } else {
+ r = ratelimit[i]/10;
+ tc->max_bw_units =
+ htons(MLX4_RATELIMIT_1G_UNITS);
+ }
+ tc->max_bw_value = htons(r);
+ } else {
+ tc->max_bw_value = htons(MLX4_RATELIMIT_DEFAULT);
+ tc->max_bw_units = htons(MLX4_RATELIMIT_1G_UNITS);
+ }
+
+ tc->pg = htons(pg[i]);
+ tc->bw_precentage = htons(tc_tx_bw[i]);
+ }
+
+ in_mod = MLX4_SET_PORT_SCHEDULER << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER);
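Sketch of the per-TC rate encoding above: rates that fit in 100 Mbps units are sent as-is, larger ones are divided by 10 and sent in 1 Gbps units. The 255-unit threshold below is an assumed example value standing in for MLX4_MAX_100M_UNITS_VAL, not read from this tree.

#include <stdint.h>
#include <stdio.h>

#define MAX_100M_UNITS_VAL 255	/* assumed threshold */

static void encode(uint16_t ratelimit)	/* rate expressed in 100 Mbps units */
{
	uint16_t value;
	int gbps_units;

	if (ratelimit && ratelimit <= MAX_100M_UNITS_VAL) {
		value = ratelimit;	/* fits in 100 Mbps units */
		gbps_units = 0;
	} else {
		value = ratelimit / 10;	/* fall back to 1 Gbps units */
		gbps_units = 1;
	}
	printf("%u -> value %u in %s units\n", ratelimit, value,
	       gbps_units ? "1G" : "100M");
}

int main(void)
{
	encode(100);	/* 10 Gbps sent as 100 x 100M */
	encode(400);	/* 40 Gbps sent as 40 x 1G */
	return 0;
}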
+
+int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err = 0;
+
+ return err;
+}
+
+int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
+ u64 mac, u64 clear, u8 mode)
+{
+ return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
+ MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+}
+EXPORT_SYMBOL(mlx4_SET_MCAST_FLTR);
+
+int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err = 0;
+
+ return err;
+}
+
+int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ return 0;
+}
+
+void mlx4_set_stats_bitmap(struct mlx4_dev *dev, unsigned long *stats_bitmap)
+{
+ int last_i = 0;
+
+ bitmap_zero(stats_bitmap, NUM_ALL_STATS);
+
+ if (mlx4_is_slave(dev)) {
+ last_i = dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN ?
+ NUM_PKT_STATS + NUM_FLOW_STATS : NUM_PKT_STATS;
+ } else {
+ bitmap_set(stats_bitmap, last_i, NUM_PKT_STATS);
+ last_i = NUM_PKT_STATS;
+
+ if (dev->caps.flags2 &
+ MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) {
+ bitmap_set(stats_bitmap, last_i, NUM_FLOW_STATS);
+ last_i += NUM_FLOW_STATS;
+ }
+ }
+
+ if (mlx4_is_slave(dev))
+ bitmap_set(stats_bitmap, last_i, NUM_VF_STATS);
+ last_i += NUM_VF_STATS;
+
+ if (mlx4_is_master(dev))
+ bitmap_set(stats_bitmap, last_i, NUM_VPORT_STATS);
+ last_i += NUM_VPORT_STATS;
+
+ bitmap_set(stats_bitmap, last_i, NUM_PORT_STATS);
+}
+EXPORT_SYMBOL(mlx4_set_stats_bitmap);
+
+int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, found_ix = -1;
+ int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
+
+ if (!mlx4_is_mfunc(dev))
+ return -EINVAL;
+
+ for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
+ if (!memcmp(priv->roce_gids[port - 1][i].raw, gid, 16)) {
+ found_ix = i;
+ break;
+ }
+ }
+
+ if (found_ix >= 0) {
+ if (found_ix < MLX4_ROCE_PF_GIDS)
+ *slave_id = 0;
+ else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % dev->num_vfs) *
+ (vf_gids / dev->num_vfs + 1))
+ *slave_id = ((found_ix - MLX4_ROCE_PF_GIDS) /
+ (vf_gids / dev->num_vfs + 1)) + 1;
+ else
+ *slave_id =
+ ((found_ix - MLX4_ROCE_PF_GIDS -
+ ((vf_gids % dev->num_vfs) * ((vf_gids / dev->num_vfs + 1)))) /
+ (vf_gids / dev->num_vfs)) + vf_gids % dev->num_vfs + 1;
+ }
+
+ return (found_ix >= 0) ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL(mlx4_get_slave_from_roce_gid);
+
+int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (!mlx4_is_master(dev))
+ return -EINVAL;
+
+ memcpy(gid, priv->roce_gids[port - 1][slave_id].raw, 16);
+ return 0;
+}
+EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave);
+
+/* Cable Module Info */
+#define MODULE_INFO_MAX_READ 48
+
+#define I2C_ADDR_LOW 0x50
+#define I2C_ADDR_HIGH 0x51
+#define I2C_PAGE_SIZE 256
+
+/* Module Info Data */
+struct mlx4_cable_info {
+ u8 i2c_addr;
+ u8 page_num;
+ __be16 dev_mem_address;
+ __be16 reserved1;
+ __be16 size;
+ __be32 reserved2[2];
+ u8 data[MODULE_INFO_MAX_READ];
+};
+
+enum cable_info_err {
+ CABLE_INF_INV_PORT = 0x1,
+ CABLE_INF_OP_NOSUP = 0x2,
+ CABLE_INF_NOT_CONN = 0x3,
+ CABLE_INF_NO_EEPRM = 0x4,
+ CABLE_INF_PAGE_ERR = 0x5,
+ CABLE_INF_INV_ADDR = 0x6,
+ CABLE_INF_I2C_ADDR = 0x7,
+ CABLE_INF_QSFP_VIO = 0x8,
+ CABLE_INF_I2C_BUSY = 0x9,
+};
+
+#define MAD_STATUS_2_CABLE_ERR(mad_status) ((mad_status >> 8) & 0xFF)
+
+#ifdef DEBUG
+static inline const char *cable_info_mad_err_str(u16 mad_status)
+{
+ u8 err = MAD_STATUS_2_CABLE_ERR(mad_status);
+
+ switch (err) {
+ case CABLE_INF_INV_PORT:
+ return "invalid port selected";
+ case CABLE_INF_OP_NOSUP:
+ return "operation not supported for this port (the port is of type CX4 or internal)";
+ case CABLE_INF_NOT_CONN:
+ return "cable is not connected";
+ case CABLE_INF_NO_EEPRM:
+ return "the connected cable has no EPROM (passive copper cable)";
+ case CABLE_INF_PAGE_ERR:
+ return "page number is greater than 15";
+ case CABLE_INF_INV_ADDR:
+ return "invalid device_address or size (that is, size equals 0 or address+size is greater than 256)";
+ case CABLE_INF_I2C_ADDR:
+ return "invalid I2C slave address";
+ case CABLE_INF_QSFP_VIO:
+ return "at least one cable violates the QSFP specification and ignores the modsel signal";
+ case CABLE_INF_I2C_BUSY:
+ return "I2C bus is constantly busy";
+ }
+ return "Unknown Error";
+}
+#endif /* DEBUG */
+
+/**
+ * mlx4_get_module_info - Read cable module eeprom data
+ * @dev: mlx4_dev.
+ * @port: port number.
+ * @offset: byte offset in eeprom to start reading data from.
+ * @size: num of bytes to read.
+ * @data: output buffer to put the requested data into.
+ *
+ * Reads cable module EEPROM data and copies it into the
+ * data pointer parameter.
+ * Returns the number of bytes read on success or a negative
+ * error code.
+ */
+int mlx4_get_module_info(struct mlx4_dev *dev, u8 port, u16 offset,
+ u16 size, u8 *data)
+{
+ struct mlx4_cmd_mailbox *inbox, *outbox;
+ struct mlx4_mad_ifc *inmad, *outmad;
+ struct mlx4_cable_info *cable_info;
+ u16 i2c_addr;
+ int ret;
+
+ if (size > MODULE_INFO_MAX_READ)
+ size = MODULE_INFO_MAX_READ;
+
+ inbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(inbox)) {
+ mlx4_err(dev,
+ "mlx4_alloc_cmd_mailbox returned with error(%lx)", PTR_ERR(inbox));
+ return PTR_ERR(inbox);
+ }
+
+ outbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(outbox)) {
+ mlx4_free_cmd_mailbox(dev, inbox);
+ mlx4_err(dev,
+ "mlx4_alloc_cmd_mailbox returned with error(%lx)", PTR_ERR(outbox));
+ return PTR_ERR(outbox);
+ }
+
+ inmad = (struct mlx4_mad_ifc *)(inbox->buf);
+ outmad = (struct mlx4_mad_ifc *)(outbox->buf);
+
+ inmad->method = 0x1; /* Get */
+ inmad->class_version = 0x1;
+ inmad->mgmt_class = 0x1;
+ inmad->base_version = 0x1;
+ inmad->attr_id = cpu_to_be16(0xFF60); /* Module Info */
+
+ if (offset < I2C_PAGE_SIZE && offset + size > I2C_PAGE_SIZE)
+ /* Cross-page reads are not allowed;
+ * read only up to offset 256 in the low page
+ */
+ size -= offset + size - I2C_PAGE_SIZE;
+
+ i2c_addr = I2C_ADDR_LOW;
+ if (offset >= I2C_PAGE_SIZE) {
+ /* Reset offset to high page */
+ i2c_addr = I2C_ADDR_HIGH;
+ offset -= I2C_PAGE_SIZE;
+ }
+
+ cable_info = (struct mlx4_cable_info *)inmad->data;
+ cable_info->dev_mem_address = cpu_to_be16(offset);
+ cable_info->page_num = 0;
+ cable_info->i2c_addr = i2c_addr;
+ cable_info->size = cpu_to_be16(size);
+
+ ret = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ if (ret)
+ goto out;
+
+ if (be16_to_cpu(outmad->status)) {
+ /* Mad returned with bad status */
+ ret = be16_to_cpu(outmad->status);
+#ifdef DEBUG
+ mlx4_warn(dev, "MLX4_CMD_MAD_IFC Get Module info attr(%x) "
+ "port(%d) i2c_addr(%x) offset(%d) size(%d): Response "
+ "Mad Status(%x) - %s\n", 0xFF60, port, i2c_addr, offset,
+ size, ret, cable_info_mad_err_str(ret));
+#endif
+ if (i2c_addr == I2C_ADDR_HIGH &&
+ MAD_STATUS_2_CABLE_ERR(ret) == CABLE_INF_I2C_ADDR)
+ /* Some SFP cables do not support i2c slave
+ * address 0x51 (high page), abort silently.
+ */
+ ret = 0;
+ else
+ ret = -ret;
+ goto out;
+ }
+ cable_info = (struct mlx4_cable_info *)outmad->data;
+ memcpy(data, cable_info->data, size);
+ ret = size;
+out:
+ mlx4_free_cmd_mailbox(dev, inbox);
+ mlx4_free_cmd_mailbox(dev, outbox);
+ return ret;
+}
+EXPORT_SYMBOL(mlx4_get_module_info);
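Worked example of the EEPROM addressing rules in mlx4_get_module_info() above: reads are capped at 48 bytes, clamped so they never cross the 256-byte page boundary, and offsets past 256 are redirected to the high I2C address.

#include <stdint.h>
#include <stdio.h>

#define MODULE_INFO_MAX_READ 48
#define I2C_ADDR_LOW  0x50
#define I2C_ADDR_HIGH 0x51
#define I2C_PAGE_SIZE 256

static void plan_read(uint16_t offset, uint16_t size)
{
	uint16_t i2c_addr = I2C_ADDR_LOW;

	if (size > MODULE_INFO_MAX_READ)
		size = MODULE_INFO_MAX_READ;
	if (offset < I2C_PAGE_SIZE && offset + size > I2C_PAGE_SIZE)
		size -= offset + size - I2C_PAGE_SIZE;	/* stay in low page */
	if (offset >= I2C_PAGE_SIZE) {
		i2c_addr = I2C_ADDR_HIGH;	/* redirect to the high page */
		offset -= I2C_PAGE_SIZE;
	}
	printf("addr 0x%02x offset %u size %u\n", i2c_addr, offset, size);
}

int main(void)
{
	plan_read(240, 32);	/* clamped to 16 bytes in the low page */
	plan_read(300, 16);	/* 0x51, offset 44 */
	return 0;
}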
Modified: trunk/sys/ofed/drivers/net/mlx4/profile.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/profile.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/profile.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -32,7 +32,7 @@
* SOFTWARE.
*/
-#include <linux/init.h>
+#include <linux/slab.h>
#include "mlx4.h"
#include "fw.h"
@@ -76,7 +76,7 @@
u64 size;
u64 start;
int type;
- int num;
+ u64 num;
int log_num;
};
@@ -85,7 +85,7 @@
struct mlx4_resource tmp;
int i, j;
- profile = kzalloc(MLX4_RES_NUM * sizeof *profile, GFP_KERNEL);
+ profile = kcalloc(MLX4_RES_NUM, sizeof(*profile), GFP_KERNEL);
if (!profile)
return -ENOMEM;
@@ -98,8 +98,8 @@
profile[MLX4_RES_EQ].size = dev_cap->eqc_entry_sz;
profile[MLX4_RES_DMPT].size = dev_cap->dmpt_entry_sz;
profile[MLX4_RES_CMPT].size = dev_cap->cmpt_entry_sz;
- profile[MLX4_RES_MTT].size = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
- profile[MLX4_RES_MCG].size = MLX4_MGM_ENTRY_SIZE;
+ profile[MLX4_RES_MTT].size = dev_cap->mtt_entry_sz;
+ profile[MLX4_RES_MCG].size = mlx4_get_mgm_entry_size(dev);
profile[MLX4_RES_QP].num = request->num_qp;
profile[MLX4_RES_RDMARC].num = request->num_qp * request->rdmarc_per_qp;
@@ -107,12 +107,13 @@
profile[MLX4_RES_AUXC].num = request->num_qp;
profile[MLX4_RES_SRQ].num = request->num_srq;
profile[MLX4_RES_CQ].num = request->num_cq;
- profile[MLX4_RES_EQ].num = min_t(unsigned, dev_cap->max_eqs,
- dev_cap->reserved_eqs +
- num_possible_cpus() + 1);
+ profile[MLX4_RES_EQ].num = mlx4_is_mfunc(dev) ?
+ dev->phys_caps.num_phys_eqs :
+ min_t(unsigned, dev_cap->max_eqs, MAX_MSIX);
profile[MLX4_RES_DMPT].num = request->num_mpt;
profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS;
- profile[MLX4_RES_MTT].num = request->num_mtt;
+ profile[MLX4_RES_MTT].num = ((u64)request->num_mtt_segs) *
+ (1 << log_mtts_per_seg);
profile[MLX4_RES_MCG].num = request->num_mcg;
for (i = 0; i < MLX4_RES_NUM; ++i) {
@@ -198,9 +199,10 @@
init_hca->log_num_cqs = profile[i].log_num;
break;
case MLX4_RES_EQ:
- dev->caps.num_eqs = profile[i].num;
+ dev->caps.num_eqs = roundup_pow_of_two(min_t(unsigned, dev_cap->max_eqs,
+ MAX_MSIX));
init_hca->eqc_base = profile[i].start;
- init_hca->log_num_eqs = profile[i].log_num;
+ init_hca->log_num_eqs = ilog2(dev->caps.num_eqs);
break;
case MLX4_RES_DMPT:
dev->caps.num_mpts = profile[i].num;
@@ -212,17 +214,24 @@
init_hca->cmpt_base = profile[i].start;
break;
case MLX4_RES_MTT:
- dev->caps.num_mtt_segs = profile[i].num;
+ dev->caps.num_mtts = profile[i].num;
priv->mr_table.mtt_base = profile[i].start;
init_hca->mtt_base = profile[i].start;
break;
case MLX4_RES_MCG:
- dev->caps.num_mgms = profile[i].num >> 1;
- dev->caps.num_amgms = profile[i].num >> 1;
init_hca->mc_base = profile[i].start;
- init_hca->log_mc_entry_sz = ilog2(MLX4_MGM_ENTRY_SIZE);
+ init_hca->log_mc_entry_sz =
+ ilog2(mlx4_get_mgm_entry_size(dev));
init_hca->log_mc_table_sz = profile[i].log_num;
- init_hca->log_mc_hash_sz = profile[i].log_num - 1;
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ dev->caps.num_mgms = profile[i].num;
+ } else {
+ init_hca->log_mc_hash_sz =
+ profile[i].log_num - 1;
+ dev->caps.num_mgms = profile[i].num >> 1;
+ dev->caps.num_amgms = profile[i].num >> 1;
+ }
break;
default:
break;
Modified: trunk/sys/ofed/drivers/net/mlx4/qp.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/qp.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/qp.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -33,7 +33,9 @@
* SOFTWARE.
*/
-#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/gfp.h>
+#include <linux/module.h>
#include <linux/mlx4/cmd.h>
#include <linux/mlx4/qp.h>
@@ -41,6 +43,12 @@
#include "mlx4.h"
#include "icm.h"
+/*
+ * To support BF, a QP number must have bits 6 and 7 cleared
+ */
+#define MLX4_BF_QP_SKIP_MASK 0xc0
+#define MLX4_MAX_BF_QP_RANGE 0x40
+
void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
{
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
@@ -55,7 +63,7 @@
spin_unlock(&qp_table->lock);
if (!qp) {
- mlx4_warn(dev, "Async event for bogus QP %08x\n", qpn);
+ mlx4_dbg(dev, "Async event for non-existent QP %08x\n", qpn);
return;
}
@@ -65,11 +73,26 @@
complete(&qp->free);
}
-int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
- enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
- struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar,
- int sqd_event, struct mlx4_qp *qp)
+/* used for INIT/CLOSE port logic */
+static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0)
{
+ /* this procedure is called after we already know we are on the master */
+ /* qp0 is either the proxy qp0, or the real qp0 */
+ u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev);
+ *proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1;
+
+ *real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn &&
+ qp->qpn <= dev->phys_caps.base_sqpn + 1;
+
+ return *real_qp0 || *proxy_qp0;
+}
+
+static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
+ struct mlx4_qp_context *context,
+ enum mlx4_qp_optpar optpar,
+ int sqd_event, struct mlx4_qp *qp, int native)
+{
static const u16 op[MLX4_QP_NUM_STATE][MLX4_QP_NUM_STATE] = {
[MLX4_QP_STATE_RST] = {
[MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
@@ -110,16 +133,31 @@
}
};
+ struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
int ret = 0;
+ int real_qp0 = 0;
+ int proxy_qp0 = 0;
+ u8 port;
if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE ||
!op[cur_state][new_state])
return -EINVAL;
- if (op[cur_state][new_state] == MLX4_CMD_2RST_QP)
- return mlx4_cmd(dev, 0, qp->qpn, 2,
- MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A);
+ if (op[cur_state][new_state] == MLX4_CMD_2RST_QP) {
+ ret = mlx4_cmd(dev, 0, qp->qpn, 2,
+ MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native);
+ if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR &&
+ cur_state != MLX4_QP_STATE_RST &&
+ is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
+ port = (qp->qpn & 1) + 1;
+ if (proxy_qp0)
+ priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
+ else
+ priv->mfunc.master.qp0_state[port].qp0_active = 0;
+ }
+ return ret;
+ }
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
@@ -138,116 +176,236 @@
((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn =
cpu_to_be32(qp->qpn);
- ret = mlx4_cmd(dev, mailbox->dma, qp->qpn | (!!sqd_event << 31),
+ ret = mlx4_cmd(dev, mailbox->dma,
+ qp->qpn | (!!sqd_event << 31),
new_state == MLX4_QP_STATE_RST ? 2 : 0,
- op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C);
+ op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native);
+ if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
+ port = (qp->qpn & 1) + 1;
+ if (cur_state != MLX4_QP_STATE_ERR &&
+ cur_state != MLX4_QP_STATE_RST &&
+ new_state == MLX4_QP_STATE_ERR) {
+ if (proxy_qp0)
+ priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
+ else
+ priv->mfunc.master.qp0_state[port].qp0_active = 0;
+ } else if (new_state == MLX4_QP_STATE_RTR) {
+ if (proxy_qp0)
+ priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1;
+ else
+ priv->mfunc.master.qp0_state[port].qp0_active = 1;
+ }
+ }
+
mlx4_free_cmd_mailbox(dev, mailbox);
return ret;
}
+
+int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
+ struct mlx4_qp_context *context,
+ enum mlx4_qp_optpar optpar,
+ int sqd_event, struct mlx4_qp *qp)
+{
+ return __mlx4_qp_modify(dev, mtt, cur_state, new_state, context,
+ optpar, sqd_event, qp, 0);
+}
EXPORT_SYMBOL_GPL(mlx4_qp_modify);
-int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base)
+int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
+ int *base, u8 flags)
{
+ int bf_qp = !!(flags & (u8) MLX4_RESERVE_BF_QP);
+
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_qp_table *qp_table = &priv->qp_table;
- int qpn;
- qpn = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align);
- if (qpn == -1)
+ /* Only IPoIB uses a large cnt. In this case, just allocate
+ * as usual, ignoring bf skipping, since IPoIB does not run over RoCE
+ */
+ if (cnt > MLX4_MAX_BF_QP_RANGE && bf_qp)
+ bf_qp = 0;
+
+ *base = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align,
+ bf_qp ? MLX4_BF_QP_SKIP_MASK : 0);
+ if (*base == -1)
return -ENOMEM;
- *base = qpn;
return 0;
}
+
+int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
+ int *base, u8 flags)
+{
+ u64 in_param = 0;
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, (((u32) flags) << 24) | (u32) cnt);
+ set_param_h(&in_param, align);
+ err = mlx4_cmd_imm(dev, in_param, &out_param,
+ RES_QP, RES_OP_RESERVE,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ return err;
+
+ *base = get_param_l(&out_param);
+ return 0;
+ }
+ return __mlx4_qp_reserve_range(dev, cnt, align, base, flags);
+}
EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range);
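Quick check of the BlueFlame constraint the reserve path above enforces: a QP number is BF-capable only when bits 6 and 7 are clear, which is exactly what passing the 0xc0 skip mask to the bitmap allocator guarantees.

#include <stdio.h>

#define BF_QP_SKIP_MASK 0xc0	/* bits 6 and 7 must be clear for BF */

int main(void)
{
	int qpn;

	for (qpn = 0x38; qpn <= 0x48; qpn += 8)
		printf("qpn 0x%02x: %s\n", qpn,
		       (qpn & BF_QP_SKIP_MASK) ? "skip" : "BF-capable");
	return 0;
}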
-void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
+void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_qp_table *qp_table = &priv->qp_table;
- if (base_qpn < dev->caps.sqp_start + 8)
+
+ if (mlx4_is_qp_reserved(dev, (u32) base_qpn))
return;
+ mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, MLX4_USE_RR);
+}
- mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt);
+void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
+{
+ u64 in_param = 0;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, base_qpn);
+ set_param_h(&in_param, cnt);
+ err = mlx4_cmd(dev, in_param, RES_QP, RES_OP_RESERVE,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err) {
+ mlx4_warn(dev, "Failed to release qp range"
+ " base:%d cnt:%d\n", base_qpn, cnt);
+ }
+ } else
+ __mlx4_qp_release_range(dev, base_qpn, cnt);
}
EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_qp_table *qp_table = &priv->qp_table;
int err;
- if (!qpn)
- return -EINVAL;
-
- qp->qpn = qpn;
-
- err = mlx4_table_get(dev, &qp_table->qp_table, qp->qpn);
+ err = mlx4_table_get(dev, &qp_table->qp_table, qpn);
if (err)
goto err_out;
- err = mlx4_table_get(dev, &qp_table->auxc_table, qp->qpn);
+ err = mlx4_table_get(dev, &qp_table->auxc_table, qpn);
if (err)
goto err_put_qp;
- err = mlx4_table_get(dev, &qp_table->altc_table, qp->qpn);
+ err = mlx4_table_get(dev, &qp_table->altc_table, qpn);
if (err)
goto err_put_auxc;
- err = mlx4_table_get(dev, &qp_table->rdmarc_table, qp->qpn);
+ err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn);
if (err)
goto err_put_altc;
- err = mlx4_table_get(dev, &qp_table->cmpt_table, qp->qpn);
+ err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn);
if (err)
goto err_put_rdmarc;
- spin_lock_irq(&qp_table->lock);
- err = radix_tree_insert(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1), qp);
- spin_unlock_irq(&qp_table->lock);
- if (err)
- goto err_put_cmpt;
-
- atomic_set(&qp->refcount, 1);
- init_completion(&qp->free);
-
return 0;
-err_put_cmpt:
- mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn);
-
err_put_rdmarc:
- mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn);
+ mlx4_table_put(dev, &qp_table->rdmarc_table, qpn);
err_put_altc:
- mlx4_table_put(dev, &qp_table->altc_table, qp->qpn);
+ mlx4_table_put(dev, &qp_table->altc_table, qpn);
err_put_auxc:
- mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
+ mlx4_table_put(dev, &qp_table->auxc_table, qpn);
err_put_qp:
- mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
+ mlx4_table_put(dev, &qp_table->qp_table, qpn);
err_out:
return err;
}
-EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
-struct mlx4_qp *mlx4_qp_lookup_lock(struct mlx4_dev *dev, u32 qpn)
+static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
{
- struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
- unsigned long flags;
- struct mlx4_qp *qp;
+ u64 param = 0;
- spin_lock_irqsave(&qp_table->lock, flags);
- qp = radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1));
- spin_unlock_irqrestore(&qp_table->lock, flags);
- return qp;
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&param, qpn);
+ return mlx4_cmd_imm(dev, param, &param, RES_QP, RES_OP_MAP_ICM,
+ MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ }
+ return __mlx4_qp_alloc_icm(dev, qpn);
}
-EXPORT_SYMBOL_GPL(mlx4_qp_lookup_lock);
+void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_qp_table *qp_table = &priv->qp_table;
+
+ mlx4_table_put(dev, &qp_table->cmpt_table, qpn);
+ mlx4_table_put(dev, &qp_table->rdmarc_table, qpn);
+ mlx4_table_put(dev, &qp_table->altc_table, qpn);
+ mlx4_table_put(dev, &qp_table->auxc_table, qpn);
+ mlx4_table_put(dev, &qp_table->qp_table, qpn);
+}
+
+static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
+{
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, qpn);
+ if (mlx4_cmd(dev, in_param, RES_QP, RES_OP_MAP_ICM,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED))
+ mlx4_warn(dev, "Failed to free icm of qp:%d\n", qpn);
+ } else
+ __mlx4_qp_free_icm(dev, qpn);
+}
+
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_qp_table *qp_table = &priv->qp_table;
+ int err;
+
+ if (!qpn)
+ return -EINVAL;
+
+ qp->qpn = qpn;
+
+ err = mlx4_qp_alloc_icm(dev, qpn);
+ if (err)
+ return err;
+
+ spin_lock_irq(&qp_table->lock);
+ err = radix_tree_insert(&dev->qp_table_tree, qp->qpn &
+ (dev->caps.num_qps - 1), qp);
+ spin_unlock_irq(&qp_table->lock);
+ if (err)
+ goto err_icm;
+
+ atomic_set(&qp->refcount, 1);
+ init_completion(&qp->free);
+
+ return 0;
+
+err_icm:
+ mlx4_qp_free_icm(dev, qpn);
+ return err;
+}
+
+EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
+
void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp)
{
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
@@ -261,25 +419,18 @@
void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
{
- struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
-
if (atomic_dec_and_test(&qp->refcount))
complete(&qp->free);
wait_for_completion(&qp->free);
- mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn);
- mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn);
- mlx4_table_put(dev, &qp_table->altc_table, qp->qpn);
- mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
- mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
+ mlx4_qp_free_icm(dev, qp->qpn);
}
EXPORT_SYMBOL_GPL(mlx4_qp_free);
static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
{
- return mlx4_cmd(dev, 0, base_qpn,
- (dev->caps.flags & MLX4_DEV_CAP_FLAG_RAW_ETY) ? 4 : 0,
- MLX4_CMD_CONF_SPECIAL_QP, MLX4_CMD_TIME_CLASS_B);
+ return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
int mlx4_init_qp_table(struct mlx4_dev *dev)
@@ -287,18 +438,23 @@
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
int err;
int reserved_from_top = 0;
+ int reserved_from_bot;
+ int k;
spin_lock_init(&qp_table->lock);
INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
+ if (mlx4_is_slave(dev))
+ return 0;
/*
* We reserve 2 extra QPs per port for the special QPs. The
* block of special QPs must be aligned to a multiple of 8, so
* round up.
+ *
* We also reserve the MSB of the 24-bit QP number to indicate
- * an XRC qp.
+ * that a QP is an XRC QP.
*/
- dev->caps.sqp_start =
+ dev->phys_caps.base_sqpn =
ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8);
{
@@ -329,34 +485,82 @@
}
+ /* Reserve 8 real SQPs in both native and SRIOV modes.
+ * In addition, in SRIOV mode, reserve 8 proxy SQPs per function
+ * (for all PFs and VFs), and 8 corresponding tunnel QPs.
+ * Each proxy SQP works opposite its own tunnel QP.
+ *
+ * The QPs are arranged as follows:
+ * a. 8 real SQPs
+ * b. All the proxy SQPs (8 per function)
+ * c. All the tunnel QPs (8 per function)
+ */
+ reserved_from_bot = mlx4_num_reserved_sqps(dev);
+ if (reserved_from_bot + reserved_from_top > dev->caps.num_qps) {
+ mlx4_err(dev, "Number of reserved QPs is higher than number "
+ "of QPs, increase the value of log_num_qp\n");
+ return -EINVAL;
+ }
+
err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
- (1 << 23) - 1, dev->caps.sqp_start + 8,
+ (1 << 23) - 1, reserved_from_bot,
reserved_from_top);
if (err)
return err;
- return mlx4_CONF_SPECIAL_QP(dev, dev->caps.sqp_start);
+ if (mlx4_is_mfunc(dev)) {
+ /* for PPF use */
+ dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8;
+ dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX;
+
+ /* In mfunc, calculate proxy and tunnel qp offsets for the PF here,
+ * since the PF does not call mlx4_slave_caps */
+ dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+
+ if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
+ !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ for (k = 0; k < dev->caps.num_ports; k++) {
+ dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn +
+ 8 * mlx4_master_func_num(dev) + k;
+ dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX;
+ dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn +
+ 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k;
+ dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX;
+ }
+ }
+
+ err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn);
+ if (err)
+ goto err_mem;
+ return 0;
+
+err_mem:
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
+ dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
+ return err;
}
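The proxy/tunnel offsets computed above encode the layout described in the comment block: 8 real SQPs at base_sqpn, then an 8-QP proxy block per function, then an 8-QP tunnel block per function. A minimal sketch of that arithmetic, using hypothetical helpers (not part of the driver) and 1-based port numbers:

    /* Hypothetical helpers restating the special-QP layout above. */
    static u32 proxy_qp0_for(struct mlx4_dev *dev, int func, int port)
    {
            /* qp0 proxies sit at the start of the function's 8-QP block */
            return dev->phys_caps.base_proxy_sqpn + 8 * func + (port - 1);
    }

    static u32 tunnel_qp0_for(struct mlx4_dev *dev, int func, int port)
    {
            /* tunnel blocks mirror the proxy blocks, 8 * MLX4_MFUNC_MAX QPs later */
            return proxy_qp0_for(dev, func, port) + 8 * MLX4_MFUNC_MAX;
    }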
void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
{
+ if (mlx4_is_slave(dev))
+ return;
+
mlx4_CONF_SPECIAL_QP(dev, 0);
mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap);
}
-int mlx4_qp_get_region(struct mlx4_dev *dev, enum mlx4_qp_region region,
- int *base_qpn, int *cnt)
-{
- if ((region < 0) || (region >= MLX4_NUM_QP_REGION))
- return -EINVAL;
-
- *base_qpn = dev->caps.reserved_qps_base[region];
- *cnt = dev->caps.reserved_qps_cnt[region];
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(mlx4_qp_get_region);
-
int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
struct mlx4_qp_context *context)
{
@@ -368,7 +572,8 @@
return PTR_ERR(mailbox);
err = mlx4_cmd_box(dev, 0, mailbox->dma, qp->qpn, 0,
- MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
if (!err)
memcpy(context, mailbox->buf + 8, sizeof *context);
Modified: trunk/sys/ofed/drivers/net/mlx4/reset.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/reset.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/reset.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -31,7 +31,6 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/delay.h>
@@ -78,7 +77,7 @@
goto out;
}
- pcie_cap = pci_find_capability(dev->pdev, PCI_CAP_ID_EXP);
+ pcie_cap = pci_pcie_cap(dev->pdev);
for (i = 0; i < 64; ++i) {
if (i == 22 || i == 23)
@@ -120,8 +119,8 @@
writel(MLX4_RESET_VALUE, reset + MLX4_RESET_OFFSET);
iounmap(reset);
- /* Docs say to wait one second before accessing device */
- msleep(1000);
+ /* wait half a second before accessing device */
+ msleep(500);
end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES;
do {
@@ -142,8 +141,8 @@
/* Now restore the PCI headers */
if (pcie_cap) {
devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4];
- if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_DEVCTL,
- devctl)) {
+ if (pcie_capability_write_word(dev->pdev, PCI_EXP_DEVCTL,
+ devctl)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA PCI Express "
"Device Control register, aborting.\n");
@@ -150,8 +149,8 @@
goto out;
}
linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4];
- if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_LNKCTL,
- linkctl)) {
+ if (pcie_capability_write_word(dev->pdev, PCI_EXP_LNKCTL,
+ linkctl)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA PCI Express "
"Link control register, aborting.\n");
Added: trunk/sys/ofed/drivers/net/mlx4/resource_tracker.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/resource_tracker.c (rev 0)
+++ trunk/sys/ofed/drivers/net/mlx4/resource_tracker.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,4686 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/qp.h>
+#include <linux/if_ether.h>
+#include <linux/etherdevice.h>
+
+#include "mlx4.h"
+#include "fw.h"
+
+#define MLX4_MAC_VALID (1ull << 63)
+
+struct mac_res {
+ struct list_head list;
+ u64 mac;
+ int ref_count;
+ u8 smac_index;
+ u8 port;
+};
+
+struct vlan_res {
+ struct list_head list;
+ u16 vlan;
+ int ref_count;
+ int vlan_index;
+ u8 port;
+};
+
+struct res_common {
+ struct list_head list;
+ struct rb_node node;
+ u64 res_id;
+ int owner;
+ int state;
+ int from_state;
+ int to_state;
+ int removing;
+};
+
+enum {
+ RES_ANY_BUSY = 1
+};
+
+struct res_gid {
+ struct list_head list;
+ u8 gid[16];
+ enum mlx4_protocol prot;
+ enum mlx4_steer_type steer;
+ u64 reg_id;
+};
+
+enum res_qp_states {
+ RES_QP_BUSY = RES_ANY_BUSY,
+
+ /* QP number was allocated */
+ RES_QP_RESERVED,
+
+ /* ICM memory for QP context was mapped */
+ RES_QP_MAPPED,
+
+ /* QP is in hw ownership */
+ RES_QP_HW
+};
+
+struct res_qp {
+ struct res_common com;
+ struct res_mtt *mtt;
+ struct res_cq *rcq;
+ struct res_cq *scq;
+ struct res_srq *srq;
+ struct list_head mcg_list;
+ spinlock_t mcg_spl;
+ int local_qpn;
+ atomic_t ref_count;
+ u32 qpc_flags;
+ /* saved qp params before VST enforcement in order to restore on VGT */
+ u8 sched_queue;
+ __be32 param3;
+ u8 vlan_control;
+ u8 fvl_rx;
+ u8 pri_path_fl;
+ u8 vlan_index;
+ u8 feup;
+};
+
+enum res_mtt_states {
+ RES_MTT_BUSY = RES_ANY_BUSY,
+ RES_MTT_ALLOCATED,
+};
+
+static inline const char *mtt_states_str(enum res_mtt_states state)
+{
+ switch (state) {
+ case RES_MTT_BUSY: return "RES_MTT_BUSY";
+ case RES_MTT_ALLOCATED: return "RES_MTT_ALLOCATED";
+ default: return "Unknown";
+ }
+}
+
+struct res_mtt {
+ struct res_common com;
+ int order;
+ atomic_t ref_count;
+};
+
+enum res_mpt_states {
+ RES_MPT_BUSY = RES_ANY_BUSY,
+ RES_MPT_RESERVED,
+ RES_MPT_MAPPED,
+ RES_MPT_HW,
+};
+
+struct res_mpt {
+ struct res_common com;
+ struct res_mtt *mtt;
+ int key;
+};
+
+enum res_eq_states {
+ RES_EQ_BUSY = RES_ANY_BUSY,
+ RES_EQ_RESERVED,
+ RES_EQ_HW,
+};
+
+struct res_eq {
+ struct res_common com;
+ struct res_mtt *mtt;
+};
+
+enum res_cq_states {
+ RES_CQ_BUSY = RES_ANY_BUSY,
+ RES_CQ_ALLOCATED,
+ RES_CQ_HW,
+};
+
+struct res_cq {
+ struct res_common com;
+ struct res_mtt *mtt;
+ atomic_t ref_count;
+};
+
+enum res_srq_states {
+ RES_SRQ_BUSY = RES_ANY_BUSY,
+ RES_SRQ_ALLOCATED,
+ RES_SRQ_HW,
+};
+
+struct res_srq {
+ struct res_common com;
+ struct res_mtt *mtt;
+ struct res_cq *cq;
+ atomic_t ref_count;
+};
+
+enum res_counter_states {
+ RES_COUNTER_BUSY = RES_ANY_BUSY,
+ RES_COUNTER_ALLOCATED,
+};
+
+struct res_counter {
+ struct res_common com;
+ int port;
+};
+
+enum res_xrcdn_states {
+ RES_XRCD_BUSY = RES_ANY_BUSY,
+ RES_XRCD_ALLOCATED,
+};
+
+struct res_xrcdn {
+ struct res_common com;
+ int port;
+};
+
+enum res_fs_rule_states {
+ RES_FS_RULE_BUSY = RES_ANY_BUSY,
+ RES_FS_RULE_ALLOCATED,
+};
+
+struct res_fs_rule {
+ struct res_common com;
+ int qpn;
+};
+
+static int mlx4_is_eth(struct mlx4_dev *dev, int port)
+{
+ return dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB ? 0 : 1;
+}
+
+static void *res_tracker_lookup(struct rb_root *root, u64 res_id)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct res_common *res = container_of(node, struct res_common,
+ node);
+
+ if (res_id < res->res_id)
+ node = node->rb_left;
+ else if (res_id > res->res_id)
+ node = node->rb_right;
+ else
+ return res;
+ }
+ return NULL;
+}
+
+static int res_tracker_insert(struct rb_root *root, struct res_common *res)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct res_common *this = container_of(*new, struct res_common,
+ node);
+
+ parent = *new;
+ if (res->res_id < this->res_id)
+ new = &((*new)->rb_left);
+ else if (res->res_id > this->res_id)
+ new = &((*new)->rb_right);
+ else
+ return -EEXIST;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&res->node, parent, new);
+ rb_insert_color(&res->node, root);
+
+ return 0;
+}
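res_tracker_lookup() and res_tracker_insert() are the standard Linux rb-tree search/insert idiom, keyed on res_id. A minimal usage sketch with a hypothetical id (allocation error handling elided):

    struct rb_root tree = RB_ROOT;
    struct res_common *r = kzalloc(sizeof(*r), GFP_KERNEL);

    r->res_id = 42;
    if (res_tracker_insert(&tree, r))   /* -EEXIST if 42 is already tracked */
            kfree(r);

    r = res_tracker_lookup(&tree, 42);  /* NULL if not found */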
+
+enum qp_transition {
+ QP_TRANS_INIT2RTR,
+ QP_TRANS_RTR2RTS,
+ QP_TRANS_RTS2RTS,
+ QP_TRANS_SQERR2RTS,
+ QP_TRANS_SQD2SQD,
+ QP_TRANS_SQD2RTS
+};
+
+/* For debug use */
+static const char *ResourceType(enum mlx4_resource rt)
+{
+ switch (rt) {
+ case RES_QP: return "RES_QP";
+ case RES_CQ: return "RES_CQ";
+ case RES_SRQ: return "RES_SRQ";
+ case RES_MPT: return "RES_MPT";
+ case RES_MTT: return "RES_MTT";
+ case RES_MAC: return "RES_MAC";
+ case RES_VLAN: return "RES_VLAN";
+ case RES_EQ: return "RES_EQ";
+ case RES_COUNTER: return "RES_COUNTER";
+ case RES_FS_RULE: return "RES_FS_RULE";
+ case RES_XRCD: return "RES_XRCD";
+ default: return "Unknown resource type !!!";
+ };
+}
+
+static void rem_slave_vlans(struct mlx4_dev *dev, int slave);
+static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource res_type, int count,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct resource_allocator *res_alloc =
+ &priv->mfunc.master.res_tracker.res_alloc[res_type];
+ int err = -EINVAL;
+ int allocated, free, reserved, guaranteed, from_free;
+
+ spin_lock(&res_alloc->alloc_lock);
+ allocated = (port > 0) ?
+ res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] :
+ res_alloc->allocated[slave];
+ free = (port > 0) ? res_alloc->res_port_free[port - 1] :
+ res_alloc->res_free;
+ reserved = (port > 0) ? res_alloc->res_port_rsvd[port - 1] :
+ res_alloc->res_reserved;
+ guaranteed = res_alloc->guaranteed[slave];
+
+ if (allocated + count > res_alloc->quota[slave])
+ goto out;
+
+ if (allocated + count <= guaranteed) {
+ err = 0;
+ } else {
+ /* portion may need to be obtained from free area */
+ if (guaranteed - allocated > 0)
+ from_free = count - (guaranteed - allocated);
+ else
+ from_free = count;
+
+ if (free - from_free > reserved)
+ err = 0;
+ }
+
+ if (!err) {
+ /* grant the request */
+ if (port > 0) {
+ res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count;
+ res_alloc->res_port_free[port - 1] -= count;
+ } else {
+ res_alloc->allocated[slave] += count;
+ res_alloc->res_free -= count;
+ }
+ }
+
+out:
+ spin_unlock(&res_alloc->alloc_lock);
+ return err;
+}
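A worked example of the decision above: suppose a slave has quota 60 and guaranteed 10, with 8 currently allocated, while the pool has free = 100 and reserved = 90 (the sum of all functions' guarantees). A request for count = 5 passes the quota check (8 + 5 = 13 <= 60) but exceeds the guarantee (13 > 10), so from_free = 5 - (10 - 8) = 3, and the request is granted only because free - from_free = 97 > 90; the reserved floor keeps the pool able to honor every other function's guarantee.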
+
+static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource res_type, int count,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct resource_allocator *res_alloc =
+ &priv->mfunc.master.res_tracker.res_alloc[res_type];
+
+ spin_lock(&res_alloc->alloc_lock);
+ if (port > 0) {
+ res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count;
+ res_alloc->res_port_free[port - 1] += count;
+ } else {
+ res_alloc->allocated[slave] -= count;
+ res_alloc->res_free += count;
+ }
+
+ spin_unlock(&res_alloc->alloc_lock);
+}
+
+static inline void initialize_res_quotas(struct mlx4_dev *dev,
+ struct resource_allocator *res_alloc,
+ enum mlx4_resource res_type,
+ int vf, int num_instances)
+{
+ res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1));
+ res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf];
+ if (vf == mlx4_master_func_num(dev)) {
+ res_alloc->res_free = num_instances;
+ if (res_type == RES_MTT) {
+ /* reserved mtts will be taken out of the PF allocation */
+ res_alloc->res_free += dev->caps.reserved_mtts;
+ res_alloc->guaranteed[vf] += dev->caps.reserved_mtts;
+ res_alloc->quota[vf] += dev->caps.reserved_mtts;
+ }
+ }
+}
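A worked example of these formulas: with num_instances = 64 and num_vfs = 3 (four functions including the PF), each function gets guaranteed = 64 / (2 * 4) = 8 and quota = 64 / 2 + 8 = 40. The guarantees sum to 32, half the pool, so every function can always reach its guarantee while the remaining half is contended first-come-first-served up to each function's quota.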
+
+void mlx4_init_quotas(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int pf;
+
+ /* quotas for VFs are initialized in mlx4_slave_cap */
+ if (mlx4_is_slave(dev))
+ return;
+
+ if (!mlx4_is_mfunc(dev)) {
+ dev->quotas.qp = dev->caps.num_qps - dev->caps.reserved_qps -
+ mlx4_num_reserved_sqps(dev);
+ dev->quotas.cq = dev->caps.num_cqs - dev->caps.reserved_cqs;
+ dev->quotas.srq = dev->caps.num_srqs - dev->caps.reserved_srqs;
+ dev->quotas.mtt = dev->caps.num_mtts - dev->caps.reserved_mtts;
+ dev->quotas.mpt = dev->caps.num_mpts - dev->caps.reserved_mrws;
+ return;
+ }
+
+ pf = mlx4_master_func_num(dev);
+ dev->quotas.qp =
+ priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[pf];
+ dev->quotas.cq =
+ priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[pf];
+ dev->quotas.srq =
+ priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[pf];
+ dev->quotas.mtt =
+ priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[pf];
+ dev->quotas.mpt =
+ priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
+}
+
+int mlx4_init_resource_tracker(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, j;
+ int t;
+
+ priv->mfunc.master.res_tracker.slave_list =
+ kzalloc(dev->num_slaves * sizeof(struct slave_list),
+ GFP_KERNEL);
+ if (!priv->mfunc.master.res_tracker.slave_list)
+ return -ENOMEM;
+
+ for (i = 0 ; i < dev->num_slaves; i++) {
+ for (t = 0; t < MLX4_NUM_OF_RESOURCE_TYPE; ++t)
+ INIT_LIST_HEAD(&priv->mfunc.master.res_tracker.
+ slave_list[i].res_list[t]);
+ mutex_init(&priv->mfunc.master.res_tracker.slave_list[i].mutex);
+ }
+
+ mlx4_dbg(dev, "Started init_resource_tracker: %ld slaves\n",
+ dev->num_slaves);
+ for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++)
+ priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT;
+
+ for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+ struct resource_allocator *res_alloc =
+ &priv->mfunc.master.res_tracker.res_alloc[i];
+ res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+ res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+ if (i == RES_MAC || i == RES_VLAN)
+ res_alloc->allocated = kzalloc(MLX4_MAX_PORTS *
+ (dev->num_vfs + 1) * sizeof(int),
+ GFP_KERNEL);
+ else
+ res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+
+ if (!res_alloc->quota || !res_alloc->guaranteed ||
+ !res_alloc->allocated)
+ goto no_mem_err;
+
+ spin_lock_init(&res_alloc->alloc_lock);
+ for (t = 0; t < dev->num_vfs + 1; t++) {
+ switch (i) {
+ case RES_QP:
+ initialize_res_quotas(dev, res_alloc, RES_QP,
+ t, dev->caps.num_qps -
+ dev->caps.reserved_qps -
+ mlx4_num_reserved_sqps(dev));
+ break;
+ case RES_CQ:
+ initialize_res_quotas(dev, res_alloc, RES_CQ,
+ t, dev->caps.num_cqs -
+ dev->caps.reserved_cqs);
+ break;
+ case RES_SRQ:
+ initialize_res_quotas(dev, res_alloc, RES_SRQ,
+ t, dev->caps.num_srqs -
+ dev->caps.reserved_srqs);
+ break;
+ case RES_MPT:
+ initialize_res_quotas(dev, res_alloc, RES_MPT,
+ t, dev->caps.num_mpts -
+ dev->caps.reserved_mrws);
+ break;
+ case RES_MTT:
+ initialize_res_quotas(dev, res_alloc, RES_MTT,
+ t, dev->caps.num_mtts -
+ dev->caps.reserved_mtts);
+ break;
+ case RES_MAC:
+ if (t == mlx4_master_func_num(dev)) {
+ res_alloc->quota[t] =
+ MLX4_MAX_MAC_NUM - 2 * dev->num_vfs;
+ res_alloc->guaranteed[t] = res_alloc->quota[t];
+ for (j = 0; j < MLX4_MAX_PORTS; j++)
+ res_alloc->res_port_free[j] = MLX4_MAX_MAC_NUM;
+ } else {
+ res_alloc->quota[t] = 2;
+ res_alloc->guaranteed[t] = 2;
+ }
+ break;
+ case RES_VLAN:
+ if (t == mlx4_master_func_num(dev)) {
+ res_alloc->quota[t] = MLX4_MAX_VLAN_NUM;
+ res_alloc->guaranteed[t] = MLX4_MAX_VLAN_NUM / 2;
+ for (j = 0; j < MLX4_MAX_PORTS; j++)
+ res_alloc->res_port_free[j] =
+ res_alloc->quota[t];
+ } else {
+ res_alloc->quota[t] = MLX4_MAX_VLAN_NUM / 2;
+ res_alloc->guaranteed[t] = 0;
+ }
+ break;
+ case RES_COUNTER:
+ res_alloc->quota[t] = dev->caps.max_counters;
+ res_alloc->guaranteed[t] = 0;
+ if (t == mlx4_master_func_num(dev))
+ res_alloc->res_free = res_alloc->quota[t];
+ break;
+ default:
+ break;
+ }
+ if (i == RES_MAC || i == RES_VLAN) {
+ for (j = 0; j < MLX4_MAX_PORTS; j++)
+ res_alloc->res_port_rsvd[j] +=
+ res_alloc->guaranteed[t];
+ } else {
+ res_alloc->res_reserved += res_alloc->guaranteed[t];
+ }
+ }
+ }
+ spin_lock_init(&priv->mfunc.master.res_tracker.lock);
+ return 0;
+
+no_mem_err:
+ for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated);
+ priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL;
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed);
+ priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL;
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota);
+ priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL;
+ }
+ return -ENOMEM;
+}
+
+void mlx4_free_resource_tracker(struct mlx4_dev *dev,
+ enum mlx4_res_tracker_free_type type)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i;
+
+ if (priv->mfunc.master.res_tracker.slave_list) {
+ if (type != RES_TR_FREE_STRUCTS_ONLY) {
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (type == RES_TR_FREE_ALL ||
+ dev->caps.function != i)
+ mlx4_delete_all_resources_for_slave(dev, i);
+ }
+ /* free master's vlans */
+ i = dev->caps.function;
+ mutex_lock(&priv->mfunc.master.res_tracker.slave_list[i].mutex);
+ rem_slave_vlans(dev, i);
+ mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[i].mutex);
+ }
+
+ if (type != RES_TR_FREE_SLAVES_ONLY) {
+ for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated);
+ priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL;
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed);
+ priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL;
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota);
+ priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL;
+ }
+ kfree(priv->mfunc.master.res_tracker.slave_list);
+ priv->mfunc.master.res_tracker.slave_list = NULL;
+ }
+ }
+}
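The three teardown modes accepted above differ as follows (a summary of the code paths, stated here for reference):

    /* RES_TR_FREE_ALL          - delete every slave's resources, including the
     *                            master function's, then free the allocator
     *                            arrays and slave_list
     * RES_TR_FREE_SLAVES_ONLY  - delete all slaves' resources except the master
     *                            function's own (only its vlans are removed);
     *                            keep the allocator arrays and slave_list
     * RES_TR_FREE_STRUCTS_ONLY - free only the allocator arrays and slave_list
     */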
+
+static void update_pkey_index(struct mlx4_dev *dev, int slave,
+ struct mlx4_cmd_mailbox *inbox)
+{
+ u8 sched = *(u8 *)(inbox->buf + 64);
+ u8 orig_index = *(u8 *)(inbox->buf + 35);
+ u8 new_index;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port;
+
+ port = (sched >> 6 & 1) + 1;
+
+ new_index = priv->virt2phys_pkey[slave][port - 1][orig_index];
+ *(u8 *)(inbox->buf + 35) = new_index;
+}
+
+static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox,
+ u8 slave)
+{
+ struct mlx4_qp_context *qp_ctx = inbox->buf + 8;
+ enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf);
+ u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
+ int port;
+
+ if (MLX4_QP_ST_UD == ts) {
+ port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+ if (mlx4_is_eth(dev, port))
+ qp_ctx->pri_path.mgid_index = mlx4_get_base_gid_ix(dev, slave) | 0x80;
+ else
+ qp_ctx->pri_path.mgid_index = 0x80 | slave;
+
+ } else if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) {
+ if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) {
+ port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+ if (mlx4_is_eth(dev, port)) {
+ qp_ctx->pri_path.mgid_index += mlx4_get_base_gid_ix(dev, slave);
+ qp_ctx->pri_path.mgid_index &= 0x7f;
+ } else {
+ qp_ctx->pri_path.mgid_index = slave & 0x7F;
+ }
+ }
+ if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) {
+ port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1;
+ if (mlx4_is_eth(dev, port)) {
+ qp_ctx->alt_path.mgid_index += mlx4_get_base_gid_ix(dev, slave);
+ qp_ctx->alt_path.mgid_index &= 0x7f;
+ } else {
+ qp_ctx->alt_path.mgid_index = slave & 0x7F;
+ }
+ }
+ }
+}
+
+static int check_counter_index_validity(struct mlx4_dev *dev, int slave, int port, int idx)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct counter_index *counter, *tmp_counter;
+
+ if (slave == 0) {
+ list_for_each_entry_safe(counter, tmp_counter,
+ &priv->counters_table.global_port_list[port - 1],
+ list) {
+ if (counter->index == idx)
+ return 0;
+ }
+ return -EINVAL;
+ } else {
+ list_for_each_entry_safe(counter, tmp_counter,
+ &priv->counters_table.vf_list[slave - 1][port - 1],
+ list) {
+ if (counter->index == idx)
+ return 0;
+ }
+ return -EINVAL;
+ }
+}
+
+static int update_vport_qp_param(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *inbox,
+ u8 slave, u32 qpn)
+{
+ struct mlx4_qp_context *qpc = inbox->buf + 8;
+ struct mlx4_vport_oper_state *vp_oper;
+ struct mlx4_priv *priv;
+ u32 qp_type;
+ int port;
+
+ port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1;
+ priv = mlx4_priv(dev);
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
+
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH &&
+ qpc->pri_path.counter_index != MLX4_SINK_COUNTER_INDEX) {
+ if (check_counter_index_validity(dev, slave, port,
+ qpc->pri_path.counter_index))
+ return -EINVAL;
+ }
+
+ mlx4_dbg(dev, "%s: QP counter_index %d for slave %d port %d\n",
+ __func__, qpc->pri_path.counter_index, slave, port);
+
+ if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK) &&
+ dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH &&
+ !mlx4_is_qp_reserved(dev, qpn) &&
+ qp_type == MLX4_QP_ST_MLX &&
+ qpc->pri_path.counter_index != 0xFF) {
+ /* disable multicast loopback to qp with same counter */
+ qpc->pri_path.fl |= MLX4_FL_ETH_SRC_CHECK_MC_LB;
+ qpc->pri_path.vlan_control |=
+ MLX4_VLAN_CTRL_ETH_SRC_CHECK_IF_COUNTER;
+ }
+
+ if (MLX4_VGT != vp_oper->state.default_vlan) {
+ /* the reserved QPs (special, proxy, tunnel)
+ * do not operate over vlans
+ */
+ if (mlx4_is_qp_reserved(dev, qpn))
+ return 0;
+
+ /* force strip vlan by clear vsd */
+ qpc->param3 &= ~cpu_to_be32(MLX4_STRIP_VLAN);
+ /* preserve IF_COUNTER flag */
+ qpc->pri_path.vlan_control &=
+ MLX4_VLAN_CTRL_ETH_SRC_CHECK_IF_COUNTER;
+ if (MLX4_QP_ST_RC != qp_type) {
+ if (0 != vp_oper->state.default_vlan) {
+ qpc->pri_path.vlan_control |=
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+ } else { /* priority tagged */
+ qpc->pri_path.vlan_control |=
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
+ }
+ }
+ qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN;
+ qpc->pri_path.vlan_index = vp_oper->vlan_idx;
+ qpc->pri_path.fl |= MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN;
+ qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN;
+ qpc->pri_path.sched_queue &= 0xC7;
+ qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3;
+ }
+ if (vp_oper->state.spoofchk) {
+ qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC;
+ qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx;
+ }
+ return 0;
+}
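The sched_queue manipulation at the end of the VST branch packs the default QoS into bits 3..5: masking with 0xC7 (binary 1100 0111) clears those bits and (default_qos << 3) writes the new priority. For example, default_qos = 5 applied to an original sched_queue of 0x83 gives (0x83 & 0xC7) | (5 << 3) = 0x83 | 0x28 = 0xAB.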
+
+static int mpt_mask(struct mlx4_dev *dev)
+{
+ return dev->caps.num_mpts - 1;
+}
+
+static void *find_res(struct mlx4_dev *dev, u64 res_id,
+ enum mlx4_resource type)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ return res_tracker_lookup(&priv->mfunc.master.res_tracker.res_tree[type],
+ res_id);
+}
+
+static int get_res(struct mlx4_dev *dev, int slave, u64 res_id,
+ enum mlx4_resource type,
+ void *res)
+{
+ struct res_common *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = find_res(dev, res_id, type);
+ if (!r) {
+ err = -ENOENT;
+ goto exit;
+ }
+
+ if (r->state == RES_ANY_BUSY) {
+ err = -EBUSY;
+ goto exit;
+ }
+
+ if (r->owner != slave) {
+ err = -EPERM;
+ goto exit;
+ }
+
+ r->from_state = r->state;
+ r->state = RES_ANY_BUSY;
+
+ if (res)
+ *((struct res_common **)res) = r;
+
+exit:
+ spin_unlock_irq(mlx4_tlock(dev));
+ return err;
+}
+
+int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev,
+ enum mlx4_resource type,
+ u64 res_id, int *slave)
+{
+ struct res_common *r;
+ int err = -ENOENT;
+ int id = res_id;
+
+ if (type == RES_QP)
+ id &= 0x7fffff;
+ spin_lock(mlx4_tlock(dev));
+
+ r = find_res(dev, id, type);
+ if (r) {
+ *slave = r->owner;
+ err = 0;
+ }
+ spin_unlock(mlx4_tlock(dev));
+
+ return err;
+}
+
+static void put_res(struct mlx4_dev *dev, int slave, u64 res_id,
+ enum mlx4_resource type)
+{
+ struct res_common *r;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = find_res(dev, res_id, type);
+ if (r)
+ r->state = r->from_state;
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static struct res_common *alloc_qp_tr(int id)
+{
+ struct res_qp *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_QP_RESERVED;
+ ret->local_qpn = id;
+ INIT_LIST_HEAD(&ret->mcg_list);
+ spin_lock_init(&ret->mcg_spl);
+ atomic_set(&ret->ref_count, 0);
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_mtt_tr(int id, int order)
+{
+ struct res_mtt *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->order = order;
+ ret->com.state = RES_MTT_ALLOCATED;
+ atomic_set(&ret->ref_count, 0);
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_mpt_tr(int id, int key)
+{
+ struct res_mpt *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_MPT_RESERVED;
+ ret->key = key;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_eq_tr(int id)
+{
+ struct res_eq *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_EQ_RESERVED;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_cq_tr(int id)
+{
+ struct res_cq *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_CQ_ALLOCATED;
+ atomic_set(&ret->ref_count, 0);
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_srq_tr(int id)
+{
+ struct res_srq *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_SRQ_ALLOCATED;
+ atomic_set(&ret->ref_count, 0);
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_counter_tr(int id)
+{
+ struct res_counter *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_COUNTER_ALLOCATED;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_xrcdn_tr(int id)
+{
+ struct res_xrcdn *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_XRCD_ALLOCATED;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_fs_rule_tr(u64 id, int qpn)
+{
+ struct res_fs_rule *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_FS_RULE_ALLOCATED;
+ ret->qpn = qpn;
+ return &ret->com;
+}
+
+static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave,
+ int extra)
+{
+ struct res_common *ret;
+
+ switch (type) {
+ case RES_QP:
+ ret = alloc_qp_tr(id);
+ break;
+ case RES_MPT:
+ ret = alloc_mpt_tr(id, extra);
+ break;
+ case RES_MTT:
+ ret = alloc_mtt_tr(id, extra);
+ break;
+ case RES_EQ:
+ ret = alloc_eq_tr(id);
+ break;
+ case RES_CQ:
+ ret = alloc_cq_tr(id);
+ break;
+ case RES_SRQ:
+ ret = alloc_srq_tr(id);
+ break;
+ case RES_MAC:
+ printk(KERN_ERR "implementation missing\n");
+ return NULL;
+ case RES_COUNTER:
+ ret = alloc_counter_tr(id);
+ break;
+ case RES_XRCD:
+ ret = alloc_xrcdn_tr(id);
+ break;
+ case RES_FS_RULE:
+ ret = alloc_fs_rule_tr(id, extra);
+ break;
+ default:
+ return NULL;
+ }
+ if (ret)
+ ret->owner = slave;
+
+ return ret;
+}
+
+static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count,
+ enum mlx4_resource type, int extra)
+{
+ int i;
+ int err;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct res_common **res_arr;
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct rb_root *root = &tracker->res_tree[type];
+
+ res_arr = kzalloc(count * sizeof *res_arr, GFP_KERNEL);
+ if (!res_arr)
+ return -ENOMEM;
+
+ for (i = 0; i < count; ++i) {
+ res_arr[i] = alloc_tr(base + i, type, slave, extra);
+ if (!res_arr[i]) {
+ for (--i; i >= 0; --i)
+ kfree(res_arr[i]);
+
+ kfree(res_arr);
+ return -ENOMEM;
+ }
+ }
+
+ spin_lock_irq(mlx4_tlock(dev));
+ for (i = 0; i < count; ++i) {
+ if (find_res(dev, base + i, type)) {
+ err = -EEXIST;
+ goto undo;
+ }
+ err = res_tracker_insert(root, res_arr[i]);
+ if (err)
+ goto undo;
+ list_add_tail(&res_arr[i]->list,
+ &tracker->slave_list[slave].res_list[type]);
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(res_arr);
+
+ return 0;
+
+undo:
+ for (--i; i >= 0; --i) {
+ rb_erase(&res_arr[i]->node, root);
+ list_del_init(&res_arr[i]->list);
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ for (i = 0; i < count; ++i)
+ kfree(res_arr[i]);
+
+ kfree(res_arr);
+
+ return err;
+}
+
+static int remove_qp_ok(struct res_qp *res)
+{
+ if (res->com.state == RES_QP_BUSY || atomic_read(&res->ref_count) ||
+ !list_empty(&res->mcg_list)) {
+ pr_err("resource tracker: fail to remove qp, state %d, ref_count %d\n",
+ res->com.state, atomic_read(&res->ref_count));
+ return -EBUSY;
+ } else if (res->com.state != RES_QP_RESERVED) {
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+static int remove_mtt_ok(struct res_mtt *res, int order)
+{
+ if (res->com.state == RES_MTT_BUSY ||
+ atomic_read(&res->ref_count)) {
+ printk(KERN_DEBUG "%s-%d: state %s, ref_count %d\n",
+ __func__, __LINE__,
+ mtt_states_str(res->com.state),
+ atomic_read(&res->ref_count));
+ return -EBUSY;
+ } else if (res->com.state != RES_MTT_ALLOCATED)
+ return -EPERM;
+ else if (res->order != order)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int remove_mpt_ok(struct res_mpt *res)
+{
+ if (res->com.state == RES_MPT_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_MPT_RESERVED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_eq_ok(struct res_eq *res)
+{
+ if (res->com.state == RES_EQ_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_EQ_RESERVED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_counter_ok(struct res_counter *res)
+{
+ if (res->com.state == RES_COUNTER_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_COUNTER_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_xrcdn_ok(struct res_xrcdn *res)
+{
+ if (res->com.state == RES_XRCD_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_XRCD_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_fs_rule_ok(struct res_fs_rule *res)
+{
+ if (res->com.state == RES_FS_RULE_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_FS_RULE_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_cq_ok(struct res_cq *res)
+{
+ if (res->com.state == RES_CQ_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_CQ_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_srq_ok(struct res_srq *res)
+{
+ if (res->com.state == RES_SRQ_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_SRQ_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_ok(struct res_common *res, enum mlx4_resource type, int extra)
+{
+ switch (type) {
+ case RES_QP:
+ return remove_qp_ok((struct res_qp *)res);
+ case RES_CQ:
+ return remove_cq_ok((struct res_cq *)res);
+ case RES_SRQ:
+ return remove_srq_ok((struct res_srq *)res);
+ case RES_MPT:
+ return remove_mpt_ok((struct res_mpt *)res);
+ case RES_MTT:
+ return remove_mtt_ok((struct res_mtt *)res, extra);
+ case RES_MAC:
+ return -ENOSYS;
+ case RES_EQ:
+ return remove_eq_ok((struct res_eq *)res);
+ case RES_COUNTER:
+ return remove_counter_ok((struct res_counter *)res);
+ case RES_XRCD:
+ return remove_xrcdn_ok((struct res_xrcdn *)res);
+ case RES_FS_RULE:
+ return remove_fs_rule_ok((struct res_fs_rule *)res);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int rem_res_range(struct mlx4_dev *dev, int slave, u64 base, int count,
+ enum mlx4_resource type, int extra)
+{
+ u64 i;
+ int err;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_common *r;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ for (i = base; i < base + count; ++i) {
+ r = res_tracker_lookup(&tracker->res_tree[type], i);
+ if (!r) {
+ err = -ENOENT;
+ goto out;
+ }
+ if (r->owner != slave) {
+ err = -EPERM;
+ goto out;
+ }
+ err = remove_ok(r, type, extra);
+ if (err)
+ goto out;
+ }
+
+ for (i = base; i < base + count; ++i) {
+ r = res_tracker_lookup(&tracker->res_tree[type], i);
+ rb_erase(&r->node, &tracker->res_tree[type]);
+ list_del(&r->list);
+ kfree(r);
+ }
+ err = 0;
+
+out:
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn,
+ enum res_qp_states state, struct res_qp **qp,
+ int alloc)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_qp *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_QP], qpn);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_QP_BUSY:
+ mlx4_dbg(dev, "%s: failed RES_QP, 0x%llx\n",
+ __func__, (unsigned long long)r->com.res_id);
+ err = -EBUSY;
+ break;
+
+ case RES_QP_RESERVED:
+ if (r->com.state == RES_QP_MAPPED && !alloc)
+ break;
+
+ mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", (unsigned long long)r->com.res_id);
+ err = -EINVAL;
+ break;
+
+ case RES_QP_MAPPED:
+ if ((r->com.state == RES_QP_RESERVED && alloc) ||
+ r->com.state == RES_QP_HW)
+ break;
+ else {
+ mlx4_dbg(dev, "failed RES_QP, 0x%llx\n",
+ (unsigned long long)r->com.res_id);
+ err = -EINVAL;
+ }
+
+ break;
+
+ case RES_QP_HW:
+ if (r->com.state != RES_QP_MAPPED)
+ err = -EINVAL;
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_QP_BUSY;
+ if (qp)
+ *qp = r;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
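The *_res_start_move_to() helpers implement a small per-resource state machine: the resource is parked in its BUSY state while a transition is in flight, with from_state and to_state recorded so the caller can later commit via res_end_move() or roll back via res_abort_move(). For QPs the legal moves reduce to

    RES_QP_RESERVED <-> RES_QP_MAPPED <-> RES_QP_HW

where the alloc flag distinguishes the reserve-to-map direction (alloc = 1) from the free direction (alloc = 0), and RES_QP_BUSY is held for the duration of each move.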
+
+static int mr_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
+ enum res_mpt_states state, struct res_mpt **mpt)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_mpt *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_MPT], index);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_MPT_BUSY:
+ err = -EINVAL;
+ break;
+
+ case RES_MPT_RESERVED:
+ if (r->com.state != RES_MPT_MAPPED)
+ err = -EINVAL;
+ break;
+
+ case RES_MPT_MAPPED:
+ if (r->com.state != RES_MPT_RESERVED &&
+ r->com.state != RES_MPT_HW)
+ err = -EINVAL;
+ break;
+
+ case RES_MPT_HW:
+ if (r->com.state != RES_MPT_MAPPED)
+ err = -EINVAL;
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_MPT_BUSY;
+ if (mpt)
+ *mpt = r;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int eq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
+ enum res_eq_states state, struct res_eq **eq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_eq *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_EQ], index);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_EQ_BUSY:
+ err = -EINVAL;
+ break;
+
+ case RES_EQ_RESERVED:
+ if (r->com.state != RES_EQ_HW)
+ err = -EINVAL;
+ break;
+
+ case RES_EQ_HW:
+ if (r->com.state != RES_EQ_RESERVED)
+ err = -EINVAL;
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_EQ_BUSY;
+ if (eq)
+ *eq = r;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn,
+ enum res_cq_states state, struct res_cq **cq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_cq *r;
+ int err;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_CQ], cqn);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_CQ_BUSY:
+ err = -EBUSY;
+ break;
+
+ case RES_CQ_ALLOCATED:
+ if (r->com.state != RES_CQ_HW)
+ err = -EINVAL;
+ else if (atomic_read(&r->ref_count))
+ err = -EBUSY;
+ else
+ err = 0;
+ break;
+
+ case RES_CQ_HW:
+ if (r->com.state != RES_CQ_ALLOCATED)
+ err = -EINVAL;
+ else
+ err = 0;
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_CQ_BUSY;
+ if (cq)
+ *cq = r;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
+ enum res_srq_states state, struct res_srq **srq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_srq *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_SRQ], index);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_SRQ_BUSY:
+ err = -EINVAL;
+ break;
+
+ case RES_SRQ_ALLOCATED:
+ if (r->com.state != RES_SRQ_HW)
+ err = -EINVAL;
+ else if (atomic_read(&r->ref_count))
+ err = -EBUSY;
+ break;
+
+ case RES_SRQ_HW:
+ if (r->com.state != RES_SRQ_ALLOCATED)
+ err = -EINVAL;
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_SRQ_BUSY;
+ if (srq)
+ *srq = r;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static void res_abort_move(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource type, int id)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_common *r;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[type], id);
+ if (r && (r->owner == slave))
+ r->state = r->from_state;
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void res_end_move(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource type, int id)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_common *r;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[type], id);
+ if (r && (r->owner == slave))
+ r->state = r->to_state;
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn)
+{
+ return mlx4_is_qp_reserved(dev, qpn) &&
+ (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn));
+}
+
+static int fw_reserved(struct mlx4_dev *dev, int qpn)
+{
+ return qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
+}
+
+static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int err;
+ int count;
+ int align;
+ int base;
+ int qpn;
+ u8 flags;
+
+ switch (op) {
+ case RES_OP_RESERVE:
+ count = get_param_l(&in_param) & 0xffffff;
+ flags = get_param_l(&in_param) >> 24;
+ align = get_param_h(&in_param);
+ err = mlx4_grant_resource(dev, slave, RES_QP, count, 0);
+ if (err)
+ return err;
+
+ err = __mlx4_qp_reserve_range(dev, count, align, &base, flags);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_QP, count, 0);
+ return err;
+ }
+
+ err = add_res_range(dev, slave, base, count, RES_QP, 0);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_QP, count, 0);
+ __mlx4_qp_release_range(dev, base, count);
+ return err;
+ }
+ set_param_l(out_param, base);
+ break;
+ case RES_OP_MAP_ICM:
+ qpn = get_param_l(&in_param) & 0x7fffff;
+ if (valid_reserved(dev, slave, qpn)) {
+ err = add_res_range(dev, slave, qpn, 1, RES_QP, 0);
+ if (err)
+ return err;
+ }
+
+ err = qp_res_start_move_to(dev, slave, qpn, RES_QP_MAPPED,
+ NULL, 1);
+ if (err)
+ return err;
+
+ if (!fw_reserved(dev, qpn)) {
+ err = __mlx4_qp_alloc_icm(dev, qpn);
+ if (err) {
+ res_abort_move(dev, slave, RES_QP, qpn);
+ return err;
+ }
+ }
+
+ res_end_move(dev, slave, RES_QP, qpn);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+ return err;
+}
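For RES_OP_RESERVE, qp_alloc_res() unpacks the 64-bit in_param as: bits 0..23 of the low dword carry the count, bits 24..31 the allocation flags, and the high dword the alignment. A sketch of the matching pack on the requesting side (assuming set_param_l()/set_param_h() mirror the getters used above):

    u64 in_param = 0;

    set_param_l(&in_param, ((u32)flags << 24) | (count & 0xffffff));
    set_param_h(&in_param, align);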
+
+static int mtt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int err = -EINVAL;
+ int base;
+ int order;
+
+ if (op != RES_OP_RESERVE_AND_MAP)
+ return err;
+
+ order = get_param_l(&in_param);
+
+ err = mlx4_grant_resource(dev, slave, RES_MTT, 1 << order, 0);
+ if (err)
+ return err;
+
+ base = __mlx4_alloc_mtt_range(dev, order);
+ if (base == -1) {
+ mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0);
+ return -ENOMEM;
+ }
+
+ err = add_res_range(dev, slave, base, 1, RES_MTT, order);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0);
+ __mlx4_free_mtt_range(dev, base, order);
+ } else
+ set_param_l(out_param, base);
+
+ return err;
+}
+
+static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int err = -EINVAL;
+ int index;
+ int id;
+ struct res_mpt *mpt;
+
+ switch (op) {
+ case RES_OP_RESERVE:
+ err = mlx4_grant_resource(dev, slave, RES_MPT, 1, 0);
+ if (err)
+ break;
+
+ index = __mlx4_mpt_reserve(dev);
+ if (index == -1) {
+ mlx4_release_resource(dev, slave, RES_MPT, 1, 0);
+ break;
+ }
+ id = index & mpt_mask(dev);
+
+ err = add_res_range(dev, slave, id, 1, RES_MPT, index);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_MPT, 1, 0);
+ __mlx4_mpt_release(dev, index);
+ break;
+ }
+ set_param_l(out_param, index);
+ break;
+ case RES_OP_MAP_ICM:
+ index = get_param_l(&in_param);
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id,
+ RES_MPT_MAPPED, &mpt);
+ if (err)
+ return err;
+
+ err = __mlx4_mpt_alloc_icm(dev, mpt->key);
+ if (err) {
+ res_abort_move(dev, slave, RES_MPT, id);
+ return err;
+ }
+
+ res_end_move(dev, slave, RES_MPT, id);
+ break;
+ }
+ return err;
+}
+
+static int cq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int cqn;
+ int err;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ err = mlx4_grant_resource(dev, slave, RES_CQ, 1, 0);
+ if (err)
+ break;
+
+ err = __mlx4_cq_alloc_icm(dev, &cqn);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_CQ, 1, 0);
+ break;
+ }
+
+ err = add_res_range(dev, slave, cqn, 1, RES_CQ, 0);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_CQ, 1, 0);
+ __mlx4_cq_free_icm(dev, cqn);
+ break;
+ }
+
+ set_param_l(out_param, cqn);
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int srq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int srqn;
+ int err;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ err = mlx4_grant_resource(dev, slave, RES_SRQ, 1, 0);
+ if (err)
+ break;
+
+ err = __mlx4_srq_alloc_icm(dev, &srqn);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_SRQ, 1, 0);
+ break;
+ }
+
+ err = add_res_range(dev, slave, srqn, 1, RES_SRQ, 0);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_SRQ, 1, 0);
+ __mlx4_srq_free_icm(dev, srqn);
+ break;
+ }
+
+ set_param_l(out_param, srqn);
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int mac_find_smac_ix_in_slave(struct mlx4_dev *dev, int slave, int port,
+ u8 smac_index, u64 *mac)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mac_list =
+ &tracker->slave_list[slave].res_list[RES_MAC];
+ struct mac_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, mac_list, list) {
+ if (res->smac_index == smac_index && res->port == (u8) port) {
+ *mac = res->mac;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port, u8 smac_index)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mac_list =
+ &tracker->slave_list[slave].res_list[RES_MAC];
+ struct mac_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, mac_list, list) {
+ if (res->mac == mac && res->port == (u8) port) {
+ /* mac found. update ref count */
+ ++res->ref_count;
+ return 0;
+ }
+ }
+
+ if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port))
+ return -EINVAL;
+ res = kzalloc(sizeof *res, GFP_KERNEL);
+ if (!res) {
+ mlx4_release_resource(dev, slave, RES_MAC, 1, port);
+ return -ENOMEM;
+ }
+ res->mac = mac;
+ res->port = (u8) port;
+ res->smac_index = smac_index;
+ res->ref_count = 1;
+ list_add_tail(&res->list,
+ &tracker->slave_list[slave].res_list[RES_MAC]);
+ return 0;
+}
+
+static void mac_del_from_slave(struct mlx4_dev *dev, int slave, u64 mac,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mac_list =
+ &tracker->slave_list[slave].res_list[RES_MAC];
+ struct mac_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, mac_list, list) {
+ if (res->mac == mac && res->port == (u8) port) {
+ if (!--res->ref_count) {
+ list_del(&res->list);
+ mlx4_release_resource(dev, slave, RES_MAC, 1, port);
+ kfree(res);
+ }
+ break;
+ }
+ }
+}
+
+static void rem_slave_macs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mac_list =
+ &tracker->slave_list[slave].res_list[RES_MAC];
+ struct mac_res *res, *tmp;
+ int i;
+
+ list_for_each_entry_safe(res, tmp, mac_list, list) {
+ list_del(&res->list);
+ /* dereference the mac the num times the slave referenced it */
+ for (i = 0; i < res->ref_count; i++)
+ __mlx4_unregister_mac(dev, res->port, res->mac);
+ mlx4_release_resource(dev, slave, RES_MAC, 1, res->port);
+ kfree(res);
+ }
+}
+
+static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int in_port)
+{
+ int err = -EINVAL;
+ int port;
+ u64 mac;
+ u8 smac_index = 0;
+
+ if (op != RES_OP_RESERVE_AND_MAP)
+ return err;
+
+ port = !in_port ? get_param_l(out_param) : in_port;
+ mac = in_param;
+
+ err = __mlx4_register_mac(dev, port, mac);
+ if (err >= 0) {
+ smac_index = err;
+ set_param_l(out_param, err);
+ err = 0;
+ }
+
+ if (!err) {
+ err = mac_add_to_slave(dev, slave, mac, port, smac_index);
+ if (err)
+ __mlx4_unregister_mac(dev, port, mac);
+ }
+ return err;
+}
+
+static int vlan_add_to_slave(struct mlx4_dev *dev, int slave, u16 vlan,
+ int port, int vlan_index)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *vlan_list =
+ &tracker->slave_list[slave].res_list[RES_VLAN];
+ struct vlan_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, vlan_list, list) {
+ if (res->vlan == vlan && res->port == (u8) port) {
+ /* vlan found. update ref count */
+ ++res->ref_count;
+ return 0;
+ }
+ }
+
+ if (mlx4_grant_resource(dev, slave, RES_VLAN, 1, port))
+ return -EINVAL;
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res) {
+ mlx4_release_resource(dev, slave, RES_VLAN, 1, port);
+ return -ENOMEM;
+ }
+ res->vlan = vlan;
+ res->port = (u8) port;
+ res->vlan_index = vlan_index;
+ res->ref_count = 1;
+ list_add_tail(&res->list,
+ &tracker->slave_list[slave].res_list[RES_VLAN]);
+ return 0;
+}
+
+static void vlan_del_from_slave(struct mlx4_dev *dev, int slave, u16 vlan,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *vlan_list =
+ &tracker->slave_list[slave].res_list[RES_VLAN];
+ struct vlan_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, vlan_list, list) {
+ if (res->vlan == vlan && res->port == (u8) port) {
+ if (!--res->ref_count) {
+ list_del(&res->list);
+ mlx4_release_resource(dev, slave, RES_VLAN,
+ 1, port);
+ kfree(res);
+ }
+ break;
+ }
+ }
+}
+
+static void rem_slave_vlans(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *vlan_list =
+ &tracker->slave_list[slave].res_list[RES_VLAN];
+ struct vlan_res *res, *tmp;
+ int i;
+
+ list_for_each_entry_safe(res, tmp, vlan_list, list) {
+ list_del(&res->list);
+ /* dereference the vlan the num times the slave referenced it */
+ for (i = 0; i < res->ref_count; i++)
+ __mlx4_unregister_vlan(dev, res->port, res->vlan);
+ mlx4_release_resource(dev, slave, RES_VLAN, 1, res->port);
+ kfree(res);
+ }
+}
+
+static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int in_port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
+ int err = -EINVAL;
+ u16 vlan;
+ int vlan_index;
+ int port;
+
+ port = !in_port ? get_param_l(out_param) : in_port;
+
+ if (!port)
+ return err;
+
+ if (op != RES_OP_RESERVE_AND_MAP)
+ return err;
+
+ /* upstream kernels treated vlan reg/unreg as a NOP; preserve that behavior */
+ if (!in_port && port > 0 && port <= dev->caps.num_ports) {
+ slave_state[slave].old_vlan_api = true;
+ return 0;
+ }
+
+ vlan = (u16) in_param;
+
+ err = __mlx4_register_vlan(dev, port, vlan, &vlan_index);
+ if (!err) {
+ set_param_l(out_param, (u32) vlan_index);
+ err = vlan_add_to_slave(dev, slave, vlan, port, vlan_index);
+ if (err)
+ __mlx4_unregister_vlan(dev, port, vlan);
+ }
+ return err;
+}
+
+static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int port)
+{
+ u32 index;
+ int err;
+
+ if (op != RES_OP_RESERVE)
+ return -EINVAL;
+
+ err = __mlx4_counter_alloc(dev, slave, port, &index);
+ if (!err)
+ set_param_l(out_param, index);
+
+ return err;
+}
+
+static int xrcdn_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ u32 xrcdn;
+ int err;
+
+ if (op != RES_OP_RESERVE)
+ return -EINVAL;
+
+ err = __mlx4_xrcd_alloc(dev, &xrcdn);
+ if (err)
+ return err;
+
+ err = add_res_range(dev, slave, xrcdn, 1, RES_XRCD, 0);
+ if (err)
+ __mlx4_xrcd_free(dev, xrcdn);
+ else
+ set_param_l(out_param, xrcdn);
+
+ return err;
+}
+
+int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int alop = vhcr->op_modifier;
+
+ switch (vhcr->in_modifier & 0xFF) {
+ case RES_QP:
+ err = qp_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MTT:
+ err = mtt_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MPT:
+ err = mpt_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_CQ:
+ err = cq_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_SRQ:
+ err = srq_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MAC:
+ err = mac_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_VLAN:
+ err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_COUNTER:
+ err = counter_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_XRCD:
+ err = xrcdn_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param)
+{
+ int err;
+ int count;
+ int base;
+ int qpn;
+
+ switch (op) {
+ case RES_OP_RESERVE:
+ base = get_param_l(&in_param) & 0x7fffff;
+ count = get_param_h(&in_param);
+ err = rem_res_range(dev, slave, base, count, RES_QP, 0);
+ if (err)
+ break;
+ mlx4_release_resource(dev, slave, RES_QP, count, 0);
+ __mlx4_qp_release_range(dev, base, count);
+ break;
+ case RES_OP_MAP_ICM:
+ qpn = get_param_l(&in_param) & 0x7fffff;
+ err = qp_res_start_move_to(dev, slave, qpn, RES_QP_RESERVED,
+ NULL, 0);
+ if (err)
+ return err;
+
+ if (!fw_reserved(dev, qpn))
+ __mlx4_qp_free_icm(dev, qpn);
+
+ res_end_move(dev, slave, RES_QP, qpn);
+
+ if (valid_reserved(dev, slave, qpn))
+ err = rem_res_range(dev, slave, qpn, 1, RES_QP, 0);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ return err;
+}
+
+static int mtt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int err = -EINVAL;
+ int base;
+ int order;
+
+ if (op != RES_OP_RESERVE_AND_MAP)
+ return err;
+
+ base = get_param_l(&in_param);
+ order = get_param_h(&in_param);
+ err = rem_res_range(dev, slave, base, 1, RES_MTT, order);
+ if (!err) {
+ mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0);
+ __mlx4_free_mtt_range(dev, base, order);
+ }
+ return err;
+}
+
+static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param)
+{
+ int err = -EINVAL;
+ int index;
+ int id;
+ struct res_mpt *mpt;
+
+ switch (op) {
+ case RES_OP_RESERVE:
+ index = get_param_l(&in_param);
+ id = index & mpt_mask(dev);
+ err = get_res(dev, slave, id, RES_MPT, &mpt);
+ if (err)
+ break;
+ index = mpt->key;
+ put_res(dev, slave, id, RES_MPT);
+
+ err = rem_res_range(dev, slave, id, 1, RES_MPT, 0);
+ if (err)
+ break;
+ mlx4_release_resource(dev, slave, RES_MPT, 1, 0);
+ __mlx4_mpt_release(dev, index);
+ break;
+ case RES_OP_MAP_ICM:
+ index = get_param_l(&in_param);
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id,
+ RES_MPT_RESERVED, &mpt);
+ if (err)
+ return err;
+
+ __mlx4_mpt_free_icm(dev, mpt->key);
+ res_end_move(dev, slave, RES_MPT, id);
+ return err;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ return err;
+}
+
+static int cq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int cqn;
+ int err;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ cqn = get_param_l(&in_param);
+ err = rem_res_range(dev, slave, cqn, 1, RES_CQ, 0);
+ if (err)
+ break;
+
+ mlx4_release_resource(dev, slave, RES_CQ, 1, 0);
+ __mlx4_cq_free_icm(dev, cqn);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int srq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int srqn;
+ int err;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ srqn = get_param_l(&in_param);
+ err = rem_res_range(dev, slave, srqn, 1, RES_SRQ, 0);
+ if (err)
+ break;
+
+ mlx4_release_resource(dev, slave, RES_SRQ, 1, 0);
+ __mlx4_srq_free_icm(dev, srqn);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int in_port)
+{
+ int port;
+ int err = 0;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ port = !in_port ? get_param_l(out_param) : in_port;
+ mac_del_from_slave(dev, slave, in_param, port);
+ __mlx4_unregister_mac(dev, port, in_param);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
+ int err = 0;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ if (slave_state[slave].old_vlan_api)
+ return 0;
+ if (!port)
+ return -EINVAL;
+ vlan_del_from_slave(dev, slave, in_param, port);
+ __mlx4_unregister_vlan(dev, port, in_param);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int counter_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int port)
+{
+ int index;
+
+ if (op != RES_OP_RESERVE)
+ return -EINVAL;
+
+ index = get_param_l(&in_param);
+
+ __mlx4_counter_free(dev, slave, port, index);
+
+ return 0;
+}
+
+static int xrcdn_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int xrcdn;
+ int err;
+
+ if (op != RES_OP_RESERVE)
+ return -EINVAL;
+
+ xrcdn = get_param_l(&in_param);
+ err = rem_res_range(dev, slave, xrcdn, 1, RES_XRCD, 0);
+ if (err)
+ return err;
+
+ __mlx4_xrcd_free(dev, xrcdn);
+
+ return err;
+}
+
+int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err = -EINVAL;
+ int alop = vhcr->op_modifier;
+
+ switch (vhcr->in_modifier & 0xFF) {
+ case RES_QP:
+ err = qp_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param);
+ break;
+
+ case RES_MTT:
+ err = mtt_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MPT:
+ err = mpt_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param);
+ break;
+
+ case RES_CQ:
+ err = cq_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_SRQ:
+ err = srq_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MAC:
+ err = mac_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_VLAN:
+ err = vlan_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_COUNTER:
+ err = counter_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_XRCD:
+ err = xrcdn_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ default:
+ break;
+ }
+ return err;
+}
+
+/* ugly but other choices are uglier */
+static int mr_phys_mpt(struct mlx4_mpt_entry *mpt)
+{
+ return (be32_to_cpu(mpt->flags) >> 9) & 1;
+}
+
+static int mr_get_mtt_addr(struct mlx4_mpt_entry *mpt)
+{
+ return (int)be64_to_cpu(mpt->mtt_addr) & 0xfffffff8;
+}
+
+static int mr_get_mtt_size(struct mlx4_mpt_entry *mpt)
+{
+ return be32_to_cpu(mpt->mtt_sz);
+}
+
+static u32 mr_get_pd(struct mlx4_mpt_entry *mpt)
+{
+ return be32_to_cpu(mpt->pd_flags) & 0x00ffffff;
+}
+
+static int mr_is_fmr(struct mlx4_mpt_entry *mpt)
+{
+ return be32_to_cpu(mpt->pd_flags) & MLX4_MPT_PD_FLAG_FAST_REG;
+}
+
+static int mr_is_bind_enabled(struct mlx4_mpt_entry *mpt)
+{
+ return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_BIND_ENABLE;
+}
+
+static int mr_is_region(struct mlx4_mpt_entry *mpt)
+{
+ return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_REGION;
+}
+
+static int qp_get_mtt_addr(struct mlx4_qp_context *qpc)
+{
+ return be32_to_cpu(qpc->mtt_base_addr_l) & 0xfffffff8;
+}
+
+static int srq_get_mtt_addr(struct mlx4_srq_context *srqc)
+{
+ return be32_to_cpu(srqc->mtt_base_addr_l) & 0xfffffff8;
+}
+
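+/*
+ * Number of MTT pages a QP context needs: the SQ always counts (the WQE
+ * stride is in 16-byte units, hence the "+ 4"); the RQ contributes
+ * nothing for SRQ-attached, RSS or XRC QPs. The byte total, including
+ * the 64-byte-granular page offset, is rounded up to a power-of-two
+ * page count.
+ */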
+static int qp_get_mtt_size(struct mlx4_qp_context *qpc)
+{
+ int page_shift = (qpc->log_page_size & 0x3f) + 12;
+ int log_sq_size = (qpc->sq_size_stride >> 3) & 0xf;
+ int log_sq_stride = qpc->sq_size_stride & 7;
+ int log_rq_size = (qpc->rq_size_stride >> 3) & 0xf;
+ int log_rq_stride = qpc->rq_size_stride & 7;
+ int srq = (be32_to_cpu(qpc->srqn) >> 24) & 1;
+ int rss = (be32_to_cpu(qpc->flags) >> 13) & 1;
+ u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
+ int xrc = (ts == MLX4_QP_ST_XRC) ? 1 : 0;
+ int sq_size;
+ int rq_size;
+ int total_pages;
+ int total_mem;
+ int page_offset = (be32_to_cpu(qpc->params2) >> 6) & 0x3f;
+
+ sq_size = 1 << (log_sq_size + log_sq_stride + 4);
+ rq_size = (srq|rss|xrc) ? 0 : (1 << (log_rq_size + log_rq_stride + 4));
+ total_mem = sq_size + rq_size;
+ total_pages =
+ roundup_pow_of_two((total_mem + (page_offset << 6)) >>
+ page_shift);
+
+ return total_pages;
+}
+
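+/* A slave may only touch MTT entries inside a range it owns: reject
+ * [start, start + size) unless it fits entirely within the tracked
+ * allocation.
+ */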
+static int check_mtt_range(struct mlx4_dev *dev, int slave, int start,
+ int size, struct res_mtt *mtt)
+{
+ int res_start = mtt->com.res_id;
+ int res_size = (1 << mtt->order);
+
+ if (start < res_start || start + size > res_start + res_size)
+ return -EPERM;
+ return 0;
+}
+
+int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int index = vhcr->in_modifier;
+ struct res_mtt *mtt;
+ struct res_mpt *mpt;
+ int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz;
+ int phys;
+ int id;
+ u32 pd;
+ int pd_slave;
+
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id, RES_MPT_HW, &mpt);
+ if (err)
+ return err;
+
+ /* Memory windows are currently disabled, since this feature has not
+ * yet been tested under virtualization.
+ */
+ if (!mr_is_region(inbox->buf)) {
+ err = -ENOSYS;
+ goto ex_abort;
+ }
+
+ /* Make sure that the PD bits related to the slave id are zeros. */
+ pd = mr_get_pd(inbox->buf);
+ pd_slave = (pd >> 17) & 0x7f;
+ if (pd_slave != 0 && pd_slave != slave) {
+ err = -EPERM;
+ goto ex_abort;
+ }
+
+ if (mr_is_fmr(inbox->buf)) {
+ /* FMR and Bind Enable are forbidden in slave devices. */
+ if (mr_is_bind_enabled(inbox->buf)) {
+ err = -EPERM;
+ goto ex_abort;
+ }
+ /* FMR and Memory Windows are also forbidden. */
+ if (!mr_is_region(inbox->buf)) {
+ err = -EPERM;
+ goto ex_abort;
+ }
+ }
+
+ phys = mr_phys_mpt(inbox->buf);
+ if (!phys) {
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto ex_abort;
+
+ err = check_mtt_range(dev, slave, mtt_base,
+ mr_get_mtt_size(inbox->buf), mtt);
+ if (err)
+ goto ex_put;
+
+ mpt->mtt = mtt;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put;
+
+ if (!phys) {
+ atomic_inc(&mtt->ref_count);
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ }
+
+ res_end_move(dev, slave, RES_MPT, id);
+ return 0;
+
+ex_put:
+ if (!phys)
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ex_abort:
+ res_abort_move(dev, slave, RES_MPT, id);
+
+ return err;
+}
+
+int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int index = vhcr->in_modifier;
+ struct res_mpt *mpt;
+ int id;
+
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id, RES_MPT_MAPPED, &mpt);
+ if (err)
+ return err;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_abort;
+
+ if (mpt->mtt)
+ atomic_dec(&mpt->mtt->ref_count);
+
+ res_end_move(dev, slave, RES_MPT, id);
+ return 0;
+
+ex_abort:
+ res_abort_move(dev, slave, RES_MPT, id);
+
+ return err;
+}
+
+int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int index = vhcr->in_modifier;
+ struct res_mpt *mpt;
+ int id;
+
+ id = index & mpt_mask(dev);
+ err = get_res(dev, slave, id, RES_MPT, &mpt);
+ if (err)
+ return err;
+
+ if (mpt->com.from_state != RES_MPT_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+
+out:
+ put_res(dev, slave, id, RES_MPT);
+ return err;
+}
+
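+/* Decode helpers: receive/send CQN, and the SRQN field whose bit 24
+ * flags whether an SRQ is attached at all.
+ */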
+static int qp_get_rcqn(struct mlx4_qp_context *qpc)
+{
+ return be32_to_cpu(qpc->cqn_recv) & 0xffffff;
+}
+
+static int qp_get_scqn(struct mlx4_qp_context *qpc)
+{
+ return be32_to_cpu(qpc->cqn_send) & 0xffffff;
+}
+
+static u32 qp_get_srqn(struct mlx4_qp_context *qpc)
+{
+ return be32_to_cpu(qpc->srqn) & 0x1ffffff;
+}
+
+static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr,
+ struct mlx4_qp_context *context)
+{
+ u32 qpn = vhcr->in_modifier & 0xffffff;
+ u32 qkey = 0;
+
+ if (mlx4_get_parav_qkey(dev, qpn, &qkey))
+ return;
+
+ /* adjust qkey in qp context */
+ context->qkey = cpu_to_be32(qkey);
+}
+
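+/*
+ * RST2INIT: take references on every object the QP context names (MTT
+ * range, receive/send CQs and optionally an SRQ) before passing the
+ * command to FW, so none of them can be destroyed under an armed QP.
+ */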
+int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int qpn = vhcr->in_modifier & 0x7fffff;
+ struct res_mtt *mtt;
+ struct res_qp *qp;
+ struct mlx4_qp_context *qpc = inbox->buf + 8;
+ int mtt_base = qp_get_mtt_addr(qpc) / dev->caps.mtt_entry_sz;
+ int mtt_size = qp_get_mtt_size(qpc);
+ struct res_cq *rcq;
+ struct res_cq *scq;
+ int rcqn = qp_get_rcqn(qpc);
+ int scqn = qp_get_scqn(qpc);
+ u32 srqn = qp_get_srqn(qpc) & 0xffffff;
+ int use_srq = (qp_get_srqn(qpc) >> 24) & 1;
+ struct res_srq *srq;
+ int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff;
+
+ err = qp_res_start_move_to(dev, slave, qpn, RES_QP_HW, &qp, 0);
+ if (err)
+ return err;
+ qp->local_qpn = local_qpn;
+ qp->sched_queue = 0;
+ qp->param3 = 0;
+ qp->vlan_control = 0;
+ qp->fvl_rx = 0;
+ qp->pri_path_fl = 0;
+ qp->vlan_index = 0;
+ qp->feup = 0;
+ qp->qpc_flags = be32_to_cpu(qpc->flags);
+
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto ex_abort;
+
+ err = check_mtt_range(dev, slave, mtt_base, mtt_size, mtt);
+ if (err)
+ goto ex_put_mtt;
+
+ err = get_res(dev, slave, rcqn, RES_CQ, &rcq);
+ if (err)
+ goto ex_put_mtt;
+
+ if (scqn != rcqn) {
+ err = get_res(dev, slave, scqn, RES_CQ, &scq);
+ if (err)
+ goto ex_put_rcq;
+ } else
+ scq = rcq;
+
+ if (use_srq) {
+ err = get_res(dev, slave, srqn, RES_SRQ, &srq);
+ if (err)
+ goto ex_put_scq;
+ }
+
+ adjust_proxy_tun_qkey(dev, vhcr, qpc);
+ update_pkey_index(dev, slave, inbox);
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put_srq;
+ atomic_inc(&mtt->ref_count);
+ qp->mtt = mtt;
+ atomic_inc(&rcq->ref_count);
+ qp->rcq = rcq;
+ atomic_inc(&scq->ref_count);
+ qp->scq = scq;
+
+ if (scqn != rcqn)
+ put_res(dev, slave, scqn, RES_CQ);
+
+ if (use_srq) {
+ atomic_inc(&srq->ref_count);
+ put_res(dev, slave, srqn, RES_SRQ);
+ qp->srq = srq;
+ }
+ put_res(dev, slave, rcqn, RES_CQ);
+ put_res(dev, slave, mtt_base, RES_MTT);
+ res_end_move(dev, slave, RES_QP, qpn);
+
+ return 0;
+
+ex_put_srq:
+ if (use_srq)
+ put_res(dev, slave, srqn, RES_SRQ);
+ex_put_scq:
+ if (scqn != rcqn)
+ put_res(dev, slave, scqn, RES_CQ);
+ex_put_rcq:
+ put_res(dev, slave, rcqn, RES_CQ);
+ex_put_mtt:
+ put_res(dev, slave, mtt_base, RES_MTT);
+ex_abort:
+ res_abort_move(dev, slave, RES_QP, qpn);
+
+ return err;
+}
+
+static int eq_get_mtt_addr(struct mlx4_eq_context *eqc)
+{
+ return be32_to_cpu(eqc->mtt_base_addr_l) & 0xfffffff8;
+}
+
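+/* An EQE is 32 bytes, so the EQ spans 2^(log_eq_size + 5) bytes;
+ * at least one page is always required.
+ */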
+static int eq_get_mtt_size(struct mlx4_eq_context *eqc)
+{
+ int log_eq_size = eqc->log_eq_size & 0x1f;
+ int page_shift = (eqc->log_page_size & 0x3f) + 12;
+
+ if (log_eq_size + 5 < page_shift)
+ return 1;
+
+ return 1 << (log_eq_size + 5 - page_shift);
+}
+
+static int cq_get_mtt_addr(struct mlx4_cq_context *cqc)
+{
+ return be32_to_cpu(cqc->mtt_base_addr_l) & 0xfffffff8;
+}
+
+static int cq_get_mtt_size(struct mlx4_cq_context *cqc)
+{
+ int log_cq_size = (be32_to_cpu(cqc->logsize_usrpage) >> 24) & 0x1f;
+ int page_shift = (cqc->log_page_size & 0x3f) + 12;
+
+ if (log_cq_size + 5 < page_shift)
+ return 1;
+
+ return 1 << (log_cq_size + 5 - page_shift);
+}
+
+int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int eqn = vhcr->in_modifier;
+ int res_id = (slave << 8) | eqn;
+ struct mlx4_eq_context *eqc = inbox->buf;
+ int mtt_base = eq_get_mtt_addr(eqc) / dev->caps.mtt_entry_sz;
+ int mtt_size = eq_get_mtt_size(eqc);
+ struct res_eq *eq;
+ struct res_mtt *mtt;
+
+ err = add_res_range(dev, slave, res_id, 1, RES_EQ, 0);
+ if (err)
+ return err;
+ err = eq_res_start_move_to(dev, slave, res_id, RES_EQ_HW, &eq);
+ if (err)
+ goto out_add;
+
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto out_move;
+
+ err = check_mtt_range(dev, slave, mtt_base, mtt_size, mtt);
+ if (err)
+ goto out_put;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto out_put;
+
+ atomic_inc(&mtt->ref_count);
+ eq->mtt = mtt;
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ res_end_move(dev, slave, RES_EQ, res_id);
+ return 0;
+
+out_put:
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+out_move:
+ res_abort_move(dev, slave, RES_EQ, res_id);
+out_add:
+ rem_res_range(dev, slave, res_id, 1, RES_EQ, 0);
+ return err;
+}
+
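+/*
+ * Find the slave-owned MTT allocation that covers [start, start + len)
+ * and mark it busy under the tracker lock, so it cannot be freed while
+ * the caller writes into it.
+ */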
+static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start,
+ int len, struct res_mtt **res)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_mtt *mtt;
+ int err = -EINVAL;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry(mtt, &tracker->slave_list[slave].res_list[RES_MTT],
+ com.list) {
+ if (!check_mtt_range(dev, slave, start, len, mtt)) {
+ *res = mtt;
+ mtt->com.from_state = mtt->com.state;
+ mtt->com.state = RES_MTT_BUSY;
+ err = 0;
+ break;
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int verify_qp_parameters(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *inbox,
+ enum qp_transition transition, u8 slave)
+{
+ u32 qp_type;
+ struct mlx4_qp_context *qp_ctx;
+ enum mlx4_qp_optpar optpar;
+ int port;
+ int num_gids;
+
+ qp_ctx = inbox->buf + 8;
+ qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
+ optpar = be32_to_cpu(*(__be32 *) inbox->buf);
+
+ switch (qp_type) {
+ case MLX4_QP_ST_RC:
+ case MLX4_QP_ST_UC:
+ switch (transition) {
+ case QP_TRANS_INIT2RTR:
+ case QP_TRANS_RTR2RTS:
+ case QP_TRANS_RTS2RTS:
+ case QP_TRANS_SQD2SQD:
+ case QP_TRANS_SQD2RTS:
+ if (slave != mlx4_master_func_num(dev))
+ if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) {
+ port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+ if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB)
+ num_gids = mlx4_get_slave_num_gids(dev, slave);
+ else
+ num_gids = 1;
+ if (qp_ctx->pri_path.mgid_index >= num_gids)
+ return -EINVAL;
+ }
+ if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) {
+ port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1;
+ if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB)
+ num_gids = mlx4_get_slave_num_gids(dev, slave);
+ else
+ num_gids = 1;
+ if (qp_ctx->alt_path.mgid_index >= num_gids)
+ return -EINVAL;
+ }
+ break;
+ default:
+ break;
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_mtt mtt;
+ __be64 *page_list = inbox->buf;
+ u64 *pg_list = (u64 *)page_list;
+ int i;
+ struct res_mtt *rmtt = NULL;
+ int start = be64_to_cpu(page_list[0]);
+ int npages = vhcr->in_modifier;
+ int err;
+
+ err = get_containing_mtt(dev, slave, start, npages, &rmtt);
+ if (err)
+ return err;
+
+ /* Call the SW implementation of write_mtt:
+ * - Prepare a dummy mtt struct
+ * - Translate inbox contents to simple addresses in host endianness */
+ mtt.offset = 0; /* TBD: this is broken, but it is not handled since
+ the offset is not really used */
+ mtt.order = 0;
+ mtt.page_shift = 0;
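+ /* page_list[0] holds the MTT start address; the actual page
+ * entries begin at index 2. */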
+ for (i = 0; i < npages; ++i)
+ pg_list[i + 2] = (be64_to_cpu(page_list[i + 2]) & ~1ULL);
+
+ err = __mlx4_write_mtt(dev, &mtt, be64_to_cpu(page_list[0]), npages,
+ ((u64 *)page_list + 2));
+
+ if (rmtt)
+ put_res(dev, slave, rmtt->com.res_id, RES_MTT);
+
+ return err;
+}
+
+int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int eqn = vhcr->in_modifier;
+ int res_id = eqn | (slave << 8);
+ struct res_eq *eq;
+ int err;
+
+ err = eq_res_start_move_to(dev, slave, res_id, RES_EQ_RESERVED, &eq);
+ if (err)
+ return err;
+
+ err = get_res(dev, slave, eq->mtt->com.res_id, RES_MTT, NULL);
+ if (err)
+ goto ex_abort;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put;
+
+ atomic_dec(&eq->mtt->ref_count);
+ put_res(dev, slave, eq->mtt->com.res_id, RES_MTT);
+ res_end_move(dev, slave, RES_EQ, res_id);
+ rem_res_range(dev, slave, res_id, 1, RES_EQ, 0);
+
+ return 0;
+
+ex_put:
+ put_res(dev, slave, eq->mtt->com.res_id, RES_MTT);
+ex_abort:
+ res_abort_move(dev, slave, RES_EQ, res_id);
+
+ return err;
+}
+
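+/*
+ * Forward an event to a slave's event queue. The EQE is only generated
+ * if the slave is active and has registered an EQ for this event type;
+ * command-completion events get a fresh token first.
+ */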
+int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_event_eq_info *event_eq;
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 in_modifier = 0;
+ int err;
+ int res_id;
+ struct res_eq *req;
+
+ if (!priv->mfunc.master.slave_state)
+ return -EINVAL;
+
+ /* check for slave valid, slave not PF, and slave active */
+ if (slave < 0 || slave >= dev->num_slaves ||
+ slave == dev->caps.function ||
+ !priv->mfunc.master.slave_state[slave].active)
+ return 0;
+
+ event_eq = &priv->mfunc.master.slave_state[slave].event_eq[eqe->type];
+
+ /* Create the event only if the slave is registered */
+ if (event_eq->eqn < 0)
+ return 0;
+
+ mutex_lock(&priv->mfunc.master.gen_eqe_mutex[slave]);
+ res_id = (slave << 8) | event_eq->eqn;
+ err = get_res(dev, slave, res_id, RES_EQ, &req);
+ if (err)
+ goto unlock;
+
+ if (req->com.from_state != RES_EQ_HW) {
+ err = -EINVAL;
+ goto put;
+ }
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto put;
+ }
+
+ if (eqe->type == MLX4_EVENT_TYPE_CMD) {
+ ++event_eq->token;
+ eqe->event.cmd.token = cpu_to_be16(event_eq->token);
+ }
+
+ memcpy(mailbox->buf, (u8 *) eqe, 28);
+
+ in_modifier = (slave & 0xff) | ((event_eq->eqn & 0xff) << 16);
+
+ err = mlx4_cmd(dev, mailbox->dma, in_modifier, 0,
+ MLX4_CMD_GEN_EQE, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ put_res(dev, slave, res_id, RES_EQ);
+ mutex_unlock(&priv->mfunc.master.gen_eqe_mutex[slave]);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+
+put:
+ put_res(dev, slave, res_id, RES_EQ);
+
+unlock:
+ mutex_unlock(&priv->mfunc.master.gen_eqe_mutex[slave]);
+ return err;
+}
+
+int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int eqn = vhcr->in_modifier;
+ int res_id = eqn | (slave << 8);
+ struct res_eq *eq;
+ int err;
+
+ err = get_res(dev, slave, res_id, RES_EQ, &eq);
+ if (err)
+ return err;
+
+ if (eq->com.from_state != RES_EQ_HW) {
+ err = -EINVAL;
+ goto ex_put;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+
+ex_put:
+ put_res(dev, slave, res_id, RES_EQ);
+ return err;
+}
+
+int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int cqn = vhcr->in_modifier;
+ struct mlx4_cq_context *cqc = inbox->buf;
+ int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz;
+ struct res_cq *cq;
+ struct res_mtt *mtt;
+
+ err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_HW, &cq);
+ if (err)
+ return err;
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto out_move;
+ err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt);
+ if (err)
+ goto out_put;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto out_put;
+ atomic_inc(&mtt->ref_count);
+ cq->mtt = mtt;
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ res_end_move(dev, slave, RES_CQ, cqn);
+ return 0;
+
+out_put:
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+out_move:
+ res_abort_move(dev, slave, RES_CQ, cqn);
+ return err;
+}
+
+int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int cqn = vhcr->in_modifier;
+ struct res_cq *cq;
+
+ err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_ALLOCATED, &cq);
+ if (err)
+ return err;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto out_move;
+ atomic_dec(&cq->mtt->ref_count);
+ res_end_move(dev, slave, RES_CQ, cqn);
+ return 0;
+
+out_move:
+ res_abort_move(dev, slave, RES_CQ, cqn);
+ return err;
+}
+
+int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int cqn = vhcr->in_modifier;
+ struct res_cq *cq;
+ int err;
+
+ err = get_res(dev, slave, cqn, RES_CQ, &cq);
+ if (err)
+ return err;
+
+ if (cq->com.from_state != RES_CQ_HW)
+ goto ex_put;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ex_put:
+ put_res(dev, slave, cqn, RES_CQ);
+
+ return err;
+}
+
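+/*
+ * CQ resize path of MODIFY_CQ: check that the CQ still points at its
+ * original MTT, validate the new range, run the FW command, then move
+ * the reference count from the old MTT to the new one.
+ */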
+static int handle_resize(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd,
+ struct res_cq *cq)
+{
+ int err;
+ struct res_mtt *orig_mtt;
+ struct res_mtt *mtt;
+ struct mlx4_cq_context *cqc = inbox->buf;
+ int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz;
+
+ err = get_res(dev, slave, cq->mtt->com.res_id, RES_MTT, &orig_mtt);
+ if (err)
+ return err;
+
+ if (orig_mtt != cq->mtt) {
+ err = -EINVAL;
+ goto ex_put;
+ }
+
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto ex_put;
+
+ err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt);
+ if (err)
+ goto ex_put1;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put1;
+ atomic_dec(&orig_mtt->ref_count);
+ put_res(dev, slave, orig_mtt->com.res_id, RES_MTT);
+ atomic_inc(&mtt->ref_count);
+ cq->mtt = mtt;
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ return 0;
+
+ex_put1:
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ex_put:
+ put_res(dev, slave, orig_mtt->com.res_id, RES_MTT);
+
+ return err;
+}
+
+int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int cqn = vhcr->in_modifier;
+ struct res_cq *cq;
+ int err;
+
+ err = get_res(dev, slave, cqn, RES_CQ, &cq);
+ if (err)
+ return err;
+
+ if (cq->com.from_state != RES_CQ_HW)
+ goto ex_put;
+
+ if (vhcr->op_modifier == 0) {
+ err = handle_resize(dev, slave, vhcr, inbox, outbox, cmd, cq);
+ goto ex_put;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ex_put:
+ put_res(dev, slave, cqn, RES_CQ);
+
+ return err;
+}
+
+static int srq_get_mtt_size(struct mlx4_srq_context *srqc)
+{
+ int log_srq_size = (be32_to_cpu(srqc->state_logsize_srqn) >> 24) & 0xf;
+ int log_rq_stride = srqc->logstride & 7;
+ int page_shift = (srqc->log_page_size & 0x3f) + 12;
+
+ if (log_srq_size + log_rq_stride + 4 < page_shift)
+ return 1;
+
+ return 1 << (log_srq_size + log_rq_stride + 4 - page_shift);
+}
+
+int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int srqn = vhcr->in_modifier;
+ struct res_mtt *mtt;
+ struct res_srq *srq;
+ struct mlx4_srq_context *srqc = inbox->buf;
+ int mtt_base = srq_get_mtt_addr(srqc) / dev->caps.mtt_entry_sz;
+
+ if (srqn != (be32_to_cpu(srqc->state_logsize_srqn) & 0xffffff))
+ return -EINVAL;
+
+ err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_HW, &srq);
+ if (err)
+ return err;
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto ex_abort;
+ err = check_mtt_range(dev, slave, mtt_base, srq_get_mtt_size(srqc),
+ mtt);
+ if (err)
+ goto ex_put_mtt;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put_mtt;
+
+ atomic_inc(&mtt->ref_count);
+ srq->mtt = mtt;
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ res_end_move(dev, slave, RES_SRQ, srqn);
+ return 0;
+
+ex_put_mtt:
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ex_abort:
+ res_abort_move(dev, slave, RES_SRQ, srqn);
+
+ return err;
+}
+
+int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int srqn = vhcr->in_modifier;
+ struct res_srq *srq;
+
+ err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_ALLOCATED, &srq);
+ if (err)
+ return err;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_abort;
+ atomic_dec(&srq->mtt->ref_count);
+ if (srq->cq)
+ atomic_dec(&srq->cq->ref_count);
+ res_end_move(dev, slave, RES_SRQ, srqn);
+
+ return 0;
+
+ex_abort:
+ res_abort_move(dev, slave, RES_SRQ, srqn);
+
+ return err;
+}
+
+int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int srqn = vhcr->in_modifier;
+ struct res_srq *srq;
+
+ err = get_res(dev, slave, srqn, RES_SRQ, &srq);
+ if (err)
+ return err;
+ if (srq->com.from_state != RES_SRQ_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+out:
+ put_res(dev, slave, srqn, RES_SRQ);
+ return err;
+}
+
+int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int srqn = vhcr->in_modifier;
+ struct res_srq *srq;
+
+ err = get_res(dev, slave, srqn, RES_SRQ, &srq);
+ if (err)
+ return err;
+
+ if (srq->com.from_state != RES_SRQ_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+out:
+ put_res(dev, slave, srqn, RES_SRQ);
+ return err;
+}
+
+int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int qpn = vhcr->in_modifier & 0x7fffff;
+ struct res_qp *qp;
+
+ err = get_res(dev, slave, qpn, RES_QP, &qp);
+ if (err)
+ return err;
+ if (qp->com.from_state != RES_QP_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+out:
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+}
+
+int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ update_pkey_index(dev, slave, inbox);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+static int roce_verify_mac(struct mlx4_dev *dev, int slave,
+ struct mlx4_qp_context *qpc,
+ struct mlx4_cmd_mailbox *inbox)
+{
+ u64 mac;
+ int port;
+ u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
+ u8 sched = *(u8 *)(inbox->buf + 64);
+ u8 smac_ix;
+
+ port = (sched >> 6 & 1) + 1;
+ if (mlx4_is_eth(dev, port) && (ts != MLX4_QP_ST_MLX)) {
+ smac_ix = qpc->pri_path.grh_mylmc & 0x7f;
+ if (mac_find_smac_ix_in_slave(dev, slave, port, smac_ix, &mac))
+ return -ENOENT;
+ }
+ return 0;
+}
+
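+/*
+ * INIT2RTR is where VF-supplied scheduling/VLAN parameters take effect:
+ * verify them first, then (on success) remember the values the VF asked
+ * for so they can be restored across VST/VGT changes.
+ */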
+int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *qpc = inbox->buf + 8;
+ int qpn = vhcr->in_modifier & 0x7fffff;
+ struct res_qp *qp;
+ u8 orig_sched_queue;
+ __be32 orig_param3 = qpc->param3;
+ u8 orig_vlan_control = qpc->pri_path.vlan_control;
+ u8 orig_fvl_rx = qpc->pri_path.fvl_rx;
+ u8 orig_pri_path_fl = qpc->pri_path.fl;
+ u8 orig_vlan_index = qpc->pri_path.vlan_index;
+ u8 orig_feup = qpc->pri_path.feup;
+
+ err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave);
+ if (err)
+ return err;
+
+ if (roce_verify_mac(dev, slave, qpc, inbox))
+ return -EINVAL;
+
+ update_pkey_index(dev, slave, inbox);
+ update_gid(dev, inbox, (u8)slave);
+ adjust_proxy_tun_qkey(dev, vhcr, qpc);
+ orig_sched_queue = qpc->pri_path.sched_queue;
+
+ err = get_res(dev, slave, qpn, RES_QP, &qp);
+ if (err)
+ return err;
+ if (qp->com.from_state != RES_QP_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ /* do not modify vport QP params for RSS QPs */
+ if (!(qp->qpc_flags & (1 << MLX4_RSS_QPC_FLAG_OFFSET))) {
+ err = update_vport_qp_param(dev, inbox, slave, qpn);
+ if (err)
+ goto out;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+out:
+ /* if no error, save sched queue value passed in by VF. This is
+ * essentially the QOS value provided by the VF. This will be useful
+ * if we allow dynamic changes from VST back to VGT
+ */
+ if (!err) {
+ qp->sched_queue = orig_sched_queue;
+ qp->param3 = orig_param3;
+ qp->vlan_control = orig_vlan_control;
+ qp->fvl_rx = orig_fvl_rx;
+ qp->pri_path_fl = orig_pri_path_fl;
+ qp->vlan_index = orig_vlan_index;
+ qp->feup = orig_feup;
+ }
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+}
+
+int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave);
+ if (err)
+ return err;
+
+ update_pkey_index(dev, slave, inbox);
+ update_gid(dev, inbox, (u8)slave);
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave);
+ if (err)
+ return err;
+
+ update_pkey_index(dev, slave, inbox);
+ update_gid(dev, inbox, (u8)slave);
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave);
+ if (err)
+ return err;
+
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ update_gid(dev, inbox, (u8)slave);
+ update_pkey_index(dev, slave, inbox);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave);
+ if (err)
+ return err;
+
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ update_gid(dev, inbox, (u8)slave);
+ update_pkey_index(dev, slave, inbox);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int qpn = vhcr->in_modifier & 0x7fffff;
+ struct res_qp *qp;
+
+ err = qp_res_start_move_to(dev, slave, qpn, RES_QP_MAPPED, &qp, 0);
+ if (err)
+ return err;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_abort;
+
+ atomic_dec(&qp->mtt->ref_count);
+ atomic_dec(&qp->rcq->ref_count);
+ atomic_dec(&qp->scq->ref_count);
+ if (qp->srq)
+ atomic_dec(&qp->srq->ref_count);
+ res_end_move(dev, slave, RES_QP, qpn);
+ return 0;
+
+ex_abort:
+ res_abort_move(dev, slave, RES_QP, qpn);
+
+ return err;
+}
+
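+/* Multicast attachments are tracked per QP as a list of GIDs so they
+ * can be cleaned up when the slave goes away (see detach_qp()).
+ */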
+static struct res_gid *find_gid(struct mlx4_dev *dev, int slave,
+ struct res_qp *rqp, u8 *gid)
+{
+ struct res_gid *res;
+
+ list_for_each_entry(res, &rqp->mcg_list, list) {
+ if (!memcmp(res->gid, gid, 16))
+ return res;
+ }
+ return NULL;
+}
+
+static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp,
+ u8 *gid, enum mlx4_protocol prot,
+ enum mlx4_steer_type steer, u64 reg_id)
+{
+ struct res_gid *res;
+ int err;
+
+ res = kzalloc(sizeof *res, GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ spin_lock_irq(&rqp->mcg_spl);
+ if (find_gid(dev, slave, rqp, gid)) {
+ kfree(res);
+ err = -EEXIST;
+ } else {
+ memcpy(res->gid, gid, 16);
+ res->prot = prot;
+ res->steer = steer;
+ res->reg_id = reg_id;
+ list_add_tail(&res->list, &rqp->mcg_list);
+ err = 0;
+ }
+ spin_unlock_irq(&rqp->mcg_spl);
+
+ return err;
+}
+
+static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp,
+ u8 *gid, enum mlx4_protocol prot,
+ enum mlx4_steer_type steer, u64 *reg_id)
+{
+ struct res_gid *res;
+ int err;
+
+ spin_lock_irq(&rqp->mcg_spl);
+ res = find_gid(dev, slave, rqp, gid);
+ if (!res || res->prot != prot || res->steer != steer)
+ err = -EINVAL;
+ else {
+ *reg_id = res->reg_id;
+ list_del(&res->list);
+ kfree(res);
+ err = 0;
+ }
+ spin_unlock_irq(&rqp->mcg_spl);
+
+ return err;
+}
+
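+/* Dispatch attach/detach according to the active steering mode:
+ * device-managed flow steering tracks rules by registration id, while
+ * B0 steering uses the common QP attach/detach path.
+ */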
+static int qp_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ int block_loopback, enum mlx4_protocol prot,
+ enum mlx4_steer_type type, u64 *reg_id)
+{
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ return mlx4_trans_to_dmfs_attach(dev, qp, gid, gid[5],
+ block_loopback, prot,
+ reg_id);
+ case MLX4_STEERING_MODE_B0:
+ return mlx4_qp_attach_common(dev, qp, gid,
+ block_loopback, prot, type);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int qp_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ enum mlx4_protocol prot, enum mlx4_steer_type type,
+ u64 reg_id)
+{
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ return mlx4_flow_detach(dev, reg_id);
+ case MLX4_STEERING_MODE_B0:
+ return mlx4_qp_detach_common(dev, qp, gid, prot, type);
+ default:
+ return -EINVAL;
+ }
+}
+
+int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_qp qp; /* dummy for calling attach/detach */
+ u8 *gid = inbox->buf;
+ enum mlx4_protocol prot = (vhcr->in_modifier >> 28) & 0x7;
+ int err;
+ int qpn;
+ struct res_qp *rqp;
+ u64 reg_id = 0;
+ int attach = vhcr->op_modifier;
+ int block_loopback = vhcr->in_modifier >> 31;
+ u8 steer_type_mask = 2;
+ enum mlx4_steer_type type = (gid[7] & steer_type_mask) >> 1;
+
+ qpn = vhcr->in_modifier & 0xffffff;
+ err = get_res(dev, slave, qpn, RES_QP, &rqp);
+ if (err)
+ return err;
+
+ qp.qpn = qpn;
+ if (attach) {
+ err = qp_attach(dev, &qp, gid, block_loopback, prot,
+ type, &reg_id);
+ if (err) {
+ pr_err("Failed to attach rule to qp 0x%x\n", qpn);
+ goto ex_put;
+ }
+ err = add_mcg_res(dev, slave, rqp, gid, prot, type, reg_id);
+ if (err)
+ goto ex_detach;
+ } else {
+ err = rem_mcg_res(dev, slave, rqp, gid, prot, type, &reg_id);
+ if (err)
+ goto ex_put;
+
+ err = qp_detach(dev, &qp, gid, prot, type, reg_id);
+ if (err)
+ pr_err("Failed to detach rule from qp 0x%x, reg_id = 0x%llx\n",
+ qpn, (unsigned long long)reg_id);
+ }
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+
+ex_detach:
+ qp_detach(dev, &qp, gid, prot, type, reg_id);
+ex_put:
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+}
+
+/*
+ * MAC validation for Flow Steering rules.
+ * VF can attach rules only with a mac address which is assigned to it.
+ */
+static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header,
+ struct list_head *rlist)
+{
+ struct mac_res *res, *tmp;
+ __be64 be_mac;
+
+ /* Make sure it isn't a multicast or broadcast MAC */
+ if (!is_multicast_ether_addr(eth_header->eth.dst_mac) &&
+ !is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
+ list_for_each_entry_safe(res, tmp, rlist, list) {
+ be_mac = cpu_to_be64(res->mac << 16);
+ if (!memcmp(&be_mac, eth_header->eth.dst_mac, ETH_ALEN))
+ return 0;
+ }
+ pr_err("MAC %pM doesn't belong to VF %d, steering rule rejected\n",
+ eth_header->eth.dst_mac, slave);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * In case of missing eth header, append eth header with a MAC address
+ * assigned to the VF.
+ */
+static int add_eth_header(struct mlx4_dev *dev, int slave,
+ struct mlx4_cmd_mailbox *inbox,
+ struct list_head *rlist, int header_id)
+{
+ struct mac_res *res, *tmp;
+ u8 port;
+ struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+ struct mlx4_net_trans_rule_hw_eth *eth_header;
+ struct mlx4_net_trans_rule_hw_ipv4 *ip_header;
+ struct mlx4_net_trans_rule_hw_tcp_udp *l4_header;
+ __be64 be_mac = 0;
+ __be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf;
+ port = ctrl->port;
+ eth_header = (struct mlx4_net_trans_rule_hw_eth *)(ctrl + 1);
+
+ /* Clear a space in the inbox for eth header */
+ switch (header_id) {
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ ip_header =
+ (struct mlx4_net_trans_rule_hw_ipv4 *)(eth_header + 1);
+ memmove(ip_header, eth_header,
+ sizeof(*ip_header) + sizeof(*l4_header));
+ break;
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ l4_header = (struct mlx4_net_trans_rule_hw_tcp_udp *)
+ (eth_header + 1);
+ memmove(l4_header, eth_header, sizeof(*l4_header));
+ break;
+ default:
+ return -EINVAL;
+ }
+ list_for_each_entry_safe(res, tmp, rlist, list) {
+ if (port == res->port) {
+ be_mac = cpu_to_be64(res->mac << 16);
+ break;
+ }
+ }
+ if (!be_mac) {
+ pr_err("Failed adding eth header to FS rule; can't find a matching MAC for port %d\n",
+ port);
+ return -EINVAL;
+ }
+
+ memset(eth_header, 0, sizeof(*eth_header));
+ eth_header->size = sizeof(*eth_header) >> 2;
+ eth_header->id = cpu_to_be16(__sw_id_hw[MLX4_NET_TRANS_RULE_ID_ETH]);
+ memcpy(eth_header->dst_mac, &be_mac, ETH_ALEN);
+ memcpy(eth_header->dst_mac_msk, &mac_msk, ETH_ALEN);
+
+ return 0;
+}
+
+int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC];
+ int err;
+ int qpn;
+ struct res_qp *rqp;
+ struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+ struct _rule_hw *rule_header;
+ int header_id;
+
+ if (dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return -EOPNOTSUPP;
+
+ ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf;
+ qpn = be32_to_cpu(ctrl->qpn) & 0xffffff;
+ err = get_res(dev, slave, qpn, RES_QP, &rqp);
+ if (err) {
+ pr_err("Steering rule with qpn 0x%x rejected.\n", qpn);
+ return err;
+ }
+ rule_header = (struct _rule_hw *)(ctrl + 1);
+ header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id));
+
+ switch (header_id) {
+ case MLX4_NET_TRANS_RULE_ID_ETH:
+ if (validate_eth_header_mac(slave, rule_header, rlist)) {
+ err = -EINVAL;
+ goto err_put;
+ }
+ break;
+ case MLX4_NET_TRANS_RULE_ID_IB:
+ break;
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ pr_warn("Can't attach FS rule without L2 headers, adding L2 header.\n");
+ if (add_eth_header(dev, slave, inbox, rlist, header_id)) {
+ err = -EINVAL;
+ goto err_put;
+ }
+ vhcr->in_modifier +=
+ sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2;
+ break;
+ default:
+ pr_err("Corrupted mailbox.\n");
+ err = -EINVAL;
+ goto err_put;
+ }
+
+ err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param,
+ vhcr->in_modifier, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ goto err_put;
+
+ err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, qpn);
+ if (err) {
+ mlx4_err(dev, "Failed to add flow steering resources\n");
+ /* detach rule*/
+ mlx4_cmd(dev, vhcr->out_param, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ goto err_put;
+ }
+ atomic_inc(&rqp->ref_count);
+err_put:
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+}
+
+int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct res_qp *rqp;
+ struct res_fs_rule *rrule;
+
+ if (dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return -EOPNOTSUPP;
+
+ err = get_res(dev, slave, vhcr->in_param, RES_FS_RULE, &rrule);
+ if (err)
+ return err;
+ /* Release the rule from busy state before removal */
+ put_res(dev, slave, vhcr->in_param, RES_FS_RULE);
+ err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp);
+ if (err)
+ return err;
+
+ err = mlx4_cmd(dev, vhcr->in_param, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (!err) {
+ err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE,
+ 0);
+ atomic_dec(&rqp->ref_count);
+
+ if (err) {
+ mlx4_err(dev, "Failed to remove flow steering resources\n");
+ goto out;
+ }
+ }
+
+out:
+ put_res(dev, slave, rrule->qpn, RES_QP);
+ return err;
+}
+
+enum {
+ BUSY_MAX_RETRIES = 10
+};
+
+int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+
+ return err;
+}
+
+static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp)
+{
+ struct res_gid *rgid;
+ struct res_gid *tmp;
+ struct mlx4_qp qp; /* dummy for calling attach/detach */
+
+ list_for_each_entry_safe(rgid, tmp, &rqp->mcg_list, list) {
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ mlx4_flow_detach(dev, rgid->reg_id);
+ break;
+ case MLX4_STEERING_MODE_B0:
+ qp.qpn = rqp->local_qpn;
+ (void) mlx4_qp_detach_common(dev, &qp, rgid->gid,
+ rgid->prot, rgid->steer);
+ break;
+ }
+ list_del(&rgid->list);
+ kfree(rgid);
+ }
+}
+
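+/*
+ * Mark every resource of the given type owned by the slave busy and
+ * flag it for removal; the return value is the number of entries that
+ * were already busy and could not be claimed on this pass.
+ */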
+static int _move_all_busy(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource type, int print)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker =
+ &priv->mfunc.master.res_tracker;
+ struct list_head *rlist = &tracker->slave_list[slave].res_list[type];
+ struct res_common *r;
+ struct res_common *tmp;
+ int busy;
+
+ busy = 0;
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(r, tmp, rlist, list) {
+ if (r->owner == slave) {
+ if (!r->removing) {
+ if (r->state == RES_ANY_BUSY) {
+ if (print)
+ mlx4_dbg(dev,
+ "%s id 0x%llx is busy\n",
+ ResourceType(type),
+ (unsigned long long)r->res_id);
+ ++busy;
+ } else {
+ r->from_state = r->state;
+ r->state = RES_ANY_BUSY;
+ r->removing = 1;
+ }
+ }
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return busy;
+}
+
+static int move_all_busy(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource type)
+{
+ unsigned long begin;
+ int busy;
+
+ begin = jiffies;
+ do {
+ busy = _move_all_busy(dev, slave, type, 0);
+ if (time_after(jiffies, begin + 5 * HZ))
+ break;
+ if (busy)
+ cond_resched();
+ } while (busy);
+
+ if (busy)
+ busy = _move_all_busy(dev, slave, type, 1);
+
+ return busy;
+}
+
+static void rem_slave_qps(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *qp_list =
+ &tracker->slave_list[slave].res_list[RES_QP];
+ struct res_qp *qp;
+ struct res_qp *tmp;
+ int state;
+ u64 in_param;
+ int qpn;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_QP);
+ if (err)
+ mlx4_warn(dev, "rem_slave_qps: Could not move all qps to busy "
+ "for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(qp, tmp, qp_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (qp->com.owner == slave) {
+ qpn = qp->com.res_id;
+ detach_qp(dev, slave, qp);
+ state = qp->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_QP_RESERVED:
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&qp->com.node,
+ &tracker->res_tree[RES_QP]);
+ list_del(&qp->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (!valid_reserved(dev, slave, qpn)) {
+ __mlx4_qp_release_range(dev, qpn, 1);
+ mlx4_release_resource(dev, slave,
+ RES_QP, 1, 0);
+ }
+ kfree(qp);
+ state = 0;
+ break;
+ case RES_QP_MAPPED:
+ if (!valid_reserved(dev, slave, qpn))
+ __mlx4_qp_free_icm(dev, qpn);
+ state = RES_QP_RESERVED;
+ break;
+ case RES_QP_HW:
+ in_param = slave;
+ err = mlx4_cmd(dev, in_param,
+ qp->local_qpn, 2,
+ MLX4_CMD_2RST_QP,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_qps: failed"
+ " to move slave %d qpn %d to"
+ " reset\n", slave,
+ qp->local_qpn);
+ atomic_dec(&qp->rcq->ref_count);
+ atomic_dec(&qp->scq->ref_count);
+ atomic_dec(&qp->mtt->ref_count);
+ if (qp->srq)
+ atomic_dec(&qp->srq->ref_count);
+ state = RES_QP_MAPPED;
+ break;
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_srqs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *srq_list =
+ &tracker->slave_list[slave].res_list[RES_SRQ];
+ struct res_srq *srq;
+ struct res_srq *tmp;
+ int state;
+ u64 in_param;
+ LIST_HEAD(tlist);
+ int srqn;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_SRQ);
+ if (err)
+ mlx4_warn(dev, "rem_slave_srqs: Could not move all srqs to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(srq, tmp, srq_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (srq->com.owner == slave) {
+ srqn = srq->com.res_id;
+ state = srq->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_SRQ_ALLOCATED:
+ __mlx4_srq_free_icm(dev, srqn);
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&srq->com.node,
+ &tracker->res_tree[RES_SRQ]);
+ list_del(&srq->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_release_resource(dev, slave,
+ RES_SRQ, 1, 0);
+ kfree(srq);
+ state = 0;
+ break;
+
+ case RES_SRQ_HW:
+ in_param = slave;
+ err = mlx4_cmd(dev, in_param, srqn, 1,
+ MLX4_CMD_HW2SW_SRQ,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_srqs: failed"
+ " to move slave %d srq %d to"
+ " SW ownership\n",
+ slave, srqn);
+
+ atomic_dec(&srq->mtt->ref_count);
+ if (srq->cq)
+ atomic_dec(&srq->cq->ref_count);
+ state = RES_SRQ_ALLOCATED;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_cqs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *cq_list =
+ &tracker->slave_list[slave].res_list[RES_CQ];
+ struct res_cq *cq;
+ struct res_cq *tmp;
+ int state;
+ u64 in_param;
+ LIST_HEAD(tlist);
+ int cqn;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_CQ);
+ if (err)
+ mlx4_warn(dev, "rem_slave_cqs: Could not move all cqs to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(cq, tmp, cq_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (cq->com.owner == slave && !atomic_read(&cq->ref_count)) {
+ cqn = cq->com.res_id;
+ state = cq->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_CQ_ALLOCATED:
+ __mlx4_cq_free_icm(dev, cqn);
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&cq->com.node,
+ &tracker->res_tree[RES_CQ]);
+ list_del(&cq->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_release_resource(dev, slave,
+ RES_CQ, 1, 0);
+ kfree(cq);
+ state = 0;
+ break;
+
+ case RES_CQ_HW:
+ in_param = slave;
+ err = mlx4_cmd(dev, in_param, cqn, 1,
+ MLX4_CMD_HW2SW_CQ,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_cqs: failed"
+ " to move slave %d cq %d to"
+ " SW ownership\n",
+ slave, cqn);
+ atomic_dec(&cq->mtt->ref_count);
+ state = RES_CQ_ALLOCATED;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_mrs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mpt_list =
+ &tracker->slave_list[slave].res_list[RES_MPT];
+ struct res_mpt *mpt;
+ struct res_mpt *tmp;
+ int state;
+ u64 in_param;
+ LIST_HEAD(tlist);
+ int mptn;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_MPT);
+ if (err)
+ mlx4_warn(dev, "rem_slave_mrs: Could not move all mpts to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(mpt, tmp, mpt_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (mpt->com.owner == slave) {
+ mptn = mpt->com.res_id;
+ state = mpt->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_MPT_RESERVED:
+ __mlx4_mpt_release(dev, mpt->key);
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&mpt->com.node,
+ &tracker->res_tree[RES_MPT]);
+ list_del(&mpt->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_release_resource(dev, slave,
+ RES_MPT, 1, 0);
+ kfree(mpt);
+ state = 0;
+ break;
+
+ case RES_MPT_MAPPED:
+ __mlx4_mpt_free_icm(dev, mpt->key);
+ state = RES_MPT_RESERVED;
+ break;
+
+ case RES_MPT_HW:
+ in_param = slave;
+ err = mlx4_cmd(dev, in_param, mptn, 0,
+ MLX4_CMD_HW2SW_MPT,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_mrs: failed"
+ " to move slave %d mpt %d to"
+ " SW ownership\n",
+ slave, mptn);
+ if (mpt->mtt)
+ atomic_dec(&mpt->mtt->ref_count);
+ state = RES_MPT_MAPPED;
+ break;
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_mtts(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker =
+ &priv->mfunc.master.res_tracker;
+ struct list_head *mtt_list =
+ &tracker->slave_list[slave].res_list[RES_MTT];
+ struct res_mtt *mtt;
+ struct res_mtt *tmp;
+ int state;
+ LIST_HEAD(tlist);
+ int base;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_MTT);
+ if (err)
+ mlx4_warn(dev, "rem_slave_mtts: Could not move all mtts to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(mtt, tmp, mtt_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (mtt->com.owner == slave) {
+ base = mtt->com.res_id;
+ state = mtt->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_MTT_ALLOCATED:
+ __mlx4_free_mtt_range(dev, base,
+ mtt->order);
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&mtt->com.node,
+ &tracker->res_tree[RES_MTT]);
+ list_del(&mtt->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_release_resource(dev, slave, RES_MTT,
+ 1 << mtt->order, 0);
+ kfree(mtt);
+ state = 0;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker =
+ &priv->mfunc.master.res_tracker;
+ struct list_head *fs_rule_list =
+ &tracker->slave_list[slave].res_list[RES_FS_RULE];
+ struct res_fs_rule *fs_rule;
+ struct res_fs_rule *tmp;
+ int state;
+ u64 base;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_FS_RULE);
+ if (err)
+ mlx4_warn(dev, "rem_slave_fs_rule: Could not move all fs rules to busy for slave %d\n",
+ slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(fs_rule, tmp, fs_rule_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (fs_rule->com.owner == slave) {
+ base = fs_rule->com.res_id;
+ state = fs_rule->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_FS_RULE_ALLOCATED:
+ /* detach rule */
+ err = mlx4_cmd(dev, base, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&fs_rule->com.node,
+ &tracker->res_tree[RES_FS_RULE]);
+ list_del(&fs_rule->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(fs_rule);
+ state = 0;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_eqs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *eq_list =
+ &tracker->slave_list[slave].res_list[RES_EQ];
+ struct res_eq *eq;
+ struct res_eq *tmp;
+ int err;
+ int state;
+ LIST_HEAD(tlist);
+ int eqn;
+ struct mlx4_cmd_mailbox *mailbox;
+
+ err = move_all_busy(dev, slave, RES_EQ);
+ if (err)
+ mlx4_warn(dev, "rem_slave_eqs: Could not move all eqs to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(eq, tmp, eq_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (eq->com.owner == slave) {
+ eqn = eq->com.res_id;
+ state = eq->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_EQ_RESERVED:
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&eq->com.node,
+ &tracker->res_tree[RES_EQ]);
+ list_del(&eq->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(eq);
+ state = 0;
+ break;
+
+ case RES_EQ_HW:
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ cond_resched();
+ continue;
+ }
+ err = mlx4_cmd_box(dev, slave, 0,
+ eqn & 0xff, 0,
+ MLX4_CMD_HW2SW_EQ,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_eqs: failed"
+ " to move slave %d eqs %d to"
+ " SW ownership\n", slave, eqn);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ atomic_dec(&eq->mtt->ref_count);
+ state = RES_EQ_RESERVED;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_counters(struct mlx4_dev *dev, int slave)
+{
+ __mlx4_slave_counters_free(dev, slave);
+}
+
+static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *xrcdn_list =
+ &tracker->slave_list[slave].res_list[RES_XRCD];
+ struct res_xrcdn *xrcd;
+ struct res_xrcdn *tmp;
+ int err;
+ int xrcdn;
+
+ err = move_all_busy(dev, slave, RES_XRCD);
+ if (err)
+ mlx4_warn(dev, "rem_slave_xrcdns: Could not move all xrcdns to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(xrcd, tmp, xrcdn_list, com.list) {
+ if (xrcd->com.owner == slave) {
+ xrcdn = xrcd->com.res_id;
+ rb_erase(&xrcd->com.node, &tracker->res_tree[RES_XRCD]);
+ list_del(&xrcd->com.list);
+ kfree(xrcd);
+ __mlx4_xrcd_free(dev, xrcdn);
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ mutex_lock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex);
+ rem_slave_macs(dev, slave);
+ rem_slave_vlans(dev, slave);
+ rem_slave_fs_rule(dev, slave);
+ rem_slave_qps(dev, slave);
+ rem_slave_srqs(dev, slave);
+ rem_slave_cqs(dev, slave);
+ rem_slave_mrs(dev, slave);
+ rem_slave_eqs(dev, slave);
+ rem_slave_mtts(dev, slave);
+ rem_slave_counters(dev, slave);
+ rem_slave_xrcdns(dev, slave);
+ mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex);
+}
+
+void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work)
+{
+ struct mlx4_vf_immed_vlan_work *work =
+ container_of(_work, struct mlx4_vf_immed_vlan_work, work);
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_update_qp_context *upd_context;
+ struct mlx4_dev *dev = &work->priv->dev;
+ struct mlx4_resource_tracker *tracker =
+ &work->priv->mfunc.master.res_tracker;
+ struct list_head *qp_list =
+ &tracker->slave_list[work->slave].res_list[RES_QP];
+ struct res_qp *qp;
+ struct res_qp *tmp;
+ u64 qp_path_mask_vlan_ctrl =
+ ((1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_UNTAGGED) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_1P) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_TAGGED) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_UNTAGGED) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_1P) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_TAGGED));
+
+ u64 qp_path_mask = ((1ULL << MLX4_UPD_QP_PATH_MASK_VLAN_INDEX) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_FVL) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_CV) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_FEUP) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_FVL_RX) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE));
+
+ int err;
+ int port, errors = 0;
+ u8 vlan_control;
+
+ if (mlx4_is_slave(dev)) {
+ mlx4_warn(dev, "Trying to update-qp in slave %d\n",
+ work->slave);
+ goto out;
+ }
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ goto out;
+
+ if (!work->vlan_id)
+ vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
+ else
+ vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+
+ upd_context = mailbox->buf;
+ upd_context->qp_mask = cpu_to_be64(MLX4_UPD_QP_MASK_VSD);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(qp, tmp, qp_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (qp->com.owner == work->slave) {
+ if (qp->com.from_state != RES_QP_HW ||
+ !qp->sched_queue || /* no INIT2RTR trans yet */
+ mlx4_is_qp_reserved(dev, qp->local_qpn) ||
+ qp->qpc_flags & (1 << MLX4_RSS_QPC_FLAG_OFFSET)) {
+ spin_lock_irq(mlx4_tlock(dev));
+ continue;
+ }
+ port = (qp->sched_queue >> 6 & 1) + 1;
+ if (port != work->port) {
+ spin_lock_irq(mlx4_tlock(dev));
+ continue;
+ }
+ if (MLX4_QP_ST_RC == ((qp->qpc_flags >> 16) & 0xff))
+ upd_context->primary_addr_path_mask = cpu_to_be64(qp_path_mask);
+ else
+ upd_context->primary_addr_path_mask =
+ cpu_to_be64(qp_path_mask | qp_path_mask_vlan_ctrl);
+ if (work->vlan_id == MLX4_VGT) {
+ upd_context->qp_context.param3 = qp->param3;
+ upd_context->qp_context.pri_path.vlan_control = qp->vlan_control;
+ upd_context->qp_context.pri_path.fvl_rx = qp->fvl_rx;
+ upd_context->qp_context.pri_path.vlan_index = qp->vlan_index;
+ upd_context->qp_context.pri_path.fl = qp->pri_path_fl;
+ upd_context->qp_context.pri_path.feup = qp->feup;
+ upd_context->qp_context.pri_path.sched_queue =
+ qp->sched_queue;
+ } else {
+ upd_context->qp_context.param3 = qp->param3 & ~cpu_to_be32(MLX4_STRIP_VLAN);
+ upd_context->qp_context.pri_path.vlan_control = vlan_control;
+ upd_context->qp_context.pri_path.vlan_index = work->vlan_ix;
+ upd_context->qp_context.pri_path.fvl_rx =
+ qp->fvl_rx | MLX4_FVL_RX_FORCE_ETH_VLAN;
+ upd_context->qp_context.pri_path.fl =
+ qp->pri_path_fl | MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN;
+ upd_context->qp_context.pri_path.feup =
+ qp->feup | MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN;
+ upd_context->qp_context.pri_path.sched_queue =
+ qp->sched_queue & 0xC7;
+ upd_context->qp_context.pri_path.sched_queue |=
+ ((work->qos & 0x7) << 3);
+ }
+
+ err = mlx4_cmd(dev, mailbox->dma,
+ qp->local_qpn & 0xffffff,
+ 0, MLX4_CMD_UPDATE_QP,
+ MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_info(dev, "UPDATE_QP failed for slave %d, "
+ "port %d, qpn %d (%d)\n",
+ work->slave, port, qp->local_qpn,
+ err);
+ errors++;
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ if (errors)
+ mlx4_err(dev, "%d UPDATE_QP failures for slave %d, port %d\n",
+ errors, work->slave, work->port);
+
+ /* unregister previous vlan_id if needed and we had no errors
+ * while updating the QPs
+ */
+ if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN && !errors &&
+ NO_INDX != work->orig_vlan_ix)
+ __mlx4_unregister_vlan(&work->priv->dev, work->port,
+ work->orig_vlan_id);
+out:
+ kfree(work);
+ return;
+}
+
Property changes on: trunk/sys/ofed/drivers/net/mlx4/resource_tracker.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
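
The rem_slave_*() helpers added to resource_tracker.c above all share one locking shape: take the tracker lock, walk the slave's resource list with a _safe iterator, drop the lock while issuing slow firmware commands for each entry, and re-take it before advancing. Below is a minimal stand-alone sketch of that shape, with a pthread mutex standing in for mlx4_tlock() and every type and function name hypothetical, not driver code:

	#include <pthread.h>
	#include <stdlib.h>

	struct res {
		struct res *next, *prev;	/* circular list; "head" is a sentinel */
		int owner;
	};

	static pthread_mutex_t tlock = PTHREAD_MUTEX_INITIALIZER;

	static void slow_teardown(struct res *r)
	{
		(void)r;	/* firmware commands (e.g. HW2SW) would run here, unlocked */
	}

	static void rem_slave_resources(struct res *head, int slave)
	{
		struct res *r, *tmp;

		pthread_mutex_lock(&tlock);
		for (r = head->next, tmp = r->next; r != head;
		     r = tmp, tmp = r->next) {
			pthread_mutex_unlock(&tlock);	/* slow work runs unlocked */
			if (r->owner == slave) {
				slow_teardown(r);
				pthread_mutex_lock(&tlock);
				r->prev->next = r->next;	/* unlink under the lock */
				r->next->prev = r->prev;
				pthread_mutex_unlock(&tlock);
				free(r);
			}
			pthread_mutex_lock(&tlock);	/* re-take before advancing */
		}
		pthread_mutex_unlock(&tlock);
	}

In the driver, the earlier move_all_busy() call appears to be what makes the unlocked window safe: entries are parked in a busy state first, so no competing path frees them mid-walk.
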
Modified: trunk/sys/ofed/drivers/net/mlx4/sense.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/sense.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/sense.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -38,14 +38,15 @@
#include "mlx4.h"
-static int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
- enum mlx4_port_type *type)
+int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
+ enum mlx4_port_type *type)
{
u64 out_param;
int err = 0;
err = mlx4_cmd_imm(dev, 0, &out_param, port, 0,
- MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
if (err) {
mlx4_err(dev, "Sense command failed for port: %d\n", port);
return err;
@@ -52,8 +53,8 @@
}
if (out_param > 2) {
- mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", out_param);
- return EINVAL;
+ mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", (unsigned long long)out_param);
+ return -EINVAL;
}
*type = out_param;
@@ -80,20 +81,6 @@
}
/*
- * Adjust port configuration:
- * If port 1 sensed nothing and port 2 is IB, set both as IB
- * If port 2 sensed nothing and port 1 is Eth, set both as Eth
- */
- if (stype[0] == MLX4_PORT_TYPE_ETH) {
- for (i = 1; i < dev->caps.num_ports; i++)
- stype[i] = stype[i] ? stype[i] : MLX4_PORT_TYPE_ETH;
- }
- if (stype[dev->caps.num_ports - 1] == MLX4_PORT_TYPE_IB) {
- for (i = 0; i < dev->caps.num_ports - 1; i++)
- stype[i] = stype[i] ? stype[i] : MLX4_PORT_TYPE_IB;
- }
-
- /*
* If sensed nothing, remain in current configuration.
*/
for (i = 0; i < dev->caps.num_ports; i++)
@@ -121,9 +108,8 @@
sense_again:
mutex_unlock(&priv->port_mutex);
- if (sense->resched)
- queue_delayed_work(sense->sense_wq , &sense->sense_poll,
- round_jiffies(MLX4_SENSE_RANGE));
+ queue_delayed_work(mlx4_wq, &sense->sense_poll,
+ round_jiffies_relative(MLX4_SENSE_RANGE));
}
void mlx4_start_sense(struct mlx4_dev *dev)
@@ -134,18 +120,16 @@
if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP))
return;
- sense->resched = 1;
- queue_delayed_work(sense->sense_wq , &sense->sense_poll,
- round_jiffies(MLX4_SENSE_RANGE));
+ queue_delayed_work(mlx4_wq, &sense->sense_poll,
+ round_jiffies_relative(MLX4_SENSE_RANGE));
}
-
void mlx4_stop_sense(struct mlx4_dev *dev)
{
- mlx4_priv(dev)->sense.resched = 0;
+ cancel_delayed_work_sync(&mlx4_priv(dev)->sense.sense_poll);
}
-int mlx4_sense_init(struct mlx4_dev *dev)
+void mlx4_sense_init(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_sense *sense = &priv->sense;
@@ -152,21 +136,8 @@
int port;
sense->dev = dev;
- sense->sense_wq = create_singlethread_workqueue("mlx4_sense");
- if (!sense->sense_wq)
- return -ENOMEM;
-
for (port = 1; port <= dev->caps.num_ports; port++)
sense->do_sense_port[port] = 1;
- INIT_DELAYED_WORK_DEFERRABLE(&sense->sense_poll, mlx4_sense_port);
- return 0;
+ INIT_DEFERRABLE_WORK(&sense->sense_poll, mlx4_sense_port);
}
-
-void mlx4_sense_cleanup(struct mlx4_dev *dev)
-{
- mlx4_stop_sense(dev);
- cancel_delayed_work(&mlx4_priv(dev)->sense.sense_poll);
- destroy_workqueue(mlx4_priv(dev)->sense.sense_wq);
-}
-
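
The sense.c hunks above drop the driver-private workqueue and its resched flag in favor of the shared mlx4_wq, a deferrable work item, and cancel_delayed_work_sync() at shutdown. A stand-alone sketch of that pattern against the stock Linux workqueue API follows; the interval and all names are made up, and system_wq stands in for mlx4_wq:

	#include <linux/module.h>
	#include <linux/workqueue.h>
	#include <linux/jiffies.h>

	#define POLL_RANGE (HZ * 30)

	static struct delayed_work poll_work;

	static void poll_fn(struct work_struct *work)
	{
		/* ... sample the port type here ... */
		queue_delayed_work(system_wq, &poll_work,	/* re-arm */
				   round_jiffies_relative(POLL_RANGE));
	}

	static int __init poll_init(void)
	{
		/* deferrable: may fire late, letting the kernel batch wakeups */
		INIT_DEFERRABLE_WORK(&poll_work, poll_fn);
		queue_delayed_work(system_wq, &poll_work,
				   round_jiffies_relative(POLL_RANGE));
		return 0;
	}

	static void __exit poll_exit(void)
	{
		/* waits for a running instance and stops the re-arm race;
		 * this is what replaces the old "resched" flag */
		cancel_delayed_work_sync(&poll_work);
	}

	module_init(poll_init);
	module_exit(poll_exit);
	MODULE_LICENSE("Dual BSD/GPL");
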
Modified: trunk/sys/ofed/drivers/net/mlx4/srq.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/srq.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/drivers/net/mlx4/srq.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -31,34 +31,14 @@
* SOFTWARE.
*/
-#include <linux/init.h>
-
#include <linux/mlx4/cmd.h>
#include <linux/mlx4/srq.h>
+#include <linux/module.h>
+#include <linux/gfp.h>
#include "mlx4.h"
#include "icm.h"
-struct mlx4_srq_context {
- __be32 state_logsize_srqn;
- u8 logstride;
- u8 reserved1;
- __be16 xrc_domain;
- __be32 pg_offset_cqn;
- u32 reserved2;
- u8 log_page_size;
- u8 reserved3[2];
- u8 mtt_base_addr_h;
- __be32 mtt_base_addr_l;
- __be32 pd;
- __be16 limit_watermark;
- __be16 wqe_cnt;
- u16 reserved4;
- __be16 wqe_counter;
- u32 reserved5;
- __be64 db_rec_addr;
-};
-
void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
@@ -66,8 +46,7 @@
spin_lock(&srq_table->lock);
- srq = radix_tree_lookup(&dev->srq_table_tree,
- srqn & (dev->caps.num_srqs - 1));
+ srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
if (srq)
atomic_inc(&srq->refcount);
@@ -87,8 +66,9 @@
static int mlx4_SW2HW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int srq_num)
{
- return mlx4_cmd(dev, mailbox->dma, srq_num, 0, MLX4_CMD_SW2HW_SRQ,
- MLX4_CMD_TIME_CLASS_A);
+ return mlx4_cmd(dev, mailbox->dma, srq_num, 0,
+ MLX4_CMD_SW2HW_SRQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
}
static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -96,13 +76,13 @@
{
return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
mailbox ? 0 : 1, MLX4_CMD_HW2SW_SRQ,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark)
{
return mlx4_cmd(dev, limit_watermark, srq_num, 0, MLX4_CMD_ARM_SRQ,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
}
static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -109,35 +89,96 @@
int srq_num)
{
return mlx4_cmd_box(dev, 0, mailbox->dma, srq_num, 0, MLX4_CMD_QUERY_SRQ,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
-int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
- struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq)
+int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
- struct mlx4_cmd_mailbox *mailbox;
- struct mlx4_srq_context *srq_context;
- u64 mtt_addr;
int err;
- srq->srqn = mlx4_bitmap_alloc(&srq_table->bitmap);
- if (srq->srqn == -1)
+
+ *srqn = mlx4_bitmap_alloc(&srq_table->bitmap);
+ if (*srqn == -1)
return -ENOMEM;
- err = mlx4_table_get(dev, &srq_table->table, srq->srqn);
+ err = mlx4_table_get(dev, &srq_table->table, *srqn);
if (err)
goto err_out;
- err = mlx4_table_get(dev, &srq_table->cmpt_table, srq->srqn);
+ err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn);
if (err)
goto err_put;
+ return 0;
+err_put:
+ mlx4_table_put(dev, &srq_table->table, *srqn);
+
+err_out:
+ mlx4_bitmap_free(&srq_table->bitmap, *srqn, MLX4_NO_RR);
+ return err;
+}
+
+static int mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
+{
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param, RES_SRQ,
+ RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (!err)
+ *srqn = get_param_l(&out_param);
+
+ return err;
+ }
+ return __mlx4_srq_alloc_icm(dev, srqn);
+}
+
+void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
+{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+
+ mlx4_table_put(dev, &srq_table->cmpt_table, srqn);
+ mlx4_table_put(dev, &srq_table->table, srqn);
+ mlx4_bitmap_free(&srq_table->bitmap, srqn, MLX4_NO_RR);
+}
+
+static void mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
+{
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, srqn);
+ if (mlx4_cmd(dev, in_param, RES_SRQ, RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
+ mlx4_warn(dev, "Failed freeing cq:%d\n", srqn);
+ return;
+ }
+ __mlx4_srq_free_icm(dev, srqn);
+}
+
+int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
+ struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq)
+{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_srq_context *srq_context;
+ u64 mtt_addr;
+ int err;
+
+ err = mlx4_srq_alloc_icm(dev, &srq->srqn);
+ if (err)
+ return err;
+
spin_lock_irq(&srq_table->lock);
- err = radix_tree_insert(&dev->srq_table_tree, srq->srqn, srq);
+ err = radix_tree_insert(&srq_table->tree, srq->srqn, srq);
spin_unlock_irq(&srq_table->lock);
if (err)
- goto err_cmpt_put;
+ goto err_icm;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
@@ -151,7 +192,7 @@
srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) |
srq->srqn);
srq_context->logstride = srq->wqe_shift - 4;
- srq_context->xrc_domain = cpu_to_be16(xrcd);
+ srq_context->xrcd = cpu_to_be16(xrcd);
srq_context->pg_offset_cqn = cpu_to_be32(cqn & 0xffffff);
srq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
@@ -173,52 +214,33 @@
err_radix:
spin_lock_irq(&srq_table->lock);
- radix_tree_delete(&dev->srq_table_tree, srq->srqn);
+ radix_tree_delete(&srq_table->tree, srq->srqn);
spin_unlock_irq(&srq_table->lock);
-err_cmpt_put:
- mlx4_table_put(dev, &srq_table->cmpt_table, srq->srqn);
-
-err_put:
- mlx4_table_put(dev, &srq_table->table, srq->srqn);
-
-err_out:
- mlx4_bitmap_free(&srq_table->bitmap, srq->srqn);
-
+err_icm:
+ mlx4_srq_free_icm(dev, srq->srqn);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_srq_alloc);
-void mlx4_srq_invalidate(struct mlx4_dev *dev, struct mlx4_srq *srq)
+void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq)
{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
int err;
err = mlx4_HW2SW_SRQ(dev, NULL, srq->srqn);
if (err)
mlx4_warn(dev, "HW2SW_SRQ failed (%d) for SRQN %06x\n", err, srq->srqn);
-}
-EXPORT_SYMBOL_GPL(mlx4_srq_invalidate);
-void mlx4_srq_remove(struct mlx4_dev *dev, struct mlx4_srq *srq)
-{
- struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
-
spin_lock_irq(&srq_table->lock);
- radix_tree_delete(&dev->srq_table_tree, srq->srqn);
+ radix_tree_delete(&srq_table->tree, srq->srqn);
spin_unlock_irq(&srq_table->lock);
-}
-EXPORT_SYMBOL_GPL(mlx4_srq_remove);
-void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq)
-{
- struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
-
if (atomic_dec_and_test(&srq->refcount))
complete(&srq->free);
wait_for_completion(&srq->free);
- mlx4_table_put(dev, &srq_table->table, srq->srqn);
- mlx4_bitmap_free(&srq_table->bitmap, srq->srqn);
+ mlx4_srq_free_icm(dev, srq->srqn);
}
EXPORT_SYMBOL_GPL(mlx4_srq_free);
@@ -257,7 +279,9 @@
int err;
spin_lock_init(&srq_table->lock);
- INIT_RADIX_TREE(&dev->srq_table_tree, GFP_ATOMIC);
+ INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC);
+ if (mlx4_is_slave(dev))
+ return 0;
err = mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs,
dev->caps.num_srqs - 1, dev->caps.reserved_srqs, 0);
@@ -269,5 +293,22 @@
void mlx4_cleanup_srq_table(struct mlx4_dev *dev)
{
+ if (mlx4_is_slave(dev))
+ return;
mlx4_bitmap_cleanup(&mlx4_priv(dev)->srq_table.bitmap);
}
+
+struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn)
+{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+ struct mlx4_srq *srq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&srq_table->lock, flags);
+ srq = radix_tree_lookup(&srq_table->tree,
+ srqn & (dev->caps.num_srqs - 1));
+ spin_unlock_irqrestore(&srq_table->lock, flags);
+
+ return srq;
+}
+EXPORT_SYMBOL_GPL(mlx4_srq_lookup);
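
srq.c above splits every allocation path in two: a __mlx4_* helper that touches the bitmap and ICM tables directly, and a wrapper that, under multi-function operation, routes the request through the command interface instead. The split reduced to a sketch; all names are hypothetical stand-ins for mlx4_is_mfunc(), mlx4_cmd_imm()/mlx4_cmd() and the __mlx4_srq_*_icm() helpers:

	#include <stdbool.h>

	bool is_multifunction(void);	/* stands in for mlx4_is_mfunc() */
	int  fw_alloc(int *id);		/* proxy ALLOC_RES over the command channel */
	void fw_free(int id);		/* proxy FREE_RES */
	int  local_alloc(int *id);	/* bitmap + ICM table work, done locally */
	void local_free(int id);

	int res_alloc(int *id)
	{
		/*
		 * In multi-function mode the request goes through the command
		 * interface so the function that owns the tables does the work
		 * and the resource tracker can account it per slave.
		 */
		if (is_multifunction())
			return fw_alloc(id);
		return local_alloc(id);
	}

	void res_free(int id)
	{
		if (is_multifunction()) {
			fw_free(id);
			return;
		}
		local_free(id);
	}
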
Added: trunk/sys/ofed/drivers/net/mlx4/sys_tune.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/sys_tune.c (rev 0)
+++ trunk/sys/ofed/drivers/net/mlx4/sys_tune.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2010, 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <asm/atomic.h>
+
+#include "mlx4.h"
+
+#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
+
+/* Each CPU is put into a group. In most cases, the group number is
+ * equal to the CPU number of one of the CPUs in the group. The
+ * exception is group NR_CPUS which is the default group. This is
+ * protected by sys_tune_startup_mutex. */
+DEFINE_PER_CPU(int, idle_cpu_group) = NR_CPUS;
+
+/* For each group, a count of the number of CPUs in the group which
+ * are known to be busy. A busy CPU might be running the busy loop
+ * below or general kernel code. The count is decremented on entry to
+ * the old pm_idle handler and incremented on exit. The aim is to
+ * avoid the count going to zero or negative. This situation can
+ * occur temporarily during module unload or CPU hot-plug but
+ * normality will be restored when the affected CPUs next exit the
+ * idle loop. */
+static atomic_t busy_cpu_count[NR_CPUS+1];
+
+/* A workqueue item to be executed to cause the CPU to exit from the
+ * idle loop. */
+DEFINE_PER_CPU(struct work_struct, sys_tune_cpu_work);
+
+#define sys_tune_set_state(CPU,STATE) \
+ do { } while(0)
+
+
+/* A mutex to protect most of the module datastructures. */
+static DEFINE_MUTEX(sys_tune_startup_mutex);
+
+/* The old pm_idle handler. */
+static void (*old_pm_idle)(void) = NULL;
+
+static void sys_tune_pm_idle(void)
+{
+ atomic_t *busy_cpus_ptr;
+ int busy_cpus;
+ int cpu = smp_processor_id();
+
+ busy_cpus_ptr = &(busy_cpu_count[per_cpu(idle_cpu_group, cpu)]);
+
+ sys_tune_set_state(cpu, 2);
+
+ local_irq_enable();
+ while (!need_resched()) {
+ busy_cpus = atomic_read(busy_cpus_ptr);
+
+ /* If other CPUs in this group are busy then let this
+ * CPU go idle. We mustn't let the number of busy
+ * CPUs drop below 1. */
+ if (busy_cpus > 1 &&
+ old_pm_idle != NULL &&
+ (atomic_cmpxchg(busy_cpus_ptr, busy_cpus,
+ busy_cpus - 1) == busy_cpus)) {
+ local_irq_disable();
+ sys_tune_set_state(cpu, 3);
+ /* This check might not be necessary, but it
+ * seems safest to include it because there
+ * might be a kernel version which requires
+ * it. */
+ if (need_resched())
+ local_irq_enable();
+ else
+ old_pm_idle();
+ /* This CPU is busy again. */
+ sys_tune_set_state(cpu, 1);
+ atomic_add(1, busy_cpus_ptr);
+ return;
+ }
+
+ cpu_relax();
+ }
+ sys_tune_set_state(cpu, 0);
+}
+
+
+void sys_tune_work_func(struct work_struct *work)
+{
+ /* Do nothing. Since this function is running in process
+ * context, the idle thread isn't running on this CPU. */
+}
+
+
+#ifdef CONFIG_SMP
+static void sys_tune_smp_call(void *info)
+{
+ schedule_work(&get_cpu_var(sys_tune_cpu_work));
+ put_cpu_var(sys_tune_cpu_work);
+}
+#endif
+
+
+#ifdef CONFIG_SMP
+static void sys_tune_refresh(void)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
+ on_each_cpu(&sys_tune_smp_call, NULL, 0, 1);
+#else
+ on_each_cpu(&sys_tune_smp_call, NULL, 1);
+#endif
+}
+#else
+static void sys_tune_refresh(void)
+{
+ /* The current thread is executing on the one and only CPU so
+ * the idle thread isn't running. */
+}
+#endif
+
+
+static int sys_tune_cpu_group(int cpu)
+{
+#ifdef CONFIG_SMP
+ const cpumask_t *mask;
+ int other_cpu;
+ int group;
+
+#if defined(topology_thread_cpumask) && defined(ST_HAVE_EXPORTED_CPU_SIBLING_MAP)
+ /* Keep one hyperthread busy per core. */
+ mask = topology_thread_cpumask(cpu);
+#else
+ return cpu;
+#endif
+ for_each_cpu_mask(other_cpu, *(mask)) {
+ group = per_cpu(idle_cpu_group, other_cpu);
+ if (group != NR_CPUS)
+ return group;
+ }
+#endif
+
+ return cpu;
+}
+
+
+static void sys_tune_add_cpu(int cpu)
+{
+ int group;
+
+ /* Do nothing if this CPU has already been added. */
+ if (per_cpu(idle_cpu_group, cpu) != NR_CPUS)
+ return;
+
+ group = sys_tune_cpu_group(cpu);
+ per_cpu(idle_cpu_group, cpu) = group;
+ atomic_inc(&(busy_cpu_count[group]));
+}
+
+static void sys_tune_del_cpu(int cpu)
+{
+ int group;
+
+ if (per_cpu(idle_cpu_group, cpu) == NR_CPUS)
+ return;
+
+ group = per_cpu(idle_cpu_group, cpu);
+ /* If the CPU was busy, this can cause the count to drop to
+ * zero. To rectify this, we need to cause one of the other
+ * CPUs in the group to exit the idle loop. If the CPU was
+ * not busy then this causes the contribution for this CPU to
+ * go to -1 which can cause the overall count to drop to zero
+ * or go negative. To rectify this situation we need to cause
+ * this CPU to exit the idle loop. */
+ atomic_dec(&(busy_cpu_count[group]));
+ per_cpu(idle_cpu_group, cpu) = NR_CPUS;
+}
+
+
+static int sys_tune_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ int cpu = (long)hcpu;
+
+ switch(action) {
+#ifdef CPU_ONLINE_FROZEN
+ case CPU_ONLINE_FROZEN:
+#endif
+ case CPU_ONLINE:
+ mutex_lock(&sys_tune_startup_mutex);
+ sys_tune_add_cpu(cpu);
+ mutex_unlock(&sys_tune_startup_mutex);
+ /* The CPU might have already entered the idle loop in
+ * the wrong group. Make sure it exits the idle loop
+ * so that it picks up the correct group. */
+ sys_tune_refresh();
+ break;
+
+#ifdef CPU_DEAD_FROZEN
+ case CPU_DEAD_FROZEN:
+#endif
+ case CPU_DEAD:
+ mutex_lock(&sys_tune_startup_mutex);
+ sys_tune_del_cpu(cpu);
+ mutex_unlock(&sys_tune_startup_mutex);
+ /* The deleted CPU may have been the only busy CPU in
+ * the group. Make sure one of the other CPUs in the
+ * group exits the idle loop. */
+ sys_tune_refresh();
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+
+static struct notifier_block sys_tune_cpu_nb = {
+ .notifier_call = sys_tune_cpu_notify,
+};
+
+
+static void sys_tune_ensure_init(void)
+{
+ BUG_ON (old_pm_idle != NULL);
+
+ /* Atomically update pm_idle to &sys_tune_pm_idle. The old value
+ * is stored in old_pm_idle before installing the new
+ * handler. */
+ do {
+ old_pm_idle = pm_idle;
+ } while (cmpxchg(&pm_idle, old_pm_idle, &sys_tune_pm_idle) !=
+ old_pm_idle);
+}
+#endif
+
+void sys_tune_fini(void)
+{
+#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
+ void (*old)(void);
+ int cpu;
+
+ unregister_cpu_notifier(&sys_tune_cpu_nb);
+
+ mutex_lock(&sys_tune_startup_mutex);
+
+ old = cmpxchg(&pm_idle, &sys_tune_pm_idle, old_pm_idle);
+
+ for_each_online_cpu(cpu)
+ sys_tune_del_cpu(cpu);
+
+ mutex_unlock(&sys_tune_startup_mutex);
+
+ /* Our handler may still be executing on other CPUs.
+ * Schedule this thread on all CPUs to make sure all
+ * idle threads get interrupted. */
+ sys_tune_refresh();
+
+ /* Make sure the work item has finished executing on all CPUs.
+ * This in turn ensures that all idle threads have been
+ * interrupted. */
+ flush_scheduled_work();
+#endif /* CONFIG_X86 && CONFIG_APM_MODULE */
+}
+
+void sys_tune_init(void)
+{
+#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ INIT_WORK(&per_cpu(sys_tune_cpu_work, cpu),
+ sys_tune_work_func);
+ }
+
+ /* Start by registering the handler to ensure we don't miss
+ * any updates. */
+ register_cpu_notifier(&sys_tune_cpu_nb);
+
+ mutex_lock(&sys_tune_startup_mutex);
+
+ for_each_online_cpu(cpu)
+ sys_tune_add_cpu(cpu);
+
+ sys_tune_ensure_init();
+
+ mutex_unlock(&sys_tune_startup_mutex);
+
+ /* Ensure our idle handler starts to run. */
+ sys_tune_refresh();
+#endif
+}
+
Property changes on: trunk/sys/ofed/drivers/net/mlx4/sys_tune.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
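
sys_tune.c installs its idle handler by cmpxchg()ing the pm_idle function pointer, so the previous handler is captured for chaining and is later restored only if nothing else replaced the hook in the meantime. The same install/restore dance in portable C11 atomics, every name hypothetical:

	#include <stdatomic.h>

	typedef void (*idle_fn)(void);

	static _Atomic(idle_fn) idle_hook;	/* stands in for pm_idle */
	static idle_fn old_hook;

	static void my_idle(void)
	{
		/* busy-poll here, falling back to old_hook when appropriate */
	}

	static void hook_install(void)
	{
		idle_fn cur;

		/* Retry until we capture exactly the handler that was current
		 * at the instant of the swap, so it can be chained to and
		 * restored later. */
		do {
			cur = atomic_load(&idle_hook);
			old_hook = cur;
		} while (!atomic_compare_exchange_strong(&idle_hook, &cur, my_idle));
	}

	static void hook_remove(void)
	{
		idle_fn expected = my_idle;

		/* Restore only if nobody replaced the hook after us. */
		atomic_compare_exchange_strong(&idle_hook, &expected, old_hook);
	}
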
Added: trunk/sys/ofed/drivers/net/mlx4/utils.c
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/utils.c (rev 0)
+++ trunk/sys/ofed/drivers/net/mlx4/utils.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,189 @@
+/* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */
+
+/*
+ * Copyright (c) 2005, 2006 Reyk Floeter <reyk at openbsd.org>
+ * Copyright (c) 2007 Andrew Thompson <thompsa at FreeBSD.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include <sys/module.h>
+#include <sys/priv.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/hash.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/taskqueue.h>
+#include <sys/eventhandler.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_clone.h>
+#include <net/if_arp.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/bpf.h>
+
+#if defined(INET) || defined(INET6)
+#include <netinet/in.h>
+#endif
+#ifdef INET
+#include <netinet/in_systm.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#endif
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/in6_ifattach.h>
+#endif
+
+#include <net/if_vlan_var.h>
+
+#include "utils.h"
+
+/* XXX this code should be factored out */
+/* XXX copied from if_lagg.c */
+
+static const void *
+mlx4_en_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
+{
+ if (m->m_pkthdr.len < (off + len)) {
+ return (NULL);
+ } else if (m->m_len < (off + len)) {
+ m_copydata(m, off, len, buf);
+ return (buf);
+ }
+ return (mtod(m, char *) + off);
+}
+
+uint32_t
+mlx4_en_hashmbuf(uint32_t flags, struct mbuf *m, uint32_t key)
+{
+ uint16_t etype;
+ uint32_t p = key;
+ int off;
+ struct ether_header *eh;
+ const struct ether_vlan_header *vlan;
+#ifdef INET
+ const struct ip *ip;
+ const uint32_t *ports;
+ int iphlen;
+#endif
+#ifdef INET6
+ const struct ip6_hdr *ip6;
+ uint32_t flow;
+#endif
+ union {
+#ifdef INET
+ struct ip ip;
+#endif
+#ifdef INET6
+ struct ip6_hdr ip6;
+#endif
+ struct ether_vlan_header vlan;
+ uint32_t port;
+ } buf;
+
+ off = sizeof(*eh);
+ if (m->m_len < off)
+ goto out;
+ eh = mtod(m, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ if (flags & MLX4_F_HASHL2) {
+ p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
+ p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+ }
+
+ /* Special handling for encapsulating VLAN frames */
+ if ((m->m_flags & M_VLANTAG) && (flags & MLX4_F_HASHL2)) {
+ p = hash32_buf(&m->m_pkthdr.ether_vtag,
+ sizeof(m->m_pkthdr.ether_vtag), p);
+ } else if (etype == ETHERTYPE_VLAN) {
+ vlan = mlx4_en_gethdr(m, off, sizeof(*vlan), &buf);
+ if (vlan == NULL)
+ goto out;
+
+ if (flags & MLX4_F_HASHL2)
+ p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
+ etype = ntohs(vlan->evl_proto);
+ off += sizeof(*vlan) - sizeof(*eh);
+ }
+
+ switch (etype) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ ip = mlx4_en_gethdr(m, off, sizeof(*ip), &buf);
+ if (ip == NULL)
+ goto out;
+
+ if (flags & MLX4_F_HASHL3) {
+ p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
+ p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
+ }
+ if (!(flags & MLX4_F_HASHL4))
+ break;
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_SCTP:
+ iphlen = ip->ip_hl << 2;
+ if (iphlen < sizeof(*ip))
+ break;
+ off += iphlen;
+ ports = mlx4_en_gethdr(m, off, sizeof(*ports), &buf);
+ if (ports == NULL)
+ break;
+ p = hash32_buf(ports, sizeof(*ports), p);
+ break;
+ }
+ break;
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ if (!(flags & MLX4_F_HASHL3))
+ break;
+ ip6 = mlx4_en_gethdr(m, off, sizeof(*ip6), &buf);
+ if (ip6 == NULL)
+ goto out;
+
+ p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
+ p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
+ flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
+ p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */
+ break;
+#endif
+ }
+out:
+ return (p);
+}
Property changes on: trunk/sys/ofed/drivers/net/mlx4/utils.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/ofed/drivers/net/mlx4/utils.h
===================================================================
--- trunk/sys/ofed/drivers/net/mlx4/utils.h (rev 0)
+++ trunk/sys/ofed/drivers/net/mlx4/utils.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX4_UTILS_H_
+#define _MLX4_UTILS_H_
+
+/* Lagg flags */
+#define MLX4_F_HASHL2 0x00000001 /* hash layer 2 */
+#define MLX4_F_HASHL3 0x00000002 /* hash layer 3 */
+#define MLX4_F_HASHL4 0x00000004 /* hash layer 4 */
+#define MLX4_F_HASHMASK 0x00000007
+
+uint32_t mlx4_en_hashmbuf(uint32_t flags, struct mbuf *m, uint32_t key);
+
+#endif /* _MLX4_UTILS_H_ */
Property changes on: trunk/sys/ofed/drivers/net/mlx4/utils.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
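
utils.c and utils.h above give the driver a lagg-style mbuf flow hash. A hypothetical caller might use it to pick a transmit ring, keeping each flow on one ring (so its packets stay in order) while spreading flows across rings; struct tx_ctx and pick_tx_ring() are illustrative only, not driver code:

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/mbuf.h>

	#include "utils.h"

	struct tx_ctx {
		uint32_t hash_key;	/* seeded once, e.g. from arc4random() */
		int	 num_rings;
	};

	static int
	pick_tx_ring(struct tx_ctx *ctx, struct mbuf *m)
	{
		uint32_t h;

		/* Hash L3 + L4 so the 5-tuple, not the payload, decides. */
		h = mlx4_en_hashmbuf(MLX4_F_HASHL3 | MLX4_F_HASHL4, m,
		    ctx->hash_key);
		return (h % ctx->num_rings);
	}
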
Modified: trunk/sys/ofed/include/asm/atomic-long.h
===================================================================
--- trunk/sys/ofed/include/asm/atomic-long.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/asm/atomic-long.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,6 +26,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#ifndef _ATOMIC_LONG_H_
#define _ATOMIC_LONG_H_
@@ -33,7 +35,7 @@
#include <machine/atomic.h>
typedef struct {
- volatile u_long counter;
+ volatile long counter;
} atomic_long_t;
#define atomic_long_add(i, v) atomic_long_add_return((i), (v))
Property changes on: trunk/sys/ofed/include/asm/atomic-long.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/asm/atomic.h
===================================================================
--- trunk/sys/ofed/include/asm/atomic.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/asm/atomic.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013-2016 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,12 +33,15 @@
#include <sys/cdefs.h>
#include <sys/types.h>
#include <machine/atomic.h>
-#include <asm/atomic-long.h>
typedef struct {
- volatile u_int counter;
+ volatile int counter;
} atomic_t;
+/*------------------------------------------------------------------------*
+ * 32-bit atomic operations
+ *------------------------------------------------------------------------*/
+
#define atomic_add(i, v) atomic_add_return((i), (v))
#define atomic_sub(i, v) atomic_sub_return((i), (v))
#define atomic_inc_return(v) atomic_add_return(1, (v))
@@ -45,6 +49,8 @@
#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0)
+#define atomic_dec_return(v) atomic_sub_return(1, (v))
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
static inline int
atomic_add_return(int i, atomic_t *v)
@@ -82,4 +88,19 @@
return atomic_fetchadd_int(&v->counter, -1) - 1;
}
-#endif /* _ASM_ATOMIC_H_ */
+static inline int
+atomic_add_unless(atomic_t *v, int a, int u)
+{
+ int c;
+
+ for (;;) {
+ c = atomic_read(v);
+ if (unlikely(c == u))
+ break;
+ if (likely(atomic_cmpset_int(&v->counter, c, c + a)))
+ break;
+ }
+ return (c != u);
+}
+
+#endif /* _ASM_ATOMIC_H_ */
Property changes on: trunk/sys/ofed/include/asm/atomic.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
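
The atomic.h additions above include atomic_add_unless(), and with it atomic_inc_not_zero(), the usual building block for taking a reference only while an object is still live. A sketch of that idiom against the compat API; struct obj and both helpers are hypothetical:

	#include <asm/atomic.h>		/* the compat header patched above */

	struct obj {
		atomic_t refcount;	/* 0 means teardown has begun */
	};

	/* Returns the object with a reference held, or NULL if it is dying. */
	static struct obj *
	obj_tryget(struct obj *o)
	{
		/* atomic_inc_not_zero() bumps the count unless it already hit
		 * zero, so a dying object can never be resurrected by a racing
		 * lookup. */
		if (o == NULL || !atomic_inc_not_zero(&o->refcount))
			return (NULL);
		return (o);
	}

	static void
	obj_put(struct obj *o)
	{
		if (atomic_dec_and_test(&o->refcount)) {
			/* last reference dropped: safe to tear down and free o */
		}
	}
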
Modified: trunk/sys/ofed/include/asm/byteorder.h
===================================================================
--- trunk/sys/ofed/include/asm/byteorder.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/asm/byteorder.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,11 +26,13 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#ifndef _ASM_BYTEORDER_H_
#define _ASM_BYTEORDER_H_
#include <sys/types.h>
#include <sys/endian.h>
+#include <asm/types.h>
#if BYTE_ORDER == LITTLE_ENDIAN
#define __LITTLE_ENDIAN
Property changes on: trunk/sys/ofed/include/asm/byteorder.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/asm/current.h
===================================================================
--- trunk/sys/ofed/include/asm/current.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/asm/current.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/asm/current.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/asm/fcntl.h
===================================================================
--- trunk/sys/ofed/include/asm/fcntl.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/asm/fcntl.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Property changes on: trunk/sys/ofed/include/asm/fcntl.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/asm/io.h
===================================================================
--- trunk/sys/ofed/include/asm/io.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/asm/io.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -1,7 +1,8 @@
-/*-
+/*
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,4 +27,9 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef _ASM_IO_H_
+#define _ASM_IO_H_
+
#include <linux/io.h>
+
+#endif /* _ASM_IO_H_ */
Property changes on: trunk/sys/ofed/include/asm/io.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/asm/page.h
===================================================================
--- trunk/sys/ofed/include/asm/page.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/asm/page.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/asm/page.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/asm/pgtable.h
===================================================================
--- trunk/sys/ofed/include/asm/pgtable.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/asm/pgtable.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Property changes on: trunk/sys/ofed/include/asm/pgtable.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/asm/semaphore.h
===================================================================
--- trunk/sys/ofed/include/asm/semaphore.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/asm/semaphore.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/asm/semaphore.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/asm/system.h
===================================================================
--- trunk/sys/ofed/include/asm/system.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/asm/system.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/asm/system.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/asm/types.h
===================================================================
--- trunk/sys/ofed/include/asm/types.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/asm/types.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,43 +26,36 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#ifndef _ASM_TYPES_H_
#define _ASM_TYPES_H_
-typedef unsigned short umode_t;
-
-typedef __signed__ char __s8;
-typedef unsigned char __u8;
-
-typedef __signed__ short __s16;
-typedef unsigned short __u16;
-
-typedef __signed__ int __s32;
-typedef unsigned int __u32;
-
-#if defined(__GNUC__) // && !defined(__STRICT_ANSI__)
-typedef __signed__ long long __s64;
-typedef unsigned long long __u64;
-#endif
-
#ifdef _KERNEL
-typedef signed char s8;
-typedef unsigned char u8;
+typedef uint8_t u8;
+typedef uint8_t __u8;
+typedef uint16_t u16;
+typedef uint16_t __u16;
+typedef uint32_t u32;
+typedef uint32_t __u32;
+typedef uint64_t u64;
+typedef uint64_t __u64;
-typedef signed short s16;
-typedef unsigned short u16;
+typedef int8_t s8;
+typedef int8_t __s8;
+typedef int16_t s16;
+typedef int16_t __s16;
+typedef int32_t s32;
+typedef int32_t __s32;
+typedef int64_t s64;
+typedef int64_t __s64;
-typedef signed int s32;
-typedef unsigned int u32;
-
-typedef signed long long s64;
-typedef unsigned long long u64;
-
/* DMA addresses come in generic and 64-bit flavours. */
typedef vm_paddr_t dma_addr_t;
typedef vm_paddr_t dma64_addr_t;
+typedef unsigned short umode_t;
+
#endif /* _KERNEL */
#endif /* _ASM_TYPES_H_ */
Property changes on: trunk/sys/ofed/include/asm/types.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/asm/uaccess.h
===================================================================
--- trunk/sys/ofed/include/asm/uaccess.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/asm/uaccess.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,6 +26,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#ifndef _ASM_UACCESS_H_
#define _ASM_UACCESS_H_
Property changes on: trunk/sys/ofed/include/asm/uaccess.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/linux/bitops.h
===================================================================
--- trunk/sys/ofed/include/linux/bitops.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/bitops.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,7 +36,10 @@
#endif
#define BIT_MASK(n) (~0UL >> (BITS_PER_LONG - (n)))
#define BITS_TO_LONGS(n) howmany((n), BITS_PER_LONG)
+#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
+#define BITS_PER_BYTE 8
+
static inline int
__ffs(int mask)
{
@@ -63,6 +67,16 @@
#define ffz(mask) __ffs(~(mask))
+static inline int get_count_order(unsigned int count)
+{
+ int order;
+
+ order = fls(count) - 1;
+ if (count & (count - 1))
+ order++;
+ return order;
+}
+
static inline unsigned long
find_first_bit(unsigned long *addr, unsigned long size)
{
@@ -124,11 +138,11 @@
if (mask)
return (bit + __flsl(mask));
}
- while (--pos) {
+ while (pos--) {
addr--;
bit -= BITS_PER_LONG;
if (*addr)
- return (bit + __flsl(mask));
+ return (bit + __flsl(*addr));
}
return (size);
}
@@ -272,16 +286,23 @@
return (1);
}
-#define NBINT (NBBY * sizeof(int))
+#define NBLONG (NBBY * sizeof(long))
+#define __set_bit(i, a) \
+ atomic_set_long(&((volatile long *)(a))[(i)/NBLONG], 1UL << ((i) % NBLONG))
+
#define set_bit(i, a) \
- atomic_set_int(&((volatile int *)(a))[(i)/NBINT], 1 << (i) % NBINT)
+ atomic_set_long(&((volatile long *)(a))[(i)/NBLONG], 1UL << ((i) % NBLONG))
+#define __clear_bit(i, a) \
+ atomic_clear_long(&((volatile long *)(a))[(i)/NBLONG], 1UL << ((i) % NBLONG))
+
#define clear_bit(i, a) \
- atomic_clear_int(&((volatile int *)(a))[(i)/NBINT], 1 << (i) % NBINT)
+ atomic_clear_long(&((volatile long *)(a))[(i)/NBLONG], 1UL << ((i) % NBLONG))
#define test_bit(i, a) \
- !!(atomic_load_acq_int(&((volatile int *)(a))[(i)/NBINT]) & 1 << ((i) % NBINT))
+ !!(atomic_load_acq_long(&((volatile long *)(a))[(i)/NBLONG]) & \
+ (1UL << ((i) % NBLONG)))
static inline long
test_and_clear_bit(long bit, long *var)
@@ -288,7 +309,9 @@
{
long val;
- bit = 1 << bit;
+ var += bit / (sizeof(long) * NBBY);
+ bit %= sizeof(long) * NBBY;
+ bit = (1UL << bit);
do {
val = *(volatile long *)var;
} while (atomic_cmpset_long(var, val, val & ~bit) == 0);
@@ -301,7 +324,9 @@
{
long val;
- bit = 1 << bit;
+ var += bit / (sizeof(long) * NBBY);
+ bit %= sizeof(long) * NBBY;
+ bit = (1UL << bit);
do {
val = *(volatile long *)var;
} while (atomic_cmpset_long(var, val, val | bit) == 0);
@@ -309,4 +334,185 @@
return !!(val & bit);
}
+
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
+#define BITMAP_LAST_WORD_MASK(nbits) \
+( \
+ ((nbits) % BITS_PER_LONG) ? \
+ (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \
+)
+
+
+static inline void
+bitmap_set(unsigned long *map, int start, int nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const int size = start + nr;
+ int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+ while (nr - bits_to_set >= 0) {
+ *p |= mask_to_set;
+ nr -= bits_to_set;
+ bits_to_set = BITS_PER_LONG;
+ mask_to_set = ~0UL;
+ p++;
+ }
+ if (nr) {
+ mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+
+static inline void
+bitmap_clear(unsigned long *map, int start, int nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const int size = start + nr;
+ int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+ while (nr - bits_to_clear >= 0) {
+ *p &= ~mask_to_clear;
+ nr -= bits_to_clear;
+ bits_to_clear = BITS_PER_LONG;
+ mask_to_clear = ~0UL;
+ p++;
+ }
+ if (nr) {
+ mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+ *p &= ~mask_to_clear;
+ }
+}
+
+enum {
+ REG_OP_ISFREE, /* true if region is all zero bits */
+ REG_OP_ALLOC, /* set all bits in region */
+ REG_OP_RELEASE, /* clear all bits in region */
+};
+
+static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op)
+{
+ int nbits_reg; /* number of bits in region */
+ int index; /* index first long of region in bitmap */
+ int offset; /* bit offset region in bitmap[index] */
+ int nlongs_reg; /* num longs spanned by region in bitmap */
+ int nbitsinlong; /* num bits of region in each spanned long */
+ unsigned long mask; /* bitmask for one long of region */
+ int i; /* scans bitmap by longs */
+ int ret = 0; /* return value */
+
+ /*
+ * Either nlongs_reg == 1 (for small orders that fit in one long)
+ * or (offset == 0 && mask == ~0UL) (for larger multiword orders.)
+ */
+ nbits_reg = 1 << order;
+ index = pos / BITS_PER_LONG;
+ offset = pos - (index * BITS_PER_LONG);
+ nlongs_reg = BITS_TO_LONGS(nbits_reg);
+ nbitsinlong = min(nbits_reg, BITS_PER_LONG);
+
+ /*
+ * Can't do "mask = (1UL << nbitsinlong) - 1", as that
+ * overflows if nbitsinlong == BITS_PER_LONG.
+ */
+ mask = (1UL << (nbitsinlong - 1));
+ mask += mask - 1;
+ mask <<= offset;
+
+ switch (reg_op) {
+ case REG_OP_ISFREE:
+ for (i = 0; i < nlongs_reg; i++) {
+ if (bitmap[index + i] & mask)
+ goto done;
+ }
+ ret = 1; /* all bits in region free (zero) */
+ break;
+
+ case REG_OP_ALLOC:
+ for (i = 0; i < nlongs_reg; i++)
+ bitmap[index + i] |= mask;
+ break;
+
+ case REG_OP_RELEASE:
+ for (i = 0; i < nlongs_reg; i++)
+ bitmap[index + i] &= ~mask;
+ break;
+ }
+done:
+ return ret;
+}
+
+/**
+ * bitmap_find_free_region - find a contiguous aligned mem region
+ * @bitmap: array of unsigned longs corresponding to the bitmap
+ * @bits: number of bits in the bitmap
+ * @order: region size (log base 2 of number of bits) to find
+ *
+ * Find a region of free (zero) bits in a @bitmap of @bits bits and
+ * allocate them (set them to one). Only consider regions of length
+ * a power (@order) of two, aligned to that power of two, which
+ * makes the search algorithm much faster.
+ *
+ * Return the bit offset in bitmap of the allocated region,
+ * or -errno on failure.
+ */
+static inline int
+bitmap_find_free_region(unsigned long *bitmap, int bits, int order)
+{
+ int pos, end; /* scans bitmap by regions of size order */
+
+ for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) {
+ if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
+ continue;
+ __reg_op(bitmap, pos, order, REG_OP_ALLOC);
+ return pos;
+ }
+ return -ENOMEM;
+}
+
+/**
+ * bitmap_allocate_region - allocate bitmap region
+ * @bitmap: array of unsigned longs corresponding to the bitmap
+ * @pos: beginning of bit region to allocate
+ * @order: region size (log base 2 of number of bits) to allocate
+ *
+ * Allocate (set bits in) a specified region of a bitmap.
+ *
+ * Return 0 on success, or %-EBUSY if specified region wasn't
+ * free (not all bits were zero).
+ */
+
+static inline int
+bitmap_allocate_region(unsigned long *bitmap, int pos, int order)
+{
+ if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
+ return -EBUSY;
+ __reg_op(bitmap, pos, order, REG_OP_ALLOC);
+ return 0;
+}
+
+/**
+ * bitmap_release_region - release allocated bitmap region
+ * @bitmap: array of unsigned longs corresponding to the bitmap
+ * @pos: beginning of bit region to release
+ * @order: region size (log base 2 of number of bits) to release
+ *
+ * This is the complement to bitmap_find_free_region() and releases
+ * the found region (by clearing it in the bitmap).
+ *
+ * No return value.
+ */
+static inline void
+bitmap_release_region(unsigned long *bitmap, int pos, int order)
+{
+ __reg_op(bitmap, pos, order, REG_OP_RELEASE);
+}
+
+
+#define for_each_set_bit(bit, addr, size) \
+ for ((bit) = find_first_bit((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
#endif /* _LINUX_BITOPS_H_ */
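
bitops.h above gains the Linux bitmap region allocator: bitmap_find_free_region() hands out naturally aligned power-of-two runs of bits, and bitmap_release_region() gives them back. A small usage sketch, assuming the helpers and BITS_PER_LONG above are in scope; the sizes are arbitrary:

	#define TOTAL_BITS 256

	static unsigned long pool[TOTAL_BITS / BITS_PER_LONG];	/* zeroed: all free */

	static int
	grab_eight_aligned_bits(void)
	{
		int pos;

		/* order 3 means a naturally aligned run of 2^3 = 8 zero bits;
		 * the alignment is what lets __reg_op() test whole masks at
		 * once instead of scanning bit by bit. */
		pos = bitmap_find_free_region(pool, TOTAL_BITS, 3);
		if (pos < 0)
			return (pos);	/* -ENOMEM: no free region */

		/* ... use bits [pos, pos + 7] ... */

		bitmap_release_region(pool, pos, 3);
		return (0);
	}
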
Added: trunk/sys/ofed/include/linux/cache.h
===================================================================
--- trunk/sys/ofed/include/linux/cache.h (rev 0)
+++ trunk/sys/ofed/include/linux/cache.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,36 @@
+/*-
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2010 iX Systems, Inc.
+ * Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_CACHE_H_
+#define _LINUX_CACHE_H_
+
+#define cache_line_size() CACHE_LINE_SIZE
+#define L1_CACHE_BYTES CACHE_LINE_SIZE
+
+#endif /* _LINUX_CACHE_H_ */
Property changes on: trunk/sys/ofed/include/linux/cache.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/ofed/include/linux/cdev.h
===================================================================
--- trunk/sys/ofed/include/linux/cdev.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/cdev.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Added: trunk/sys/ofed/include/linux/clocksource.h
===================================================================
--- trunk/sys/ofed/include/linux/clocksource.h (rev 0)
+++ trunk/sys/ofed/include/linux/clocksource.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,37 @@
+/*-
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2010 iX Systems, Inc.
+ * Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_CLOCKSOURCE_H
+#define _LINUX_CLOCKSOURCE_H
+
+/* clocksource cycle base type */
+typedef u64 cycle_t;
+
+
+#endif /* _LINUX_CLOCKSOURCE_H */
Property changes on: trunk/sys/ofed/include/linux/clocksource.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/ofed/include/linux/compat.h
===================================================================
--- trunk/sys/ofed/include/linux/compat.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/compat.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,5 +30,4 @@
#ifndef _LINUX_COMPAT_H_
#define _LINUX_COMPAT_H_
-
#endif /* _LINUX_COMPAT_H_ */
Modified: trunk/sys/ofed/include/linux/compiler.h
===================================================================
--- trunk/sys/ofed/include/linux/compiler.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/compiler.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/completion.h
===================================================================
--- trunk/sys/ofed/include/linux/completion.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/completion.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,131 +26,41 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#ifndef _LINUX_COMPLETION_H_
#define _LINUX_COMPLETION_H_
#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/wait.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/sleepqueue.h>
-#include <sys/kernel.h>
-#include <sys/proc.h>
-
struct completion {
unsigned int done;
};
-#define INIT_COMPLETION(c) ((c).done = 0)
-#define init_completion(c) ((c)->done = 0)
+#define INIT_COMPLETION(c) \
+ ((c).done = 0)
+#define init_completion(c) \
+ ((c)->done = 0)
+#define complete(c) \
+ linux_complete_common((c), 0)
+#define complete_all(c) \
+ linux_complete_common((c), 1)
+#define wait_for_completion(c) \
+ linux_wait_for_common((c), 0)
+#define wait_for_completion_interuptible(c) \
+ linux_wait_for_common((c), 1)
+#define wait_for_completion_timeout(c, timeout) \
+ linux_wait_for_timeout_common((c), (timeout), 0)
+#define wait_for_completion_interruptible_timeout(c, timeout) \
+ linux_wait_for_timeout_common((c), (timeout), 1)
+#define try_wait_for_completion(c) \
+ linux_try_wait_for_completion(c)
+#define completion_done(c) \
+ linux_completion_done(c)
-static inline void
-_complete_common(struct completion *c, int all)
-{
- int wakeup_swapper;
+extern void linux_complete_common(struct completion *, int);
+extern long linux_wait_for_common(struct completion *, int);
+extern long linux_wait_for_timeout_common(struct completion *, long, int);
+extern int linux_try_wait_for_completion(struct completion *);
+extern int linux_completion_done(struct completion *);
- sleepq_lock(c);
- c->done++;
- if (all)
- wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
- else
- wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
- sleepq_release(c);
- if (wakeup_swapper)
- kick_proc0();
-}
-
-#define complete(c) _complete_common(c, 0)
-#define complete_all(c) _complete_common(c, 1)
-
-/*
- * Indefinite wait for done != 0 with or without signals.
- */
-static inline long
-_wait_for_common(struct completion *c, int flags)
-{
-
- flags |= SLEEPQ_SLEEP;
- for (;;) {
- sleepq_lock(c);
- if (c->done)
- break;
- sleepq_add(c, NULL, "completion", flags, 0);
- if (flags & SLEEPQ_INTERRUPTIBLE) {
- if (sleepq_wait_sig(c, 0) != 0)
- return (-ERESTARTSYS);
- } else
- sleepq_wait(c, 0);
- }
- c->done--;
- sleepq_release(c);
-
- return (0);
-}
-
-#define wait_for_completion(c) _wait_for_common(c, 0)
-#define wait_for_completion_interuptible(c) \
- _wait_for_common(c, SLEEPQ_INTERRUPTIBLE)
-
-static inline long
-_wait_for_timeout_common(struct completion *c, long timeout, int flags)
-{
- long end;
-
- end = ticks + timeout;
- flags |= SLEEPQ_SLEEP;
- for (;;) {
- sleepq_lock(c);
- if (c->done)
- break;
- sleepq_add(c, NULL, "completion", flags, 0);
- sleepq_set_timeout(c, end - ticks);
- if (flags & SLEEPQ_INTERRUPTIBLE) {
- if (sleepq_timedwait_sig(c, 0) != 0)
- return (-ERESTARTSYS);
- } else
- sleepq_timedwait(c, 0);
- }
- c->done--;
- sleepq_release(c);
- timeout = end - ticks;
-
- return (timeout > 0 ? timeout : 1);
-}
-
-#define wait_for_completion_timeout(c, timeout) \
- _wait_for_timeout_common(c, timeout, 0)
-#define wait_for_completion_interruptible_timeout(c, timeout) \
- _wait_for_timeout_common(c, timeout, SLEEPQ_INTERRUPTIBLE)
-
-static inline int
-try_wait_for_completion(struct completion *c)
-{
- int isdone;
-
- isdone = 1;
- sleepq_lock(c);
- if (c->done)
- c->done--;
- else
- isdone = 0;
- sleepq_release(c);
- return (isdone);
-}
-
-static inline int
-completion_done(struct completion *c)
-{
- int isdone;
-
- isdone = 1;
- sleepq_lock(c);
- if (c->done == 0)
- isdone = 0;
- sleepq_release(c);
- return (isdone);
-}
-
-#endif /* _LINUX_COMPLETION_H_ */
+#endif /* _LINUX_COMPLETION_H_ */
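
The inline sleepqueue implementation now lives in the kernel as the
linux_*() functions above; callers keep the same Linux-flavored surface.
A minimal usage sketch (the names are illustrative, and a zero return from
wait_for_completion_timeout() is assumed to mean the timeout expired, per
the Linux convention):

    static struct completion cmd_done;

    static void
    submit(void)
    {
            init_completion(&cmd_done);
            /* ... start the operation in another context ... */
    }

    static void
    done_callback(void)
    {
            complete(&cmd_done);            /* wake a single waiter */
    }

    static int
    wait_for_cmd(void)
    {
            /* Sleep for at most one second's worth of ticks. */
            if (wait_for_completion_timeout(&cmd_done, HZ) == 0)
                    return (-ETIMEDOUT);
            return (0);
    }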
Modified: trunk/sys/ofed/include/linux/delay.h
===================================================================
--- trunk/sys/ofed/include/linux/delay.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/delay.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/device.h
===================================================================
--- trunk/sys/ofed/include/linux/device.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/device.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -51,6 +52,7 @@
devclass_t bsdclass;
void (*class_release)(struct class *class);
void (*dev_release)(struct device *dev);
+ char * (*devnode)(struct device *dev, umode_t *mode);
};
struct device {
@@ -72,10 +74,12 @@
extern struct kobject class_root;
struct class_attribute {
- struct attribute attr;
- ssize_t (*show)(struct class *, char *);
- ssize_t (*store)(struct class *, const char *, size_t);
+ struct attribute attr;
+ ssize_t (*show)(struct class *, struct class_attribute *, char *);
+ ssize_t (*store)(struct class *, struct class_attribute *, const char *, size_t);
+ const void *(*namespace)(struct class *, const struct class_attribute *);
};
+
#define CLASS_ATTR(_name, _mode, _show, _store) \
struct class_attribute class_attr_##_name = \
{ { #_name, NULL, _mode }, _show, _store }
@@ -83,10 +87,10 @@
struct device_attribute {
struct attribute attr;
ssize_t (*show)(struct device *,
- struct device_attribute *, char *);
+ struct device_attribute *, char *);
ssize_t (*store)(struct device *,
- struct device_attribute *, const char *,
- size_t);
+ struct device_attribute *, const char *,
+ size_t);
};
#define DEVICE_ATTR(_name, _mode, _show, _store) \
@@ -93,6 +97,28 @@
struct device_attribute dev_attr_##_name = \
{ { #_name, NULL, _mode }, _show, _store }
+/* Simple class attribute that is just a static string */
+struct class_attribute_string {
+ struct class_attribute attr;
+ char *str;
+};
+
+static inline ssize_t
+show_class_attr_string(struct class *class,
+ struct class_attribute *attr, char *buf)
+{
+ struct class_attribute_string *cs;
+ cs = container_of(attr, struct class_attribute_string, attr);
+ return snprintf(buf, PAGE_SIZE, "%s\n", cs->str);
+}
+
+/* Currently read-only */
+#define _CLASS_ATTR_STRING(_name, _mode, _str) \
+ { __ATTR(_name, _mode, show_class_attr_string, NULL), _str }
+#define CLASS_ATTR_STRING(_name, _mode, _str) \
+ struct class_attribute_string class_attr_##_name = \
+ _CLASS_ATTR_STRING(_name, _mode, _str)
+
#define dev_err(dev, fmt, ...) device_printf((dev)->bsddev, fmt, ##__VA_ARGS__)
#define dev_warn(dev, fmt, ...) device_printf((dev)->bsddev, fmt, ##__VA_ARGS__)
#define dev_info(dev, fmt, ...) device_printf((dev)->bsddev, fmt, ##__VA_ARGS__)
@@ -151,7 +177,7 @@
error = -EIO;
if (dattr->show)
error = dattr->show(container_of(kobj, struct class, kobj),
- buf);
+ dattr, buf);
return (error);
}
@@ -166,7 +192,7 @@
error = -EIO;
if (dattr->store)
error = dattr->store(container_of(kobj, struct class, kobj),
- buf, count);
+ dattr, buf, count);
return (error);
}
@@ -385,4 +411,12 @@
sysfs_remove_file(&class->kobj, &attr->attr);
}
+static inline int dev_to_node(struct device *dev)
+{
+ return -1;
+}
+
+char *kvasprintf(gfp_t, const char *, va_list);
+char *kasprintf(gfp_t, const char *, ...);
+
#endif /* _LINUX_DEVICE_H_ */
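
A minimal sketch of the new static-string class attribute (the attribute
name and value are made up; class_create_file() is assumed to be the
registration helper used elsewhere in this header):

    static CLASS_ATTR_STRING(version, 0444, "1.0");

    static int
    publish_version(struct class *cls)
    {
            /* Exposes a read-only "version" node under the class. */
            return (class_create_file(cls, &class_attr_version.attr));
    }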
Modified: trunk/sys/ofed/include/linux/dma-attrs.h
===================================================================
--- trunk/sys/ofed/include/linux/dma-attrs.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/dma-attrs.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/dma-mapping.h
===================================================================
--- trunk/sys/ofed/include/linux/dma-mapping.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/dma-mapping.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -138,6 +139,14 @@
*dma_handle = 0;
return (mem);
}
+
+static inline void *
+dma_zalloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t flag)
+{
+
+ return (dma_alloc_coherent(dev, size, dma_handle, flag | __GFP_ZERO));
+}
static inline void
dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
@@ -245,6 +254,13 @@
return (0);
}
+static inline unsigned int dma_set_max_seg_size(struct device *dev,
+ unsigned int size)
+{
+ return (0);
+}
+
+
#define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL)
#define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, NULL)
#define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL)
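
dma_zalloc_coherent() simply forwards to dma_alloc_coherent() with
__GFP_ZERO or'ed in, so callers get pre-zeroed coherent memory. A sketch
(the size and names are illustrative):

    static void *
    alloc_desc_ring(struct device *dev, dma_addr_t *busaddr)
    {
            /* 4 KB of coherent DMA memory, already zeroed. */
            return (dma_zalloc_coherent(dev, 4096, busaddr, GFP_KERNEL));
    }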
Modified: trunk/sys/ofed/include/linux/dmapool.h
===================================================================
--- trunk/sys/ofed/include/linux/dmapool.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/dmapool.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/err.h
===================================================================
--- trunk/sys/ofed/include/linux/err.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/err.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -57,4 +58,15 @@
return (void *)ptr;
}
+static inline int
+PTR_ERR_OR_ZERO(const void *ptr)
+{
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+ else
+ return 0;
+}
+
+#define PTR_RET(p) PTR_ERR_OR_ZERO(p)
+
#endif /* _LINUX_ERR_H_ */
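
PTR_ERR_OR_ZERO() collapses the usual IS_ERR()/PTR_ERR() dance when a
constructor returns an ERR_PTR()-style pointer. Sketch (foo_create() is
hypothetical):

    struct foo;
    struct foo *foo_create(void);   /* hypothetical: ERR_PTR() on failure */

    static int
    create_foo(void)
    {
            /* 0 on success, the embedded negative errno otherwise. */
            return (PTR_ERR_OR_ZERO(foo_create()));
    }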
Modified: trunk/sys/ofed/include/linux/errno.h
===================================================================
--- trunk/sys/ofed/include/linux/errno.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/errno.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,9 +32,11 @@
#include <sys/errno.h>
-#define ECOMM ESTALE
-#define ENODATA ECONNREFUSED
-#define ENOIOCTLCMD ENOIOCTL /* XXX this is negative */
-#define ERESTARTSYS ERESTART /* XXX this is negative */
+#define ECOMM ESTALE
+#define ENODATA ECONNREFUSED
+#define ENOIOCTLCMD ENOIOCTL
+#define ERESTARTSYS ERESTART
+#define ENOTSUPP EOPNOTSUPP
+#define ENONET EHOSTDOWN
-#endif /* _LINUX_ERRNO_H_ */
+#endif /* _LINUX_ERRNO_H_ */
Added: trunk/sys/ofed/include/linux/etherdevice.h
===================================================================
--- trunk/sys/ofed/include/linux/etherdevice.h (rev 0)
+++ trunk/sys/ofed/include/linux/etherdevice.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2014 Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#ifndef _LINUX_ETHERDEVICE
+#define _LINUX_ETHERDEVICE
+
+#include <linux/types.h>
+
+#define ETH_MODULE_SFF_8079 1
+#define ETH_MODULE_SFF_8079_LEN 256
+#define ETH_MODULE_SFF_8472 2
+#define ETH_MODULE_SFF_8472_LEN 512
+#define ETH_MODULE_SFF_8636 3
+#define ETH_MODULE_SFF_8636_LEN 256
+#define ETH_MODULE_SFF_8436 4
+#define ETH_MODULE_SFF_8436_LEN 256
+
+struct ethtool_eeprom {
+ u32 offset;
+ u32 len;
+};
+
+struct ethtool_modinfo {
+ u32 type;
+ u32 eeprom_len;
+};
+
+/**
+ * is_zero_ether_addr - Determine if given Ethernet address is all zeros.
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Return true if the address is all zeroes.
+ */
+static inline bool is_zero_ether_addr(const u8 *addr)
+{
+ return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
+}
+
+
+
+/**
+ * is_multicast_ether_addr - Determine if the Ethernet address is a multicast.
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Return true if the address is a multicast address.
+ * By definition the broadcast address is also a multicast address.
+ */
+static inline bool is_multicast_ether_addr(const u8 *addr)
+{
+ return (0x01 & addr[0]);
+}
+
+/**
+ * is_broadcast_ether_addr - Determine if the Ethernet address is broadcast
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Return true if the address is the broadcast address.
+ */
+static inline bool is_broadcast_ether_addr(const u8 *addr)
+{
+ return (addr[0] & addr[1] & addr[2] & addr[3] & addr[4] & addr[5]) == 0xff;
+}
+
+/**
+ * is_valid_ether_addr - Determine if the given Ethernet address is valid
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not
+ * a multicast address, and is not FF:FF:FF:FF:FF:FF.
+ *
+ * Return true if the address is valid.
+ **/
+static inline bool is_valid_ether_addr(const u8 *addr)
+{
+ /* FF:FF:FF:FF:FF:FF is a multicast address so we don't need to
+ ** explicitly check for it here. */
+ return !is_multicast_ether_addr(addr) && !is_zero_ether_addr(addr);
+}
+
+static inline void ether_addr_copy(u8 *dst, const u8 *src)
+{
+ memcpy(dst, src, 6);
+}
+
+#endif /* _LINUX_ETHERDEVICE */
Property changes on: trunk/sys/ofed/include/linux/etherdevice.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
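Typical use of the new address predicates, sketched (the function and the
error choice are illustrative):

    static int
    set_hw_mac(u8 *dst, const u8 *candidate)
    {
            /* Rejects 00:00..., multicast, and ff:ff... addresses. */
            if (!is_valid_ether_addr(candidate))
                    return (-EADDRNOTAVAIL);
            ether_addr_copy(dst, candidate);
            return (0);
    }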
Modified: trunk/sys/ofed/include/linux/file.h
===================================================================
--- trunk/sys/ofed/include/linux/file.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/file.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -72,7 +73,15 @@
file = fget_unlocked(curthread->td_proc->p_fd, fd);
if (file == NULL)
return;
+ /*
+ * NOTE: We should only get here when the "fd" has not been
+ * installed, so no need to free the associated Linux file
+ * structure.
+ */
fdclose(curthread->td_proc->p_fd, file, fd, curthread);
+
+ /* drop extra reference */
+ fdrop(file, curthread);
}
static inline void
@@ -81,8 +90,15 @@
struct file *file;
file = fget_unlocked(curthread->td_proc->p_fd, fd);
- filp->_file = file;
- finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops);
+ if (file == NULL) {
+ filp->_file = NULL;
+ } else {
+ filp->_file = file;
+ finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops);
+ }
+
+ /* drop the extra reference */
+ fput(filp);
}
static inline int
@@ -95,16 +111,18 @@
error = falloc(curthread, &file, &fd, 0);
if (error)
return -error;
+ /* drop the extra reference */
+ fdrop(file, curthread);
return fd;
}
static inline struct linux_file *
-_alloc_file(int mode, const struct file_operations *fops)
+alloc_file(int mode, const struct file_operations *fops)
{
struct linux_file *filp;
filp = kzalloc(sizeof(*filp), GFP_KERNEL);
- if (filp == NULL)
+ if (filp == NULL)
return (NULL);
filp->f_op = fops;
filp->f_mode = mode;
@@ -112,8 +130,21 @@
return filp;
}
-#define alloc_file(mnt, root, mode, fops) _alloc_file((mode), (fops))
+struct fd {
+ struct linux_file *linux_file;
+};
+static inline void fdput(struct fd fd)
+{
+ fput(fd.linux_file);
+}
+
+static inline struct fd fdget(unsigned int fd)
+{
+ struct linux_file *f = linux_fget(fd);
+ return (struct fd){f};
+}
+
#define file linux_file
#define fget linux_fget
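
The new struct fd / fdget() / fdput() trio mirrors Linux's scoped
file-reference idiom on top of linux_fget(). A minimal sketch:

    static int
    with_fd(unsigned int fd)
    {
            struct fd f = fdget(fd);

            if (f.linux_file == NULL)
                    return (-EBADF);
            /* ... operate on f.linux_file ... */
            fdput(f);       /* drops the reference fdget() took */
            return (0);
    }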
Modified: trunk/sys/ofed/include/linux/fs.h
===================================================================
--- trunk/sys/ofed/include/linux/fs.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/fs.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,6 +29,8 @@
#ifndef _LINUX_FS_H_
#define _LINUX_FS_H_
+#include <sys/cdefs.h>
+#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/vnode.h>
@@ -73,6 +76,7 @@
struct dentry f_dentry_store;
struct selinfo f_selinfo;
struct sigio *f_sigio;
+ struct vnode *f_vnode;
};
#define file linux_file
@@ -105,6 +109,12 @@
int (*open)(struct inode *, struct file *);
int (*release)(struct inode *, struct file *);
int (*fasync)(int, struct file *, int);
+
+/* Although seeking is not supported in FreeBSD, to align with Linux
+ * code we add llseek() here, on the assumption that it is only ever
+ * mapped to no_llseek(), which returns an illegal-seek error.
+ */
+ loff_t (*llseek)(struct file *, loff_t, int);
#if 0
/* We do not support these methods. Don't permit them to compile. */
loff_t (*llseek)(struct file *, loff_t, int);
@@ -153,6 +163,21 @@
return;
}
+static inline int
+alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
+ const char *name)
+{
+
+ return 0;
+}
+
+/* No current support for seek op in FreeBSD */
+static inline int
+nonseekable_open(struct inode *inode, struct file *filp)
+{
+ return 0;
+}
+
static inline dev_t
iminor(struct inode *inode)
{
@@ -179,4 +204,10 @@
vrele(inode);
}
-#endif /* _LINUX_FS_H_ */
+static inline loff_t
+no_llseek(struct file *file, loff_t offset, int whence)
+{
+ return -ESPIPE;
+}
+
+#endif /* _LINUX_FS_H_ */
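
With the additions above, a driver that cannot seek can express that in
Linux style (foo_open() and the fops name are illustrative):

    static int
    foo_open(struct inode *inode, struct file *filp)
    {
            return (nonseekable_open(inode, filp));
    }

    static const struct file_operations foo_fops = {
            .open   = foo_open,
            .llseek = no_llseek,    /* any seek attempt yields -ESPIPE */
    };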
Modified: trunk/sys/ofed/include/linux/gfp.h
===================================================================
--- trunk/sys/ofed/include/linux/gfp.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/gfp.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,11 +30,14 @@
#ifndef _LINUX_GFP_H_
#define _LINUX_GFP_H_
+#include <sys/cdefs.h>
+#include <sys/types.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <linux/page.h>
+#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
@@ -101,6 +105,13 @@
kmem_free(kmem_map, (vm_offset_t)p, size);
}
+static inline void free_pages(uintptr_t addr, unsigned int order)
+{
+ if (addr == 0)
+ return;
+ __free_pages(virt_to_page((void *)addr), order);
+}
+
/*
* Alloc pages allocates directly from the buddy allocator on linux so
* order specifies a power of two bucket of pages and the results
@@ -120,4 +131,18 @@
return (virt_to_page(page));
}
+static inline uintptr_t __get_free_pages(gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+
+ page = alloc_pages(gfp_mask, order);
+ if (page == NULL)
+ return (0);
+ return ((uintptr_t)page_address(page));
+}
+
+#define alloc_pages_node(node, mask, order) alloc_pages(mask, order)
+
+#define kmalloc_node(chunk, mask, node) kmalloc(chunk, mask)
+
#endif /* _LINUX_GFP_H_ */
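
__get_free_pages() and free_pages() round-trip between the page allocator
and plain kernel virtual addresses. Sketch of an order-1 (two-page)
allocation:

    static void
    page_roundtrip(void)
    {
            uintptr_t addr;

            addr = __get_free_pages(GFP_KERNEL, 1);     /* 2^1 pages */
            if (addr == 0)
                    return;
            /* ... use 2 * PAGE_SIZE bytes at addr ... */
            free_pages(addr, 1);
    }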
Modified: trunk/sys/ofed/include/linux/hardirq.h
===================================================================
--- trunk/sys/ofed/include/linux/hardirq.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/hardirq.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/idr.h
===================================================================
--- trunk/sys/ofed/include/linux/idr.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/idr.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -40,6 +41,10 @@
#define MAX_ID_MASK (MAX_ID_BIT - 1)
#define MAX_LEVEL (MAX_ID_SHIFT + IDR_BITS - 1) / IDR_BITS
+#define MAX_IDR_SHIFT (sizeof(int)*8 - 1)
+#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
+#define MAX_IDR_MASK (MAX_IDR_BIT - 1)
+
struct idr_layer {
unsigned long bitmap;
struct idr_layer *ary[IDR_SIZE];
Modified: trunk/sys/ofed/include/linux/if_arp.h
===================================================================
--- trunk/sys/ofed/include/linux/if_arp.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/if_arp.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/if_ether.h
===================================================================
--- trunk/sys/ofed/include/linux/if_ether.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/if_ether.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,4 +35,16 @@
#define ETH_P_8021Q ETHERTYPE_VLAN
+#define ETH_HLEN ETHER_HDR_LEN /* Total octets in header. */
+#ifndef ETH_ALEN
+#define ETH_ALEN ETHER_ADDR_LEN
+#endif
+#define ETH_FCS_LEN 4 /* Octets in the FCS */
+#define VLAN_HLEN 4 /* The additional bytes (on top of the Ethernet header)
+ * that VLAN requires. */
+/*
+ * defined Ethernet Protocol ID's.
+ */
+#define ETH_P_IP 0x0800 /* Internet Protocol packet */
+
#endif /* _LINUX_IF_ETHER_H_ */
Modified: trunk/sys/ofed/include/linux/if_vlan.h
===================================================================
--- trunk/sys/ofed/include/linux/if_vlan.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/if_vlan.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,7 +30,11 @@
#ifndef _LINUX_IF_VLAN_H_
#define _LINUX_IF_VLAN_H_
+#include <sys/socket.h>
+#include <net/if.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
+#define VLAN_N_VID 4096
+
#endif /* _LINUX_IF_VLAN_H_ */
Modified: trunk/sys/ofed/include/linux/in.h
===================================================================
--- trunk/sys/ofed/include/linux/in.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/in.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,6 +31,9 @@
#include "opt_inet.h"
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
#include <netinet/in.h>
#include <asm/byteorder.h>
Modified: trunk/sys/ofed/include/linux/in6.h
===================================================================
--- trunk/sys/ofed/include/linux/in6.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/in6.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,6 +30,8 @@
#ifndef _LINUX_IN6_H_
#define _LINUX_IN6_H_
+#ifndef KLD_MODULE
#include "opt_inet6.h"
+#endif
#endif /* _LINUX_IN6_H_ */
Modified: trunk/sys/ofed/include/linux/inetdevice.h
===================================================================
--- trunk/sys/ofed/include/linux/inetdevice.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/inetdevice.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/interrupt.h
===================================================================
--- trunk/sys/ofed/include/linux/interrupt.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/interrupt.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013-2015 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -116,6 +117,23 @@
return 0;
}
+static inline int
+bind_irq_to_cpu(unsigned int irq, int cpu_id)
+{
+ struct irq_ent *irqe;
+ struct device *dev;
+
+ dev = _pci_find_irq_dev(irq);
+ if (dev == NULL)
+ return (-ENOENT);
+
+ irqe = _irq_ent(dev, irq);
+ if (irqe == NULL)
+ return (-ENOENT);
+
+ return (-bus_bind_intr(dev->bsddev, irqe->res, cpu_id));
+}
+
static inline void
free_irq(unsigned int irq, void *device)
{
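
A sketch of the new CPU-binding helper (the irq value is illustrative; on
failure it returns the negated bus_bind_intr() error):

    static void
    pin_irq(unsigned int irq)
    {
            if (bind_irq_to_cpu(irq, 0) != 0)       /* pin to CPU 0 */
                    printf("could not bind irq %u\n", irq);
    }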
Modified: trunk/sys/ofed/include/linux/io-mapping.h
===================================================================
--- trunk/sys/ofed/include/linux/io-mapping.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/io-mapping.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/io.h
===================================================================
--- trunk/sys/ofed/include/linux/io.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/io.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,6 +31,7 @@
#define _LINUX_IO_H_
#include <machine/vm.h>
+#include <sys/endian.h>
static inline uint32_t
__raw_readl(const volatile void *addr)
@@ -88,6 +90,20 @@
*(volatile uint16_t *)addr = b;
}
+#undef ioread32be
+static inline uint32_t
+ioread32be(const volatile void *addr)
+{
+ return be32toh(*(const volatile uint32_t *)addr);
+}
+
+#undef iowrite32be
+static inline void
+iowrite32be(uint32_t v, volatile void *addr)
+{
+ *(volatile uint32_t *)addr = htobe32(v);
+}
+
void *_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr);
#define ioremap_nocache(addr, size) \
_ioremap_attr((addr), (size), VM_MEMATTR_UNCACHEABLE)
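
The big-endian accessors wrap plain 32-bit loads/stores in
be32toh()/htobe32(), so device registers kept in network byte order read
naturally on little-endian hosts. Sketch (the register pointer is
illustrative):

    static uint32_t
    set_low_bit(volatile void *reg)
    {
            uint32_t v;

            v = ioread32be(reg);        /* BE register -> host order */
            iowrite32be(v | 1, reg);    /* host order -> BE register */
            return (v);
    }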
Modified: trunk/sys/ofed/include/linux/ioctl.h
===================================================================
--- trunk/sys/ofed/include/linux/ioctl.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/ioctl.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/jiffies.h
===================================================================
--- trunk/sys/ofed/include/linux/jiffies.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/jiffies.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -44,11 +45,12 @@
return (tvtohz(&tv));
}
-#define jiffies ticks
+#define jiffies ticks
+#define jiffies_to_msecs(x) (((int64_t)(x)) * 1000 / hz)
-#define time_after(a, b) ((long)(b) - (long)(a) < 0)
+#define time_after(a, b) ((int)((b) - (a)) < 0)
#define time_before(a, b) time_after(b,a)
-#define time_after_eq(a, b) ((long)(a) - (long)(b) >= 0)
+#define time_after_eq(a, b) ((int)((a) - (b)) >= 0)
#define time_before_eq(a, b) time_after_eq(b, a)
#define HZ hz
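
The switch from long to int arithmetic matters because FreeBSD's ticks
counter is an int; casting the difference keeps time_after()/time_before()
correct across counter wrap-around. A wrap-safe polling loop, sketched:

    static int
    poll_done(volatile int *done)
    {
            int deadline = jiffies + 2 * HZ;    /* two seconds from now */

            while (!*done) {
                    if (time_after(jiffies, deadline))
                            return (-ETIMEDOUT);
            }
            return (0);
    }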
Modified: trunk/sys/ofed/include/linux/kdev_t.h
===================================================================
--- trunk/sys/ofed/include/linux/kdev_t.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/kdev_t.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/kernel.h
===================================================================
--- trunk/sys/ofed/include/linux/kernel.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/kernel.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,25 +29,27 @@
#ifndef _LINUX_KERNEL_H_
#define _LINUX_KERNEL_H_
+#include <sys/cdefs.h>
+#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/stat.h>
#include <sys/smp.h>
+#include <sys/stddef.h>
+#include <sys/syslog.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
-#include <linux/stddef.h>
#include <linux/kthread.h>
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/wait.h>
-#include <linux/fs.h>
-#include <linux/notifier.h>
-#include <linux/log2.h>
+#include <linux/log2.h>
#include <asm/byteorder.h>
+#define KERN_CONT ""
#define KERN_EMERG "<0>"
#define KERN_ALERT "<1>"
#define KERN_CRIT "<2>"
@@ -56,6 +59,8 @@
#define KERN_INFO "<6>"
#define KERN_DEBUG "<7>"
+#define BUILD_BUG_ON(x) CTASSERT(!(x))
+
#define BUG() panic("BUG")
#define BUG_ON(condition) do { if (condition) BUG(); } while(0)
#define WARN_ON BUG_ON
@@ -62,12 +67,89 @@
#undef ALIGN
#define ALIGN(x, y) roundup2((x), (y))
+#undef PTR_ALIGN
+#define PTR_ALIGN(p, a) ((__typeof(p))ALIGN((uintptr_t)(p), (a)))
#define DIV_ROUND_UP howmany
+#define FIELD_SIZEOF(t, f) sizeof(((t *)0)->f)
#define printk(X...) printf(X)
-#define pr_debug(fmt, ...) printk(KERN_DEBUG # fmt, ##__VA_ARGS__)
+
+/*
+ * The "pr_debug()" and "pr_devel()" macros should produce zero code
+ * unless DEBUG is defined:
+ */
+#ifdef DEBUG
+#define pr_debug(fmt, ...) \
+ log(LOG_DEBUG, fmt, ##__VA_ARGS__)
+#define pr_devel(fmt, ...) \
+ log(LOG_DEBUG, pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_debug(fmt, ...) \
+ ({ if (0) log(LOG_DEBUG, fmt, ##__VA_ARGS__); 0; })
+#define pr_devel(fmt, ...) \
+ ({ if (0) log(LOG_DEBUG, pr_fmt(fmt), ##__VA_ARGS__); 0; })
+#endif
+
#define udelay(t) DELAY(t)
+#define usleep_range(min,max) DELAY(min)
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+/*
+ * Print a one-time message (analogous to WARN_ONCE() et al):
+ */
+#define printk_once(...) do { \
+ static bool __print_once; \
+ \
+ if (!__print_once) { \
+ __print_once = true; \
+ printk(__VA_ARGS__); \
+ } \
+} while (0)
+
+/*
+ * Log a one-time message (analogous to WARN_ONCE() et al):
+ */
+#define log_once(level,...) do { \
+ static bool __log_once; \
+ \
+ if (!__log_once) { \
+ __log_once = true; \
+ log(level, __VA_ARGS__); \
+ } \
+} while (0)
+
+#define pr_emerg(fmt, ...) \
+ log(LOG_EMERG, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert(fmt, ...) \
+ log(LOG_ALERT, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit(fmt, ...) \
+ log(LOG_CRIT, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err(fmt, ...) \
+ log(LOG_ERR, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning(fmt, ...) \
+ log(LOG_WARNING, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn pr_warning
+#define pr_notice(fmt, ...) \
+ log(LOG_NOTICE, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...) \
+ log(LOG_INFO, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info_once(fmt, ...) \
+ log_once(LOG_INFO, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_cont(fmt, ...) \
+ printk(KERN_CONT fmt, ##__VA_ARGS__)
+
+#ifndef WARN
+#define WARN(condition, format...) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ pr_warning(format); \
+ unlikely(__ret_warn_on); \
+})
+#endif
+
#define container_of(ptr, type, member) \
({ \
__typeof(((type *)0)->member) *_p = (ptr); \
@@ -77,12 +159,29 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define simple_strtoul strtoul
+#define simple_strtol strtol
+#define kstrtol(a,b,c) ({*(c) = strtol(a,0,b);})
-#define min(x, y) (x < y ? x : y)
-#define max(x, y) (x > y ? x : y)
-#define min_t(type, _x, _y) (type)(_x) < (type)(_y) ? (type)(_x) : (_y)
-#define max_t(type, _x, _y) (type)(_x) > (type)(_y) ? (type)(_x) : (_y)
+#define min(x, y) ((x) < (y) ? (x) : (y))
+#define max(x, y) ((x) > (y) ? (x) : (y))
+#define min_t(type, _x, _y) ((type)(_x) < (type)(_y) ? (type)(_x) : (type)(_y))
+#define max_t(type, _x, _y) ((type)(_x) > (type)(_y) ? (type)(_x) : (type)(_y))
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
#define num_possible_cpus() mp_ncpus
+#define num_online_cpus() mp_ncpus
+typedef struct pm_message {
+ int event;
+} pm_message_t;
+
#endif /* _LINUX_KERNEL_H_ */
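
The rounding helpers only work for power-of-two alignments; for example,
round_up(1000, 256) == 1024 and round_down(1000, 256) == 768. A small
sketch:

    static size_t
    cacheline_align(size_t len)
    {
            /* 64 is illustrative; the alignment must be a power of two. */
            return (round_up(len, 64));
    }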
Added: trunk/sys/ofed/include/linux/kmod.h
===================================================================
--- trunk/sys/ofed/include/linux/kmod.h (rev 0)
+++ trunk/sys/ofed/include/linux/kmod.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2010 iX Systems, Inc.
+ * Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_KMOD_H_
+#define _LINUX_KMOD_H_
+
+#include <sys/types.h>
+#include <sys/syscallsubr.h>
+#include <sys/refcount.h>
+#include <sys/sbuf.h>
+#include <machine/stdarg.h>
+#include <sys/proc.h>
+
+#define request_module(...) \
+({\
+ char modname[128]; \
+ int fileid; \
+ snprintf(modname, sizeof(modname), __VA_ARGS__); \
+ kern_kldload(curthread, modname, &fileid); \
+})
+
+#define request_module_nowait request_module
+
+
+#endif /* _LINUX_KMOD_H_ */
Property changes on: trunk/sys/ofed/include/linux/kmod.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
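request_module() here expands to a snprintf() of the module name followed
by kern_kldload() on the current thread. Sketch (the module name is
illustrative):

    static void
    load_backend(void)
    {
            /* Builds the name and asks the kernel linker to load it. */
            request_module("mlx_core");
    }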
Modified: trunk/sys/ofed/include/linux/kobject.h
===================================================================
--- trunk/sys/ofed/include/linux/kobject.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/kobject.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -54,6 +55,8 @@
struct sysctl_oid *oidp;
};
+extern struct kobject *mm_kobj;
+
static inline void
kobject_init(struct kobject *kobj, struct kobj_type *ktype)
{
@@ -150,4 +153,17 @@
int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype,
struct kobject *parent, const char *fmt, ...);
+/* sysfs.h calls for 'kobject', which is defined here,
+ * so we must add the include only after the 'kobject' definition.
+ */
+#include <linux/sysfs.h>
+
+struct kobj_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count);
+};
+
#endif /* _LINUX_KOBJECT_H_ */
Modified: trunk/sys/ofed/include/linux/kref.h
===================================================================
--- trunk/sys/ofed/include/linux/kref.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/kref.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,6 +29,7 @@
#ifndef _LINUX_KREF_H_
#define _LINUX_KREF_H_
+#include <sys/types.h>
#include <sys/refcount.h>
struct kref {
@@ -59,4 +61,4 @@
return 0;
}
-#endif /* _KREF_H_ */
+#endif /* _LINUX_KREF_H_ */
Modified: trunk/sys/ofed/include/linux/kthread.h
===================================================================
--- trunk/sys/ofed/include/linux/kthread.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/kthread.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Added: trunk/sys/ofed/include/linux/ktime.h
===================================================================
--- trunk/sys/ofed/include/linux/ktime.h (rev 0)
+++ trunk/sys/ofed/include/linux/ktime.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,300 @@
+/*-
+ * Copyright (c) 2014 Mellanox Technologies, Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_KTIME_H
+#define _LINUX_KTIME_H
+
+#include <sys/time.h>
+#include <linux/types.h>
+#include <linux/jiffies.h>
+
+
+/* Get the monotonic time in timespec format: */
+#define ktime_get_ts getnanouptime
+
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_SEC 1000000000L
+
+/*
+ * ktime_t:
+ *
+ * On 64-bit CPUs a single 64-bit variable is used to store the hrtimers
+ * internal representation of time values in scalar nanoseconds. The
+ * design plays out best on 64-bit CPUs, where most conversions are
+ * NOPs and most arithmetic ktime_t operations are plain arithmetic
+ * operations.
+ *
+ * On 32-bit CPUs an optimized representation of the timespec structure
+ * is used to avoid expensive conversions from and to timespecs. The
+ * endian-aware order of the tv struct members is chosen to allow
+ * mathematical operations on the tv64 member of the union too, which
+ * for certain operations produces better code.
+ *
+ * For architectures with efficient support for 64/32-bit conversions the
+ * plain scalar nanosecond based representation can be selected by the
+ * config switch CONFIG_KTIME_SCALAR.
+ */
+union ktime {
+ s64 tv64;
+#if BITS_PER_LONG != 64 && !defined(CONFIG_KTIME_SCALAR)
+ struct {
+# ifdef __BIG_ENDIAN
+ s32 sec, nsec;
+# else
+ s32 nsec, sec;
+# endif
+ } tv;
+#endif
+};
+
+typedef union ktime ktime_t; /* Kill this */
+
+#define KTIME_MAX ((s64)~((u64)1 << 63))
+#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
+
+/*
+ * ktime_t definitions when using the 64-bit scalar representation:
+ */
+
+#if (BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)
+
+/**
+ * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value
+ * @secs: seconds to set
+ * @nsecs: nanoseconds to set
+ *
+ * Return the ktime_t representation of the value
+ */
+static inline ktime_t ktime_set(const long secs, const unsigned long nsecs)
+{
+#if (BITS_PER_LONG == 64)
+ if (unlikely(secs >= KTIME_SEC_MAX))
+ return (ktime_t){ .tv64 = KTIME_MAX };
+#endif
+ return (ktime_t) { .tv64 = (s64)secs * NSEC_PER_SEC + (s64)nsecs };
+}
+
+/* Subtract two ktime_t variables. rem = lhs -rhs: */
+#define ktime_sub(lhs, rhs) \
+ ({ (ktime_t){ .tv64 = (lhs).tv64 - (rhs).tv64 }; })
+
+/* Add two ktime_t variables. res = lhs + rhs: */
+#define ktime_add(lhs, rhs) \
+ ({ (ktime_t){ .tv64 = (lhs).tv64 + (rhs).tv64 }; })
+
+/*
+ * Add a ktime_t variable and a scalar nanosecond value.
+ * res = kt + nsval:
+ */
+#define ktime_add_ns(kt, nsval) \
+ ({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; })
+
+/*
+ * Subtract a scalar nanosecond from a ktime_t variable
+ * res = kt - nsval:
+ */
+#define ktime_sub_ns(kt, nsval) \
+ ({ (ktime_t){ .tv64 = (kt).tv64 - (nsval) }; })
+
+/* convert a timespec to ktime_t format: */
+static inline ktime_t timespec_to_ktime(struct timespec ts)
+{
+ return ktime_set(ts.tv_sec, ts.tv_nsec);
+}
+
+/* convert a timeval to ktime_t format: */
+static inline ktime_t timeval_to_ktime(struct timeval tv)
+{
+ return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC);
+}
+
+/* Map the ktime_t to timespec conversion to ns_to_timespec function */
+#define ktime_to_timespec(kt) ns_to_timespec((kt).tv64)
+
+/* Map the ktime_t to timeval conversion to ns_to_timeval function */
+#define ktime_to_timeval(kt) ns_to_timeval((kt).tv64)
+
+/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
+#define ktime_to_ns(kt) ((kt).tv64)
+
+#else /* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */
+
+/*
+ * Helper macros/inlines to get the ktime_t math right in the timespec
+ * representation. The macros are sometimes ugly - their actual use is
+ * pretty okay-ish, given the circumstances. We do all this for
+ * performance reasons. The pure scalar nsec_t based code was nice and
+ * simple, but created too many 64-bit / 32-bit conversions and divisions.
+ *
+ * Be especially aware that negative values are represented in a way
+ * that the tv.sec field is negative and the tv.nsec field is greater
+ * than or equal to zero but less than nanoseconds per second. This is
+ * the same representation used by timespecs.
+ *
+ * tv.sec < 0 and 0 <= tv.nsec < NSEC_PER_SEC
+ */
+
+/* Set a ktime_t variable to a value in sec/nsec representation: */
+static inline ktime_t ktime_set(const long secs, const unsigned long nsecs)
+{
+ return (ktime_t) { .tv = { .sec = secs, .nsec = nsecs } };
+}
+
+/**
+ * ktime_sub - subtract two ktime_t variables
+ * @lhs: minuend
+ * @rhs: subtrahend
+ *
+ * Returns the remainder of the subtraction
+ */
+static inline ktime_t ktime_sub(const ktime_t lhs, const ktime_t rhs)
+{
+ ktime_t res;
+
+ res.tv64 = lhs.tv64 - rhs.tv64;
+ if (res.tv.nsec < 0)
+ res.tv.nsec += NSEC_PER_SEC;
+
+ return res;
+}
+
+/**
+ * ktime_add - add two ktime_t variables
+ * @add1: addend1
+ * @add2: addend2
+ *
+ * Returns the sum of @add1 and @add2.
+ */
+static inline ktime_t ktime_add(const ktime_t add1, const ktime_t add2)
+{
+ ktime_t res;
+
+ res.tv64 = add1.tv64 + add2.tv64;
+ /*
+ * performance trick: the (u32) -NSEC gives 0x00000000Fxxxxxxx
+ * so we subtract NSEC_PER_SEC and add 1 to the upper 32 bit.
+ *
+ * it's equivalent to:
+ * tv.nsec -= NSEC_PER_SEC
+ * tv.sec ++;
+ */
+ if (res.tv.nsec >= NSEC_PER_SEC)
+ res.tv64 += (u32)-NSEC_PER_SEC;
+
+ return res;
+}
+
+/**
+ * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
+ * @kt: addend
+ * @nsec: the scalar nsec value to add
+ *
+ * Returns the sum of @kt and @nsec in ktime_t format
+ */
+extern ktime_t ktime_add_ns(const ktime_t kt, u64 nsec);
+
+/**
+ * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable
+ * @kt: minuend
+ * @nsec: the scalar nsec value to subtract
+ *
+ * Returns the subtraction of @nsec from @kt in ktime_t format
+ */
+extern ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec);
+
+/**
+ * timespec_to_ktime - convert a timespec to ktime_t format
+ * @ts: the timespec variable to convert
+ *
+ * Returns a ktime_t variable with the converted timespec value
+ */
+static inline ktime_t timespec_to_ktime(const struct timespec ts)
+{
+ return (ktime_t) { .tv = { .sec = (s32)ts.tv_sec,
+ .nsec = (s32)ts.tv_nsec } };
+}
+
+/**
+ * timeval_to_ktime - convert a timeval to ktime_t format
+ * @tv: the timeval variable to convert
+ *
+ * Returns a ktime_t variable with the converted timeval value
+ */
+static inline ktime_t timeval_to_ktime(const struct timeval tv)
+{
+ return (ktime_t) { .tv = { .sec = (s32)tv.tv_sec,
+ .nsec = (s32)(tv.tv_usec *
+ NSEC_PER_USEC) } };
+}
+
+/**
+ * ktime_to_timespec - convert a ktime_t variable to timespec format
+ * @kt: the ktime_t variable to convert
+ *
+ * Returns the timespec representation of the ktime value
+ */
+static inline struct timespec ktime_to_timespec(const ktime_t kt)
+{
+ return (struct timespec) { .tv_sec = (time_t) kt.tv.sec,
+ .tv_nsec = (long) kt.tv.nsec };
+}
+
+/**
+ * ktime_to_timeval - convert a ktime_t variable to timeval format
+ * @kt: the ktime_t variable to convert
+ *
+ * Returns the timeval representation of the ktime value
+ */
+static inline struct timeval ktime_to_timeval(const ktime_t kt)
+{
+ return (struct timeval) {
+ .tv_sec = (time_t) kt.tv.sec,
+ .tv_usec = (suseconds_t) (kt.tv.nsec / NSEC_PER_USEC) };
+}
+
+/**
+ * ktime_to_ns - convert a ktime_t variable to scalar nanoseconds
+ * @kt: the ktime_t variable to convert
+ *
+ * Returns the scalar nanoseconds representation of @kt
+ */
+static inline s64 ktime_to_ns(const ktime_t kt)
+{
+ return (s64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec;
+}
+
+#endif /* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */
+
+static inline s64 ktime_get_ns(void)
+{
+ struct timespec ts;
+ ktime_t kt;
+ ktime_get_ts(&ts);
+ kt = timespec_to_ktime(ts);
+ return (ktime_to_ns(kt));
+}
+
+#endif /* _LINUX_KTIME_H */
Property changes on: trunk/sys/ofed/include/linux/ktime.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
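
For reference, a minimal sketch of how the helpers above compose to time an interval; only ktime_get_ts(), already used by ktime_get_ns(), is assumed from outside this header, and the function name is hypothetical:

    static s64
    measure_ns(void (*work)(void))
    {
            struct timespec before, after;

            ktime_get_ts(&before);
            work();                         /* the code being timed */
            ktime_get_ts(&after);
            return (ktime_to_ns(ktime_sub(timespec_to_ktime(after),
                timespec_to_ktime(before))));
    }
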
Modified: trunk/sys/ofed/include/linux/linux_compat.c
===================================================================
--- trunk/sys/ofed/include/linux/linux_compat.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/linux_compat.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,6 +32,9 @@
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
+#include <sys/proc.h>
+#include <sys/sglist.h>
+#include <sys/sleepqueue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bus.h>
@@ -54,6 +58,8 @@
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
+#include <linux/timer.h>
+#include <linux/netdevice.h>
#include <vm/vm_pager.h>
@@ -65,12 +71,6 @@
#undef file
#undef cdev
#define RB_ROOT(head) (head)->rbh_root
-#undef LIST_HEAD
-/* From sys/queue.h */
-#define LIST_HEAD(name, type) \
-struct name { \
- struct type *lh_first; /* first element */ \
-}
struct kobject class_root;
struct device linux_rootdev;
@@ -77,8 +77,11 @@
struct class miscclass;
struct list_head pci_drivers;
struct list_head pci_devices;
+struct net init_net;
spinlock_t pci_lock;
+unsigned long linux_timer_hz_mask;
+
int
panic_cmp(struct rb_node *one, struct rb_node *two)
{
@@ -159,12 +162,26 @@
static void
kobject_kfree(struct kobject *kobj)
{
-
kfree(kobj);
}
+static void
+kobject_kfree_name(struct kobject *kobj)
+{
+ if (kobj) {
+ kfree(kobj->name);
+ }
+}
+
struct kobj_type kfree_type = { .release = kobject_kfree };
+static void
+dev_release(struct device *dev)
+{
+ pr_debug("dev_release: %s\n", dev_name(dev));
+ kfree(dev);
+}
+
struct device *
device_create(struct class *class, struct device *parent, dev_t devt,
void *drvdata, const char *fmt, ...)
@@ -177,6 +194,7 @@
dev->class = class;
dev->devt = devt;
dev->driver_data = drvdata;
+ dev->release = dev_release;
va_start(args, fmt);
kobject_set_name_vargs(&dev->kobj, fmt, args);
va_end(args);
@@ -211,7 +229,8 @@
struct linux_file *filp;
filp = cdp;
- filp->f_op->release(curthread->td_fpop->f_vnode, filp);
+ filp->f_op->release(filp->f_vnode, filp);
+ vdrop(filp->f_vnode);
kfree(filp);
}
@@ -231,6 +250,8 @@
filp->f_dentry = &filp->f_dentry_store;
filp->f_op = ldev->ops;
filp->f_flags = file->f_flag;
+ vhold(file->f_vnode);
+ filp->f_vnode = file->f_vnode;
if (filp->f_op->open) {
error = -filp->f_op->open(file->f_vnode, filp);
if (error) {
@@ -263,7 +284,8 @@
if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
return (error);
filp->f_flags = file->f_flag;
- devfs_clear_cdevpriv();
+ devfs_clear_cdevpriv();
+
return (0);
}
@@ -392,16 +414,6 @@
}
static int
-linux_dev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
- int nprot, vm_memattr_t *memattr)
-{
-
- /* XXX memattr not honored. */
- *paddr = offset;
- return (0);
-}
-
-static int
linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
vm_size_t size, struct vm_object **object, int nprot)
{
@@ -409,8 +421,6 @@
struct linux_file *filp;
struct file *file;
struct vm_area_struct vma;
- vm_paddr_t paddr;
- vm_page_t m;
int error;
file = curthread->td_fpop;
@@ -417,28 +427,35 @@
ldev = dev->si_drv1;
if (ldev == NULL)
return (ENODEV);
- if (size != PAGE_SIZE)
- return (EINVAL);
if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
return (error);
filp->f_flags = file->f_flag;
vma.vm_start = 0;
- vma.vm_end = PAGE_SIZE;
+ vma.vm_end = size;
vma.vm_pgoff = *offset / PAGE_SIZE;
vma.vm_pfn = 0;
- vma.vm_page_prot = 0;
+ vma.vm_page_prot = VM_MEMATTR_DEFAULT;
if (filp->f_op->mmap) {
error = -filp->f_op->mmap(filp, &vma);
if (error == 0) {
- paddr = (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT;
- *offset = paddr;
- m = PHYS_TO_VM_PAGE(paddr);
- *object = vm_pager_allocate(OBJT_DEVICE, dev,
- PAGE_SIZE, nprot, *offset, curthread->td_ucred);
- if (*object == NULL)
- return (EINVAL);
- if (vma.vm_page_prot != VM_MEMATTR_DEFAULT)
- pmap_page_set_memattr(m, vma.vm_page_prot);
+ struct sglist *sg;
+
+ sg = sglist_alloc(1, M_WAITOK);
+ sglist_append_phys(sg,
+ (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT, vma.vm_len);
+ *object = vm_pager_allocate(OBJT_SG, sg, vma.vm_len,
+ nprot, 0, curthread->td_ucred);
+ if (*object == NULL) {
+ sglist_free(sg);
+ return (EINVAL);
+ }
+ *offset = 0;
+ if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) {
+ VM_OBJECT_LOCK(*object);
+ vm_object_set_memattr(*object,
+ vma.vm_page_prot);
+ VM_OBJECT_UNLOCK(*object);
+ }
}
} else
error = ENODEV;
@@ -455,7 +472,6 @@
.d_write = linux_dev_write,
.d_ioctl = linux_dev_ioctl,
.d_mmap_single = linux_dev_mmap_single,
- .d_mmap = linux_dev_mmap,
.d_poll = linux_dev_poll,
};
@@ -575,7 +591,9 @@
unsigned long vm_size;
};
-LIST_HEAD(vmmaphd, vmmap);
+struct vmmaphd {
+ struct vmmap *lh_first;
+};
#define VMMAP_HASH_SIZE 64
#define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1)
#define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK
@@ -666,9 +684,205 @@
kfree(vmmap);
}
+char *
+kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
+{
+ unsigned int len;
+ char *p;
+ va_list aq;
+
+ va_copy(aq, ap);
+ len = vsnprintf(NULL, 0, fmt, aq);
+ va_end(aq);
+
+ p = kmalloc(len + 1, gfp);
+ if (p != NULL)
+ vsnprintf(p, len + 1, fmt, ap);
+
+ return (p);
+}
+
+char *
+kasprintf(gfp_t gfp, const char *fmt, ...)
+{
+ va_list ap;
+ char *p;
+
+ va_start(ap, fmt);
+ p = kvasprintf(gfp, fmt, ap);
+ va_end(ap);
+
+ return (p);
+}
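
A short usage sketch for the allocating printf helpers just added; the name pattern and the GFP_KERNEL flag are illustrative only:

    static char *
    make_node_name(int unit)
    {
            /* kasprintf() sizes the buffer with a first vsnprintf() pass */
            char *name = kasprintf(GFP_KERNEL, "mlx4_%d", unit);

            /* caller must kfree(name) when done */
            return (name);
    }
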
+
+static int
+linux_timer_jiffies_until(unsigned long expires)
+{
+ int delta = expires - jiffies;
+ /* guard against already expired values */
+ if (delta < 1)
+ delta = 1;
+ return (delta);
+}
+
static void
-linux_compat_init(void)
+linux_timer_callback_wrapper(void *context)
{
+ struct timer_list *timer;
+
+ timer = context;
+ timer->function(timer->data);
+}
+
+void
+mod_timer(struct timer_list *timer, unsigned long expires)
+{
+
+ timer->expires = expires;
+ callout_reset(&timer->timer_callout,
+ linux_timer_jiffies_until(expires),
+ &linux_timer_callback_wrapper, timer);
+}
+
+void
+add_timer(struct timer_list *timer)
+{
+
+ callout_reset(&timer->timer_callout,
+ linux_timer_jiffies_until(timer->expires),
+ &linux_timer_callback_wrapper, timer);
+}
+
+static void
+linux_timer_init(void *arg)
+{
+
+ /*
+ * Round HZ up to a power of two, which evenly divides 2**32,
+ * to avoid timer rounding problems when the tick value wraps
+ * around 2**32:
+ */
+ linux_timer_hz_mask = 1;
+ while (linux_timer_hz_mask < (unsigned long)hz)
+ linux_timer_hz_mask *= 2;
+ linux_timer_hz_mask--;
+}
+SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL);
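
A sketch of how a driver would drive this timer shim; setup_timer() is assumed to be the linux/timer.h helper that fills in function/data and initializes the embedded callout, and since the compat jiffies tick at hz, jiffies + hz fires roughly one second out:

    static void
    my_timeout(unsigned long data)
    {
            /* runs from callout context via linux_timer_callback_wrapper() */
    }

    static void
    arm_example(struct timer_list *t)
    {
            setup_timer(t, my_timeout, 0);  /* assumed linux/timer.h helper */
            mod_timer(t, jiffies + hz);     /* ~1 second from now */
    }
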
+
+void
+linux_complete_common(struct completion *c, int all)
+{
+ int wakeup_swapper;
+
+ sleepq_lock(c);
+ c->done++;
+ if (all)
+ wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
+ else
+ wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
+ sleepq_release(c);
+ if (wakeup_swapper)
+ kick_proc0();
+}
+
+/*
+ * Indefinite wait for done != 0 with or without signals.
+ */
+long
+linux_wait_for_common(struct completion *c, int flags)
+{
+
+ if (flags != 0)
+ flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
+ else
+ flags = SLEEPQ_SLEEP;
+ for (;;) {
+ sleepq_lock(c);
+ if (c->done)
+ break;
+ sleepq_add(c, NULL, "completion", flags, 0);
+ if (flags & SLEEPQ_INTERRUPTIBLE) {
+ if (sleepq_wait_sig(c, 0) != 0)
+ return (-ERESTARTSYS);
+ } else
+ sleepq_wait(c, 0);
+ }
+ c->done--;
+ sleepq_release(c);
+
+ return (0);
+}
+
+/*
+ * Time limited wait for done != 0 with or without signals.
+ */
+long
+linux_wait_for_timeout_common(struct completion *c, long timeout, int flags)
+{
+ long end = jiffies + timeout;
+
+ if (flags != 0)
+ flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
+ else
+ flags = SLEEPQ_SLEEP;
+ for (;;) {
+ int ret;
+
+ sleepq_lock(c);
+ if (c->done)
+ break;
+ sleepq_add(c, NULL, "completion", flags, 0);
+ sleepq_set_timeout(c, linux_timer_jiffies_until(end));
+ if (flags & SLEEPQ_INTERRUPTIBLE)
+ ret = sleepq_timedwait_sig(c, 0);
+ else
+ ret = sleepq_timedwait(c, 0);
+ if (ret != 0) {
+ /* check for timeout or signal */
+ if (ret == EWOULDBLOCK)
+ return (0);
+ else
+ return (-ERESTARTSYS);
+ }
+ }
+ c->done--;
+ sleepq_release(c);
+
+ /* return how many jiffies are left */
+ return (linux_timer_jiffies_until(end));
+}
+
+int
+linux_try_wait_for_completion(struct completion *c)
+{
+ int isdone;
+
+ isdone = 1;
+ sleepq_lock(c);
+ if (c->done)
+ c->done--;
+ else
+ isdone = 0;
+ sleepq_release(c);
+ return (isdone);
+}
+
+int
+linux_completion_done(struct completion *c)
+{
+ int isdone;
+
+ isdone = 1;
+ sleepq_lock(c);
+ if (c->done == 0)
+ isdone = 0;
+ sleepq_release(c);
+ return (isdone);
+}
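
How the completion primitives above pair up in practice, as a sketch; init_completion() is assumed to be the linux/completion.h wrapper that zeroes the done counter:

    static int
    wait_one_second(struct completion *c)
    {
            long left;

            init_completion(c);     /* assumed wrapper: sets c->done = 0 */
            /* a producer elsewhere calls linux_complete_common(c, 0) */
            left = linux_wait_for_timeout_common(c, hz, 0);
            if (left == 0)
                    return (EWOULDBLOCK);   /* timed out */
            return (0);     /* completed with 'left' jiffies to spare */
    }
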
+
+static void
+linux_compat_init(void *arg)
+{
struct sysctl_oid *rootoid;
int i;
@@ -695,3 +909,12 @@
}
SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL);
+
+static void
+linux_compat_uninit(void *arg)
+{
+ kobject_kfree_name(&class_root);
+ kobject_kfree_name(&linux_rootdev.kobj);
+ kobject_kfree_name(&miscclass.kobj);
+}
+SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);
Property changes on: trunk/sys/ofed/include/linux/linux_compat.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/linux/linux_idr.c
===================================================================
--- trunk/sys/ofed/include/linux/linux_idr.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/linux_idr.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,7 +50,7 @@
* however it should be fairly fast. It is basically a radix tree with
* a builtin bitmap for allocation.
*/
-MALLOC_DEFINE(M_IDR, "idr", "Linux IDR compat");
+static MALLOC_DEFINE(M_IDR, "idr", "Linux IDR compat");
static inline int
idr_max(struct idr *idr)
@@ -76,6 +77,7 @@
{
struct idr_layer *il, *iln;
+ idr_remove_all(idr);
mtx_lock(&idr->lock);
for (il = idr->free; il != NULL; il = iln) {
iln = il->ary[0];
Property changes on: trunk/sys/ofed/include/linux/linux_idr.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
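
For context, the classic pre-3.9 allocation loop this idr code serves; idr_pre_get() and idr_get_new() are assumed to be provided by linux/idr.h, as in Linux of this vintage:

    static int
    alloc_id(struct idr *ids, void *object, int *id)
    {
            int err;

            do {
                    if (idr_pre_get(ids, GFP_KERNEL) == 0)
                            return (-ENOMEM);       /* preload free layers */
                    err = idr_get_new(ids, object, id);
            } while (err == -EAGAIN);
            return (err);
    }

With the change above, idr_destroy() now tears down any live layers via idr_remove_all() before draining the free list, so callers no longer leak them.
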
Modified: trunk/sys/ofed/include/linux/linux_radix.c
===================================================================
--- trunk/sys/ofed/include/linux/linux_radix.c 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/linux_radix.c 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,7 +38,7 @@
#include <linux/radix-tree.h>
#include <linux/err.h>
-MALLOC_DEFINE(M_RADIX, "radix", "Linux radix compat");
+static MALLOC_DEFINE(M_RADIX, "radix", "Linux radix compat");
static inline int
radix_max(struct radix_tree_root *root)
@@ -123,40 +124,84 @@
radix_tree_insert(struct radix_tree_root *root, unsigned long index, void *item)
{
struct radix_tree_node *node;
+ struct radix_tree_node *temp[RADIX_TREE_MAX_HEIGHT - 1];
int height;
int idx;
- /*
- * Expand the tree to fit indexes as big as requested.
- */
- while (root->rnode == NULL || radix_max(root) < index) {
+ /* bail out upon insertion of a NULL item */
+ if (item == NULL)
+ return (-EINVAL);
+
+ /* get root node, if any */
+ node = root->rnode;
+
+ /* allocate root node, if any */
+ if (node == NULL) {
node = malloc(sizeof(*node), M_RADIX, root->gfp_mask | M_ZERO);
if (node == NULL)
return (-ENOMEM);
- node->slots[0] = root->rnode;
- if (root->rnode)
- node->count++;
root->rnode = node;
root->height++;
}
- node = root->rnode;
- height = root->height - 1;
- /*
- * Walk down the tree finding the correct node and allocating any
- * missing nodes along the way.
- */
- while (height) {
- idx = radix_pos(index, height);
- if (node->slots[idx] == NULL) {
- node->slots[idx] = malloc(sizeof(*node), M_RADIX,
- root->gfp_mask | M_ZERO);
- if (node->slots[idx] == NULL)
+
+ /* expand radix tree as needed */
+ while (radix_max(root) < index) {
+
+ /* check if the radix tree is getting too big */
+ if (root->height == RADIX_TREE_MAX_HEIGHT)
+ return (-E2BIG);
+
+ /*
+ * If the root radix level is not empty, we need to
+ * allocate a new radix level:
+ */
+ if (node->count != 0) {
+ node = malloc(sizeof(*node), M_RADIX, root->gfp_mask | M_ZERO);
+ if (node == NULL)
return (-ENOMEM);
+ node->slots[0] = root->rnode;
node->count++;
+ root->rnode = node;
}
+ root->height++;
+ }
+
+ /* get radix tree height index */
+ height = root->height - 1;
+
+ /* walk down the tree until the first missing node, if any */
+ for ( ; height != 0; height--) {
+ idx = radix_pos(index, height);
+ if (node->slots[idx] == NULL)
+ break;
node = node->slots[idx];
- height--;
}
+
+ /* allocate the missing radix levels, if any */
+ for (idx = 0; idx != height; idx++) {
+ temp[idx] = malloc(sizeof(*node), M_RADIX,
+ root->gfp_mask | M_ZERO);
+ if (temp[idx] == NULL) {
+ while (idx--)
+ free(temp[idx], M_RADIX);
+ /* check if we should free the root node as well */
+ if (root->rnode->count == 0) {
+ free(root->rnode, M_RADIX);
+ root->rnode = NULL;
+ root->height = 0;
+ }
+ return (-ENOMEM);
+ }
+ }
+
+ /* setup new radix levels, if any */
+ for ( ; height != 0; height--) {
+ idx = radix_pos(index, height);
+ node->slots[idx] = temp[height - 1];
+ node->count++;
+ node = node->slots[idx];
+ }
+
/*
* Insert and adjust count if the item does not already exist.
*/
Property changes on: trunk/sys/ofed/include/linux/linux_radix.c
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
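
A small sketch of the insert path the rewrite above exercises; INIT_RADIX_TREE() and radix_tree_lookup() are assumed from linux/radix-tree.h:

    static void *
    stash_and_fetch(struct radix_tree_root *root, unsigned long index,
        void *item)
    {
            /* NULL items are now rejected up front with -EINVAL */
            if (radix_tree_insert(root, index, item) != 0)
                    return (NULL);
            return (radix_tree_lookup(root, index));
    }

The practical difference is on the error path: a failed level allocation now unwinds the partially built levels (and an empty root) instead of leaking them.
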
Modified: trunk/sys/ofed/include/linux/list.h
===================================================================
--- trunk/sys/ofed/include/linux/list.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/list.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -39,6 +40,7 @@
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/cpuset.h>
+#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
@@ -51,9 +53,11 @@
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_media.h>
+#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
@@ -111,6 +115,9 @@
#define list_entry(ptr, type, field) container_of(ptr, type, field)
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
#define list_for_each(p, head) \
for (p = (head)->next; p != (head); p = p->next)
@@ -304,6 +311,66 @@
new->first->pprev = &new->first;
old->first = NULL;
}
+
+/**
+ * list_is_singular - tests whether a list has just one entry.
+ * @head: the list to test.
+ */
+static inline int list_is_singular(const struct list_head *head)
+{
+ return !list_empty(head) && (head->next == head->prev);
+}
+
+static inline void __list_cut_position(struct list_head *list,
+ struct list_head *head, struct list_head *entry)
+{
+ struct list_head *new_first = entry->next;
+ list->next = head->next;
+ list->next->prev = list;
+ list->prev = entry;
+ entry->next = list;
+ head->next = new_first;
+ new_first->prev = head;
+}
+
+/**
+ * list_cut_position - cut a list into two
+ * @list: a new list to add all removed entries
+ * @head: a list with entries
+ * @entry: an entry within head, could be the head itself
+ * and if so we won't cut the list
+ *
+ * This helper moves the initial part of @head, up to and
+ * including @entry, from @head to @list. You should
+ * pass on @entry an element you know is on @head. @list
+ * should be an empty list or a list you do not care about
+ * losing its data.
+ *
+ */
+static inline void list_cut_position(struct list_head *list,
+ struct list_head *head, struct list_head *entry)
+{
+ if (list_empty(head))
+ return;
+ if (list_is_singular(head) &&
+ (head->next != entry && head != entry))
+ return;
+ if (entry == head)
+ INIT_LIST_HEAD(list);
+ else
+ __list_cut_position(list, head, entry);
+}
+
+/**
+ * list_is_last - tests whether @list is the last entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_last(const struct list_head *list,
+ const struct list_head *head)
+{
+ return list->next == head;
+}
#define hlist_entry(ptr, type, field) container_of(ptr, type, field)
@@ -324,9 +391,10 @@
#define hlist_for_each_entry_from(tp, p, field) \
for (; p ? (tp = hlist_entry(p, typeof(*tp), field)): NULL; p = p->next)
-#define hlist_for_each_entry_safe(tp, p, n, head, field) \
- for (p = (head)->first; p ? \
- (n = p->next) | (tp = hlist_entry(p, typeof(*tp), field)) : \
- NULL; p = n)
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \
+ for (pos = (head)->first; \
+ (pos) != 0 && ({ n = (pos)->next; \
+ tpos = hlist_entry((pos), typeof(*(tpos)), member); 1;}); \
+ pos = (n))
#endif /* _LINUX_LIST_H_ */
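
A sketch of the new list-splitting helper in use; struct foo and its entry member are hypothetical:

    struct foo {
            struct list_head entry;
            int value;
    };

    static struct foo *
    split_at(struct list_head *src, struct foo *pos, struct list_head *dst)
    {
            INIT_LIST_HEAD(dst);
            /* moves the head of 'src', up to and including 'pos', onto 'dst' */
            list_cut_position(dst, src, &pos->entry);
            if (list_empty(dst))
                    return (NULL);
            return (list_first_entry(dst, struct foo, entry));
    }
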
Modified: trunk/sys/ofed/include/linux/lockdep.h
===================================================================
--- trunk/sys/ofed/include/linux/lockdep.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/lockdep.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,4 +35,6 @@
#define lockdep_set_class(lock, key)
-#endif /* _LINUX_LOCKDEP_H_ */
+#define lockdep_set_class_and_name(lock, key, name)
+
+#endif /* _LINUX_LOCKDEP_H_ */
Modified: trunk/sys/ofed/include/linux/log2.h
===================================================================
--- trunk/sys/ofed/include/linux/log2.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/log2.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -51,10 +52,121 @@
return (1UL << (flsl(x) - 1));
}
-static inline unsigned long
-ilog2(unsigned long x)
+
+/*
+ * deal with unrepresentable constant logarithms
+ */
+extern __attribute__((const, noreturn))
+int ____ilog2_NaN(void);
+
+/*
+ * non-constant log of base 2 calculators
+ * - the arch may override these in asm/bitops.h if they can be implemented
+ * more efficiently than using fls() and fls64()
+ * - the arch is not required to handle n==0 if implementing the fallback
+ */
+#ifndef CONFIG_ARCH_HAS_ILOG2_U32
+static inline __attribute__((const))
+int __ilog2_u32(u32 n)
{
- return (flsl(x) - 1);
+ return flsl(n) - 1;
}
+#endif
+#ifndef CONFIG_ARCH_HAS_ILOG2_U64
+static inline __attribute__((const))
+int __ilog2_u64(u64 n)
+{
+ return flsl(n) - 1;
+}
+#endif
+
+
+/**
+ * ilog2 - log base 2 of a 32-bit or 64-bit unsigned value
+ * @n: the value
+ *
+ * constant-capable log of base 2 calculation
+ * - this can be used to initialise global variables from constant data, hence
+ * the massive ternary operator construction
+ *
+ * selects the appropriately-sized optimised version depending on sizeof(n)
+ */
+#define ilog2(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (n) < 1 ? ____ilog2_NaN() : \
+ (n) & (1ULL << 63) ? 63 : \
+ (n) & (1ULL << 62) ? 62 : \
+ (n) & (1ULL << 61) ? 61 : \
+ (n) & (1ULL << 60) ? 60 : \
+ (n) & (1ULL << 59) ? 59 : \
+ (n) & (1ULL << 58) ? 58 : \
+ (n) & (1ULL << 57) ? 57 : \
+ (n) & (1ULL << 56) ? 56 : \
+ (n) & (1ULL << 55) ? 55 : \
+ (n) & (1ULL << 54) ? 54 : \
+ (n) & (1ULL << 53) ? 53 : \
+ (n) & (1ULL << 52) ? 52 : \
+ (n) & (1ULL << 51) ? 51 : \
+ (n) & (1ULL << 50) ? 50 : \
+ (n) & (1ULL << 49) ? 49 : \
+ (n) & (1ULL << 48) ? 48 : \
+ (n) & (1ULL << 47) ? 47 : \
+ (n) & (1ULL << 46) ? 46 : \
+ (n) & (1ULL << 45) ? 45 : \
+ (n) & (1ULL << 44) ? 44 : \
+ (n) & (1ULL << 43) ? 43 : \
+ (n) & (1ULL << 42) ? 42 : \
+ (n) & (1ULL << 41) ? 41 : \
+ (n) & (1ULL << 40) ? 40 : \
+ (n) & (1ULL << 39) ? 39 : \
+ (n) & (1ULL << 38) ? 38 : \
+ (n) & (1ULL << 37) ? 37 : \
+ (n) & (1ULL << 36) ? 36 : \
+ (n) & (1ULL << 35) ? 35 : \
+ (n) & (1ULL << 34) ? 34 : \
+ (n) & (1ULL << 33) ? 33 : \
+ (n) & (1ULL << 32) ? 32 : \
+ (n) & (1ULL << 31) ? 31 : \
+ (n) & (1ULL << 30) ? 30 : \
+ (n) & (1ULL << 29) ? 29 : \
+ (n) & (1ULL << 28) ? 28 : \
+ (n) & (1ULL << 27) ? 27 : \
+ (n) & (1ULL << 26) ? 26 : \
+ (n) & (1ULL << 25) ? 25 : \
+ (n) & (1ULL << 24) ? 24 : \
+ (n) & (1ULL << 23) ? 23 : \
+ (n) & (1ULL << 22) ? 22 : \
+ (n) & (1ULL << 21) ? 21 : \
+ (n) & (1ULL << 20) ? 20 : \
+ (n) & (1ULL << 19) ? 19 : \
+ (n) & (1ULL << 18) ? 18 : \
+ (n) & (1ULL << 17) ? 17 : \
+ (n) & (1ULL << 16) ? 16 : \
+ (n) & (1ULL << 15) ? 15 : \
+ (n) & (1ULL << 14) ? 14 : \
+ (n) & (1ULL << 13) ? 13 : \
+ (n) & (1ULL << 12) ? 12 : \
+ (n) & (1ULL << 11) ? 11 : \
+ (n) & (1ULL << 10) ? 10 : \
+ (n) & (1ULL << 9) ? 9 : \
+ (n) & (1ULL << 8) ? 8 : \
+ (n) & (1ULL << 7) ? 7 : \
+ (n) & (1ULL << 6) ? 6 : \
+ (n) & (1ULL << 5) ? 5 : \
+ (n) & (1ULL << 4) ? 4 : \
+ (n) & (1ULL << 3) ? 3 : \
+ (n) & (1ULL << 2) ? 2 : \
+ (n) & (1ULL << 1) ? 1 : \
+ (n) & (1ULL << 0) ? 0 : \
+ ____ilog2_NaN() \
+ ) : \
+ (sizeof(n) <= 4) ? \
+ __ilog2_u32(n) : \
+ __ilog2_u64(n) \
+ )
+
+#define order_base_2(x) ilog2(roundup_pow_of_two(x))
+
#endif /* _LINUX_LOG2_H_ */
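
Both sides of the constant/runtime split can be seen in a short sketch:

    static int
    log2_examples(void)
    {
            /* constant-folded, so usable even in static initializers */
            static const int log_mtts = ilog2(4096);        /* 12 */

            /* runtime path: order_base_2() rounds up to a power of two first */
            return (log_mtts + order_base_2(33));   /* 12 + ilog2(64) = 18 */
    }
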
Added: trunk/sys/ofed/include/linux/math64.h
===================================================================
--- trunk/sys/ofed/include/linux/math64.h (rev 0)
+++ trunk/sys/ofed/include/linux/math64.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,133 @@
+/*-
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2014 Mellanox Technologies, Ltd. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_MATH64_H
+#define _LINUX_MATH64_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#if BITS_PER_LONG == 64
+
+# define do_div(n, base) ({ \
+ uint32_t __base = (base); \
+ uint32_t __rem; \
+ __rem = ((uint64_t)(n)) % __base; \
+ (n) = ((uint64_t)(n)) / __base; \
+ __rem; \
+})
+
+/**
+ * div_u64_rem - unsigned 64bit divide with 32bit divisor, returning the remainder
+ *
+ * 32bit archs commonly provide an optimized version of this 64bit divide.
+ */
+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ *remainder = dividend % divisor;
+ return dividend / divisor;
+}
+
+
+#elif BITS_PER_LONG == 32
+
+static uint32_t __div64_32(uint64_t *n, uint32_t base)
+{
+ uint64_t rem = *n;
+ uint64_t b = base;
+ uint64_t res, d = 1;
+ uint32_t high = rem >> 32;
+
+ /* Reduce the thing a bit first */
+ res = 0;
+ if (high >= base) {
+ high /= base;
+ res = (uint64_t) high << 32;
+ rem -= (uint64_t) (high*base) << 32;
+ }
+
+ while ((int64_t)b > 0 && b < rem) {
+ b = b+b;
+ d = d+d;
+ }
+
+ do {
+ if (rem >= b) {
+ rem -= b;
+ res += d;
+ }
+ b >>= 1;
+ d >>= 1;
+ } while (d);
+
+ *n = res;
+ return rem;
+}
+
+# define do_div(n, base) ({ \
+ uint32_t __base = (base); \
+ uint32_t __rem; \
+ (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
+ if (likely(((n) >> 32) == 0)) { \
+ __rem = (uint32_t)(n) % __base; \
+ (n) = (uint32_t)(n) / __base; \
+ } else \
+ __rem = __div64_32(&(n), __base); \
+ __rem; \
+})
+
+#ifndef div_u64_rem
+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ *remainder = do_div(dividend, divisor);
+ return dividend;
+}
+#endif
+
+
+#endif /* BITS_PER_LONG */
+
+
+
+/**
+ * div_u64 - unsigned 64bit divide with 32bit divisor
+ *
+ * This is the most common 64bit divide and should be used if possible,
+ * as many 32bit archs can optimize this variant better than a full 64bit
+ * divide.
+ */
+#ifndef div_u64
+
+static inline u64 div_u64(u64 dividend, u32 divisor)
+{
+ u32 remainder;
+ return div_u64_rem(dividend, divisor, &remainder);
+}
+#endif
+
+#endif /* _LINUX_MATH64_H */
Property changes on: trunk/sys/ofed/include/linux/math64.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
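
A sketch splitting nanoseconds into seconds plus a remainder with the two entry points; note that do_div() divides its first argument in place:

    static void
    split_ns(u64 ns, u64 *sec, u32 *nsec)
    {
            /* div_u64_rem(): quotient returned, remainder via pointer */
            *sec = div_u64_rem(ns, 1000000000U, nsec);

            /*
             * Equivalent with do_div(), which returns the remainder
             * and leaves the quotient in 'ns':
             *      *nsec = do_div(ns, 1000000000U);
             */
    }
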
Modified: trunk/sys/ofed/include/linux/miscdevice.h
===================================================================
--- trunk/sys/ofed/include/linux/miscdevice.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/miscdevice.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -40,6 +41,8 @@
const struct file_operations *fops;
struct cdev *cdev;
int minor;
+ const char *nodename;
+ umode_t mode;
};
extern struct class miscclass;
Modified: trunk/sys/ofed/include/linux/mlx4/cmd.h
===================================================================
--- trunk/sys/ofed/include/linux/mlx4/cmd.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/mlx4/cmd.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -34,6 +34,7 @@
#define MLX4_CMD_H
#include <linux/dma-mapping.h>
+#include <linux/types.h>
enum {
/* initialization and general commands */
@@ -59,6 +60,7 @@
MLX4_CMD_HW_HEALTH_CHECK = 0x50,
MLX4_CMD_SET_PORT = 0xc,
MLX4_CMD_SET_NODE = 0x5a,
+ MLX4_CMD_QUERY_FUNC = 0x56,
MLX4_CMD_ACCESS_DDR = 0x2e,
MLX4_CMD_MAP_ICM = 0xffa,
MLX4_CMD_UNMAP_ICM = 0xff9,
@@ -65,6 +67,9 @@
MLX4_CMD_MAP_ICM_AUX = 0xffc,
MLX4_CMD_UNMAP_ICM_AUX = 0xffb,
MLX4_CMD_SET_ICM_SIZE = 0xffd,
+ /* master notifies FW when a slave's FLR has finished */
+ MLX4_CMD_INFORM_FLR_DONE = 0x5b,
+ MLX4_CMD_GET_OP_REQ = 0x59,
/* TPT commands */
MLX4_CMD_SW2HW_MPT = 0xd,
@@ -107,6 +112,7 @@
MLX4_CMD_INIT2INIT_QP = 0x2d,
MLX4_CMD_SUSPEND_QP = 0x32,
MLX4_CMD_UNSUSPEND_QP = 0x33,
+ MLX4_CMD_UPDATE_QP = 0x61,
/* special QP and management commands */
MLX4_CMD_CONF_SPECIAL_QP = 0x23,
MLX4_CMD_MAD_IFC = 0x24,
@@ -119,7 +125,27 @@
/* miscellaneous commands */
MLX4_CMD_DIAG_RPRT = 0x30,
MLX4_CMD_NOP = 0x31,
+ MLX4_CMD_ACCESS_MEM = 0x2e,
+ MLX4_CMD_SET_VEP = 0x52,
+ /* Ethernet specific commands */
+ MLX4_CMD_SET_VLAN_FLTR = 0x47,
+ MLX4_CMD_SET_MCAST_FLTR = 0x48,
+ MLX4_CMD_DUMP_ETH_STATS = 0x49,
+
+ /* Communication channel commands */
+ MLX4_CMD_ARM_COMM_CHANNEL = 0x57,
+ MLX4_CMD_GEN_EQE = 0x58,
+
+ /* virtual commands */
+ MLX4_CMD_ALLOC_RES = 0xf00,
+ MLX4_CMD_FREE_RES = 0xf01,
+ MLX4_CMD_MCAST_ATTACH = 0xf05,
+ MLX4_CMD_UCAST_ATTACH = 0xf06,
+ MLX4_CMD_PROMISC = 0xf08,
+ MLX4_CMD_QUERY_FUNC_CAP = 0xf0a,
+ MLX4_CMD_QP_ATTACH = 0xf0b,
+
/* debug commands */
MLX4_CMD_QUERY_DEBUG_MSG = 0x2a,
MLX4_CMD_SET_DEBUG_MSG = 0x2b,
@@ -127,28 +153,41 @@
/* statistics commands */
MLX4_CMD_QUERY_IF_STAT = 0X54,
MLX4_CMD_SET_IF_STAT = 0X55,
+
+ /* register/delete flow steering network rules */
+ MLX4_QP_FLOW_STEERING_ATTACH = 0x65,
+ MLX4_QP_FLOW_STEERING_DETACH = 0x66,
+ MLX4_FLOW_STEERING_IB_UC_QP_RANGE = 0x64,
};
enum {
- MLX4_CMD_TIME_CLASS_A = 10000,
- MLX4_CMD_TIME_CLASS_B = 10000,
- MLX4_CMD_TIME_CLASS_C = 10000,
+ MLX4_CMD_TIME_CLASS_A = 60000,
+ MLX4_CMD_TIME_CLASS_B = 60000,
+ MLX4_CMD_TIME_CLASS_C = 60000,
};
enum {
- MLX4_MAILBOX_SIZE = 4096
+ MLX4_MAILBOX_SIZE = 4096,
+ MLX4_ACCESS_MEM_ALIGN = 256,
};
enum {
/* set port opcode modifiers */
- MLX4_SET_PORT_GENERAL = 0x0,
- MLX4_SET_PORT_RQP_CALC = 0x1,
- MLX4_SET_PORT_MAC_TABLE = 0x2,
- MLX4_SET_PORT_VLAN_TABLE = 0x3,
- MLX4_SET_PORT_PRIO_MAP = 0x4,
- MLX4_SET_PORT_GID_TABLE = 0x5,
+ MLX4_SET_PORT_GENERAL = 0x0,
+ MLX4_SET_PORT_RQP_CALC = 0x1,
+ MLX4_SET_PORT_MAC_TABLE = 0x2,
+ MLX4_SET_PORT_VLAN_TABLE = 0x3,
+ MLX4_SET_PORT_PRIO_MAP = 0x4,
+ MLX4_SET_PORT_GID_TABLE = 0x5,
+ MLX4_SET_PORT_PRIO2TC = 0x8,
+ MLX4_SET_PORT_SCHEDULER = 0x9
};
+enum {
+ MLX4_CMD_WRAPPED,
+ MLX4_CMD_NATIVE
+};
+
struct mlx4_dev;
struct mlx4_cmd_mailbox {
@@ -158,23 +197,24 @@
int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
- u16 op, unsigned long timeout);
+ u16 op, unsigned long timeout, int native);
/* Invoke a command with no output parameter */
static inline int mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u32 in_modifier,
- u8 op_modifier, u16 op, unsigned long timeout)
+ u8 op_modifier, u16 op, unsigned long timeout,
+ int native)
{
return __mlx4_cmd(dev, in_param, NULL, 0, in_modifier,
- op_modifier, op, timeout);
+ op_modifier, op, timeout, native);
}
/* Invoke a command with an output mailbox */
static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param,
u32 in_modifier, u8 op_modifier, u16 op,
- unsigned long timeout)
+ unsigned long timeout, int native)
{
return __mlx4_cmd(dev, in_param, &out_param, 0, in_modifier,
- op_modifier, op, timeout);
+ op_modifier, op, timeout, native);
}
/*
@@ -184,13 +224,35 @@
*/
static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
u32 in_modifier, u8 op_modifier, u16 op,
- unsigned long timeout)
+ unsigned long timeout, int native)
{
return __mlx4_cmd(dev, in_param, out_param, 1, in_modifier,
- op_modifier, op, timeout);
+ op_modifier, op, timeout, native);
}
struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev);
void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox);
+u32 mlx4_comm_get_version(void);
+int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac);
+int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos);
+int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting);
+int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state);
+int mlx4_get_vf_link_state(struct mlx4_dev *dev, int port, int vf);
+/*
+ * mlx4_get_slave_default_vlan -
+ * returns true if VST (default vlan);
+ * if VST, fills in vlan & qos (if not NULL)
+ */
+bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, u16 *vlan, u8 *qos);
+
+enum {
+ IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */
+ IFLA_VF_LINK_STATE_ENABLE, /* link always up */
+ IFLA_VF_LINK_STATE_DISABLE, /* link always down */
+ __IFLA_VF_LINK_STATE_MAX,
+};
+
+#define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8)
+
#endif /* MLX4_CMD_H */
Property changes on: trunk/sys/ofed/include/linux/mlx4/cmd.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
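
Every caller now picks a dispatch mode explicitly; a firmware NOP round-trip issued natively (bypassing the SR-IOV wrapper path) would look like:

    static int
    fw_ping(struct mlx4_dev *dev)
    {
            /* NATIVE goes straight to FW; WRAPPED is routed via the PF */
            return (mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_NOP,
                MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE));
    }
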
Modified: trunk/sys/ofed/include/linux/mlx4/cq.h
===================================================================
--- trunk/sys/ofed/include/linux/mlx4/cq.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/mlx4/cq.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -42,17 +42,31 @@
__be32 vlan_my_qpn;
__be32 immed_rss_invalid;
__be32 g_mlpath_rqpn;
- __be16 sl_vid;
- __be16 rlid;
- __be16 status;
- u8 ipv6_ext_mask;
- u8 badfcs_enc;
+ union {
+ struct {
+ union {
+ struct {
+ __be16 sl_vid;
+ __be16 rlid;
+ };
+ __be32 timestamp_16_47;
+ };
+ __be16 status;
+ u8 ipv6_ext_mask;
+ u8 badfcs_enc;
+ };
+ struct {
+ __be16 reserved1;
+ u8 smac[6];
+ };
+ };
__be32 byte_cnt;
__be16 wqe_index;
__be16 checksum;
- u8 reserved[3];
+ u8 reserved2[1];
+ __be16 timestamp_0_15;
u8 owner_sr_opcode;
-};
+} __packed;
struct mlx4_err_cqe {
__be32 my_qpn;
@@ -64,9 +78,26 @@
u8 owner_sr_opcode;
};
+struct mlx4_ts_cqe {
+ __be32 vlan_my_qpn;
+ __be32 immed_rss_invalid;
+ __be32 g_mlpath_rqpn;
+ __be32 timestamp_hi;
+ __be16 status;
+ u8 ipv6_ext_mask;
+ u8 badfcs_enc;
+ __be32 byte_cnt;
+ __be16 wqe_index;
+ __be16 checksum;
+ u8 reserved;
+ __be16 timestamp_lo;
+ u8 owner_sr_opcode;
+} __packed;
+
enum {
MLX4_CQE_VLAN_PRESENT_MASK = 1 << 29,
MLX4_CQE_QPN_MASK = 0xffffff,
+ MLX4_CQE_VID_MASK = 0xfff,
};
enum {
@@ -108,7 +139,7 @@
};
static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd,
- void __iomem *uar_page,
+ u8 __iomem *uar_page,
spinlock_t *doorbell_lock)
{
__be32 doorbell[2];
@@ -146,5 +177,5 @@
u16 count, u16 period);
int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
int entries, struct mlx4_mtt *mtt);
-
+int mlx4_cq_ignore_overrun(struct mlx4_dev *dev, struct mlx4_cq *cq);
#endif /* MLX4_CQ_H */
Property changes on: trunk/sys/ofed/include/linux/mlx4/cq.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
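
The split timestamp fields in the new CQE layouts might be recombined as below; the 48-bit width matches CORE_CLOCK_MASK from device.h, but the exact bit placement is an assumption rather than something this diff states:

    static u64
    read_cqe_timestamp(const struct mlx4_ts_cqe *cqe)
    {
            u64 hi = be32_to_cpu(cqe->timestamp_hi);
            u64 lo = be16_to_cpu(cqe->timestamp_lo);

            /* assumed: 48-bit free-running counter, hi:32 above lo:16 */
            return (((hi << 16) | lo) & 0xffffffffffffULL);
    }
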
Modified: trunk/sys/ofed/include/linux/mlx4/device.h
===================================================================
--- trunk/sys/ofed/include/linux/mlx4/device.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/mlx4/device.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -36,47 +36,205 @@
#include <linux/pci.h>
#include <linux/completion.h>
#include <linux/radix-tree.h>
-
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/workqueue.h>
#include <asm/atomic.h>
-#include <linux/mlx4/driver.h>
+#include <linux/clocksource.h>
+#define MAX_MSIX_P_PORT 17
+#define MAX_MSIX 64
+#define MSIX_LEGACY_SZ 4
+#define MIN_MSIX_P_PORT 5
+
+#define MLX4_ROCE_MAX_GIDS 128
+#define MLX4_ROCE_PF_GIDS 16
+
+#define MLX4_NUM_UP 8
+#define MLX4_NUM_TC 8
+#define MLX4_MAX_100M_UNITS_VAL 255 /*
+ * workaround: can't set values
+ * greater than this value when
+ * using 100 Mbps units.
+ */
+#define MLX4_RATELIMIT_100M_UNITS 3 /* 100 Mbps */
+#define MLX4_RATELIMIT_1G_UNITS 4 /* 1 Gbps */
+#define MLX4_RATELIMIT_DEFAULT 0x00ff
+
+#define CORE_CLOCK_MASK 0xffffffffffffULL
+
enum {
MLX4_FLAG_MSI_X = 1 << 0,
MLX4_FLAG_OLD_PORT_CMDS = 1 << 1,
+ MLX4_FLAG_MASTER = 1 << 2,
+ MLX4_FLAG_SLAVE = 1 << 3,
+ MLX4_FLAG_SRIOV = 1 << 4,
+ MLX4_FLAG_DEV_NUM_STR = 1 << 5,
+ MLX4_FLAG_OLD_REG_MAC = 1 << 6,
};
enum {
- MLX4_MAX_PORTS = 2
+ MLX4_PORT_CAP_IS_SM = 1 << 1,
+ MLX4_PORT_CAP_DEV_MGMT_SUP = 1 << 19,
};
enum {
- MLX4_BOARD_ID_LEN = 64
+ MLX4_MAX_PORTS = 2,
+ MLX4_MAX_PORT_PKEYS = 128
};
+/* base qkey for use in sriov tunnel-qp/proxy-qp communication.
+ * These qkeys must not be allowed for general use. This is a 64k range,
+ * and to test for violation, we use the mask (protect against future chg).
+ */
+#define MLX4_RESERVED_QKEY_BASE (0xFFFF0000)
+#define MLX4_RESERVED_QKEY_MASK (0xFFFF0000)
+
enum {
- MLX4_DEV_CAP_FLAG_RC = 1 << 0,
- MLX4_DEV_CAP_FLAG_UC = 1 << 1,
- MLX4_DEV_CAP_FLAG_UD = 1 << 2,
- MLX4_DEV_CAP_FLAG_XRC = 1 << 3,
- MLX4_DEV_CAP_FLAG_SRQ = 1 << 6,
- MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1 << 7,
- MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8,
- MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9,
- MLX4_DEV_CAP_FLAG_DPDP = 1 << 12,
- MLX4_DEV_CAP_FLAG_RAW_ETY = 1 << 13,
- MLX4_DEV_CAP_FLAG_BLH = 1 << 15,
- MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16,
- MLX4_DEV_CAP_FLAG_APM = 1 << 17,
- MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18,
- MLX4_DEV_CAP_FLAG_RAW_MCAST = 1 << 19,
- MLX4_DEV_CAP_FLAG_UD_AV_PORT = 1 << 20,
- MLX4_DEV_CAP_FLAG_UD_MCAST = 1 << 21,
- MLX4_DEV_CAP_FLAG_IBOE = 1 << 30,
- MLX4_DEV_CAP_FLAG_FC_T11 = 1 << 31
+ MLX4_BOARD_ID_LEN = 64,
+ MLX4_VSD_LEN = 208
};
enum {
+ MLX4_MAX_NUM_PF = 16,
+ MLX4_MAX_NUM_VF = 64,
+ MLX4_MFUNC_MAX = 80,
+ MLX4_MAX_EQ_NUM = 1024,
+ MLX4_MFUNC_EQ_NUM = 4,
+ MLX4_MFUNC_MAX_EQES = 8,
+ MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1)
+};
+
+/* Driver supports 3 different device methods to manage traffic steering:
+ * - device managed - High level API for ib and eth flow steering. FW is
+ * managing flow steering tables.
+ * - B0 steering mode - Common low level API for ib and (if supported) eth.
+ * - A0 steering mode - Limited low level API for eth. In case of IB,
+ * B0 mode is in use.
+ */
+enum {
+ MLX4_STEERING_MODE_A0,
+ MLX4_STEERING_MODE_B0,
+ MLX4_STEERING_MODE_DEVICE_MANAGED
+};
+
+static inline const char *mlx4_steering_mode_str(int steering_mode)
+{
+ switch (steering_mode) {
+ case MLX4_STEERING_MODE_A0:
+ return "A0 steering";
+
+ case MLX4_STEERING_MODE_B0:
+ return "B0 steering";
+
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ return "Device managed flow steering";
+
+ default:
+ return "Unrecognize steering mode";
+ }
+}
+
+enum {
+ MLX4_DEV_CAP_FLAG_RC = 1LL << 0,
+ MLX4_DEV_CAP_FLAG_UC = 1LL << 1,
+ MLX4_DEV_CAP_FLAG_UD = 1LL << 2,
+ MLX4_DEV_CAP_FLAG_XRC = 1LL << 3,
+ MLX4_DEV_CAP_FLAG_SRQ = 1LL << 6,
+ MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1LL << 7,
+ MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8,
+ MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9,
+ MLX4_DEV_CAP_FLAG_DPDP = 1LL << 12,
+ MLX4_DEV_CAP_FLAG_BLH = 1LL << 15,
+ MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1LL << 16,
+ MLX4_DEV_CAP_FLAG_APM = 1LL << 17,
+ MLX4_DEV_CAP_FLAG_ATOMIC = 1LL << 18,
+ MLX4_DEV_CAP_FLAG_RAW_MCAST = 1LL << 19,
+ MLX4_DEV_CAP_FLAG_UD_AV_PORT = 1LL << 20,
+ MLX4_DEV_CAP_FLAG_UD_MCAST = 1LL << 21,
+ MLX4_DEV_CAP_FLAG_IBOE = 1LL << 30,
+ MLX4_DEV_CAP_FLAG_UC_LOOPBACK = 1LL << 32,
+ MLX4_DEV_CAP_FLAG_FCS_KEEP = 1LL << 34,
+ MLX4_DEV_CAP_FLAG_WOL_PORT1 = 1LL << 37,
+ MLX4_DEV_CAP_FLAG_WOL_PORT2 = 1LL << 38,
+ MLX4_DEV_CAP_FLAG_UDP_RSS = 1LL << 40,
+ MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41,
+ MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42,
+ MLX4_DEV_CAP_FLAG_CROSS_CHANNEL = 1LL << 44,
+ MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48,
+ MLX4_DEV_CAP_FLAG_COUNTERS_EXT = 1LL << 49,
+ MLX4_DEV_CAP_FLAG_SET_PORT_ETH_SCHED = 1LL << 53,
+ MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55,
+ MLX4_DEV_CAP_FLAG_FAST_DROP = 1LL << 57,
+ MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59,
+ MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61,
+ MLX4_DEV_CAP_FLAG_64B_CQE = 1LL << 62
+};
+
+enum {
+ MLX4_DEV_CAP_FLAG2_RSS = 1LL << 0,
+ MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1,
+ MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2,
+ MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3,
+ MLX4_DEV_CAP_FLAG2_FSM = 1LL << 4,
+ MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 5,
+ MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 6,
+ MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1LL << 7,
+ MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 8,
+ MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 9,
+ MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP = 1LL << 10,
+ MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 11,
+ MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 12,
+ MLX4_DEV_CAP_FLAG2_TS = 1LL << 13,
+ MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW = 1LL << 14,
+ MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN = 1LL << 15,
+ MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS = 1LL << 16,
+ MLX4_DEV_CAP_FLAG2_FS_EN_NCSI = 1LL << 17,
+ MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18,
+ MLX4_DEV_CAP_FLAG2_DMFS_TAG_MODE = 1LL << 19,
+ MLX4_DEV_CAP_FLAG2_ROCEV2 = 1LL << 20,
+ MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL = 1LL << 21,
+ MLX4_DEV_CAP_FLAG2_CQE_STRIDE = 1LL << 22,
+ MLX4_DEV_CAP_FLAG2_EQE_STRIDE = 1LL << 23,
+ MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1LL << 24,
+ MLX4_DEV_CAP_FLAG2_RX_CSUM_MODE = 1LL << 25,
+};
+
+/* bit enums for an 8-bit flags field indicating special use
+ * QPs which require special handling in qp_reserve_range.
+ * Currently, this only includes QPs used by the ETH interface,
+ * where we expect to use blueflame. These QPs must not have
+ * bits 6 and 7 set in their qp number.
+ *
+ * This enum may use only bits 0..7.
+ */
+enum {
+ MLX4_RESERVE_BF_QP = 1 << 7,
+};
+
+enum {
+ MLX4_DEV_CAP_CQ_FLAG_IO = 1 << 0
+};
+
+enum {
+ MLX4_DEV_CAP_64B_EQE_ENABLED = 1LL << 0,
+ MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1
+};
+
+enum {
+ MLX4_USER_DEV_CAP_64B_CQE = 1L << 0
+};
+
+enum {
+ MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0
+};
+
+
+#define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90)
+
+enum {
+ MLX4_BMME_FLAG_WIN_TYPE_2B = 1 << 1,
MLX4_BMME_FLAG_LOCAL_INV = 1 << 6,
MLX4_BMME_FLAG_REMOTE_INV = 1 << 7,
MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9,
@@ -102,7 +260,15 @@
MLX4_EVENT_TYPE_PORT_CHANGE = 0x09,
MLX4_EVENT_TYPE_EQ_OVERFLOW = 0x0f,
MLX4_EVENT_TYPE_ECC_DETECT = 0x0e,
- MLX4_EVENT_TYPE_CMD = 0x0a
+ MLX4_EVENT_TYPE_CMD = 0x0a,
+ MLX4_EVENT_TYPE_VEP_UPDATE = 0x19,
+ MLX4_EVENT_TYPE_COMM_CHANNEL = 0x18,
+ MLX4_EVENT_TYPE_OP_REQUIRED = 0x1a,
+ MLX4_EVENT_TYPE_FATAL_WARNING = 0x1b,
+ MLX4_EVENT_TYPE_FLR_EVENT = 0x1c,
+ MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT = 0x1d,
+ MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT = 0x3e,
+ MLX4_EVENT_TYPE_NONE = 0xff,
};
enum {
@@ -111,11 +277,40 @@
};
enum {
+ MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE = 1,
+ MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE = 2,
+};
+
+enum {
+ MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0,
+};
+
+enum slave_port_state {
+ SLAVE_PORT_DOWN = 0,
+ SLAVE_PENDING_UP,
+ SLAVE_PORT_UP,
+};
+
+enum slave_port_gen_event {
+ SLAVE_PORT_GEN_EVENT_DOWN = 0,
+ SLAVE_PORT_GEN_EVENT_UP,
+ SLAVE_PORT_GEN_EVENT_NONE,
+};
+
+enum slave_port_state_event {
+ MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
+ MLX4_PORT_STATE_DEV_EVENT_PORT_UP,
+ MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
+ MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
+};
+
+enum {
MLX4_PERM_LOCAL_READ = 1 << 10,
MLX4_PERM_LOCAL_WRITE = 1 << 11,
MLX4_PERM_REMOTE_READ = 1 << 12,
MLX4_PERM_REMOTE_WRITE = 1 << 13,
- MLX4_PERM_ATOMIC = 1 << 14
+ MLX4_PERM_ATOMIC = 1 << 14,
+ MLX4_PERM_BIND_MW = 1 << 15,
};
enum {
@@ -126,7 +321,6 @@
MLX4_OPCODE_SEND = 0x0a,
MLX4_OPCODE_SEND_IMM = 0x0b,
MLX4_OPCODE_LSO = 0x0e,
- MLX4_OPCODE_BIG_LSO = 0x2e,
MLX4_OPCODE_RDMA_READ = 0x10,
MLX4_OPCODE_ATOMIC_CS = 0x11,
MLX4_OPCODE_ATOMIC_FA = 0x12,
@@ -150,14 +344,26 @@
MLX4_STAT_RATE_OFFSET = 5
};
+enum mlx4_protocol {
+ MLX4_PROT_IB_IPV6 = 0,
+ MLX4_PROT_ETH,
+ MLX4_PROT_IB_IPV4,
+ MLX4_PROT_FCOE
+};
+
enum {
MLX4_MTT_FLAG_PRESENT = 1
};
+enum {
+ MLX4_MAX_MTT_SHIFT = 31
+};
+
enum mlx4_qp_region {
MLX4_QP_REGION_FW = 0,
MLX4_QP_REGION_ETH_ADDR,
MLX4_QP_REGION_FC_ADDR,
+ MLX4_QP_REGION_FC_EXCH,
MLX4_NUM_QP_REGION
};
@@ -165,7 +371,8 @@
MLX4_PORT_TYPE_NONE = 0,
MLX4_PORT_TYPE_IB = 1,
MLX4_PORT_TYPE_ETH = 2,
- MLX4_PORT_TYPE_AUTO = 3
+ MLX4_PORT_TYPE_AUTO = 3,
+ MLX4_PORT_TYPE_NA = 4
};
enum mlx4_special_vlan_idx {
@@ -173,25 +380,63 @@
MLX4_VLAN_MISS_IDX,
MLX4_VLAN_REGULAR
};
-#define MLX4_LEAST_ATTACHED_VECTOR 0xffffffff
+enum mlx4_steer_type {
+ MLX4_MC_STEER = 0,
+ MLX4_UC_STEER,
+ MLX4_NUM_STEERS
+};
+
enum {
- MLX4_CUNTERS_DISABLED,
- MLX4_CUNTERS_BASIC,
- MLX4_CUNTERS_EXT
+ MLX4_NUM_FEXCH = 64 * 1024,
};
enum {
- MAX_FAST_REG_PAGES = 511,
+ MLX4_MAX_FAST_REG_PAGES = 511,
};
+enum {
+ MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14,
+ MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15,
+ MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE = 0x16,
+};
+
+/* Port mgmt change event handling */
+enum {
+ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK = 1 << 0,
+ MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK = 1 << 1,
+ MLX4_EQ_PORT_INFO_LID_CHANGE_MASK = 1 << 2,
+ MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK = 1 << 3,
+ MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4,
+};
+
+#define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
+ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK)
+
+enum mlx4_module_id {
+ MLX4_MODULE_ID_SFP = 0x3,
+ MLX4_MODULE_ID_QSFP = 0xC,
+ MLX4_MODULE_ID_QSFP_PLUS = 0xD,
+ MLX4_MODULE_ID_QSFP28 = 0x11,
+};
+
static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
{
return (major << 32) | (minor << 16) | subminor;
}
+struct mlx4_phys_caps {
+ u32 gid_phys_table_len[MLX4_MAX_PORTS + 1];
+ u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1];
+ u32 num_phys_eqs;
+ u32 base_sqpn;
+ u32 base_proxy_sqpn;
+ u32 base_tunnel_sqpn;
+};
+
struct mlx4_caps {
u64 fw_ver;
+ u32 function;
int num_ports;
int vl_cap[MLX4_MAX_PORTS + 1];
int ib_mtu_cap[MLX4_MAX_PORTS + 1];
@@ -206,6 +451,7 @@
u64 trans_code[MLX4_MAX_PORTS + 1];
int local_ca_ack_delay;
int num_uars;
+ u32 uar_page_size;
int bf_reg_size;
int bf_regs_per_page;
int max_sq_sg;
@@ -216,7 +462,10 @@
int max_rq_desc_sz;
int max_qp_init_rdma;
int max_qp_dest_rdma;
- int sqp_start;
+ u32 *qp0_proxy;
+ u32 *qp1_proxy;
+ u32 *qp0_tunnel;
+ u32 *qp1_tunnel;
int num_srqs;
int max_srq_wqes;
int max_srq_sge;
@@ -227,9 +476,10 @@
int num_eqs;
int reserved_eqs;
int num_comp_vectors;
+ int comp_pool;
int num_mpts;
- int num_mtt_segs;
- int mtts_per_seg;
+ int max_fmr_maps;
+ u64 num_mtts;
int fmr_reserved_mtts;
int reserved_mtts;
int reserved_mrws;
@@ -238,36 +488,49 @@
int num_amgms;
int reserved_mcgs;
int num_qp_per_mgm;
+ int steering_mode;
int num_pds;
int reserved_pds;
+ int max_xrcds;
+ int reserved_xrcds;
int mtt_entry_sz;
- int reserved_xrcds;
- int max_xrcds;
u32 max_msg_sz;
u32 page_size_cap;
u64 flags;
+ u64 flags2;
u32 bmme_flags;
u32 reserved_lkey;
u16 stat_rate_support;
- int udp_rss;
- int loopback_support;
- int wol;
+ u8 cq_timestamp;
u8 port_width_cap[MLX4_MAX_PORTS + 1];
int max_gso_sz;
+ int max_rss_tbl_sz;
int reserved_qps_cnt[MLX4_NUM_QP_REGION];
int reserved_qps;
int reserved_qps_base[MLX4_NUM_QP_REGION];
int log_num_macs;
int log_num_vlans;
- int log_num_prios;
enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
u8 supported_type[MLX4_MAX_PORTS + 1];
- enum mlx4_port_type port_mask[MLX4_MAX_PORTS + 1];
+ u8 suggested_type[MLX4_MAX_PORTS + 1];
+ u8 default_sense[MLX4_MAX_PORTS + 1];
+ u32 port_mask[MLX4_MAX_PORTS + 1];
enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1];
- u8 counters_mode;
+ u32 max_counters;
+ u8 port_ib_mtu[MLX4_MAX_PORTS + 1];
+ u16 sqp_demux;
+ u32 sync_qp;
+ u32 cq_flags;
+ u32 eqe_size;
+ u32 cqe_size;
+ u8 eqe_factor;
+ u32 userspace_caps; /* userspace must be aware of these */
+ u32 function_caps; /* functions must be aware of these */
+ u8 fast_drop;
+ u16 hca_core_clock;
u32 max_basic_counters;
- u32 max_ext_counters;
- u32 mc_promisc_mode;
+ u32 max_extended_counters;
+ u8 def_counter_index[MLX4_MAX_PORTS + 1];
};
struct mlx4_buf_list {
@@ -284,7 +547,7 @@
};
struct mlx4_mtt {
- u32 first_seg;
+ u32 offset;
int order;
int page_shift;
};
@@ -331,6 +594,18 @@
int enabled;
};
+enum mlx4_mw_type {
+ MLX4_MW_TYPE_1 = 1,
+ MLX4_MW_TYPE_2 = 2,
+};
+
+struct mlx4_mw {
+ u32 key;
+ u32 pd;
+ enum mlx4_mw_type type;
+ int enabled;
+};
+
struct mlx4_fmr {
struct mlx4_mr mr;
struct mlx4_mpt_entry *mpt;
@@ -375,6 +650,8 @@
atomic_t refcount;
struct completion free;
+ int eqn;
+ u16 irq;
};
struct mlx4_qp {
@@ -422,7 +699,8 @@
u8 hop_limit;
__be32 sl_tclass_flowlabel;
u8 dgid[16];
- u32 reserved4[2];
+ u8 s_mac[6];
+ u8 reserved4[2];
__be16 vlan;
u8 mac[6];
};
@@ -432,52 +710,181 @@
struct mlx4_eth_av eth;
};
-struct mlx4_counters {
- __be32 counter_mode;
- __be32 num_ifc;
- u32 reserved[2];
- __be64 rx_frames;
- __be64 rx_bytes;
- __be64 tx_frames;
- __be64 tx_bytes;
+struct mlx4_if_stat_control {
+ u8 reserved1[3];
+ /* Extended counters enabled */
+ u8 cnt_mode;
+ /* Number of interfaces */
+ __be32 num_of_if;
+ __be32 reserved[2];
};
-struct mlx4_counters_ext {
- __be32 counter_mode;
- __be32 num_ifc;
- u32 reserved[2];
- __be64 rx_uni_frames;
- __be64 rx_uni_bytes;
- __be64 rx_mcast_frames;
- __be64 rx_mcast_bytes;
- __be64 rx_bcast_frames;
- __be64 rx_bcast_bytes;
- __be64 rx_nobuf_frames;
- __be64 rx_nobuf_bytes;
- __be64 rx_err_frames;
- __be64 rx_err_bytes;
- __be64 tx_uni_frames;
- __be64 tx_uni_bytes;
- __be64 tx_mcast_frames;
- __be64 tx_mcast_bytes;
- __be64 tx_bcast_frames;
- __be64 tx_bcast_bytes;
- __be64 tx_nobuf_frames;
- __be64 tx_nobuf_bytes;
- __be64 tx_err_frames;
- __be64 tx_err_bytes;
+struct mlx4_if_stat_basic {
+ struct mlx4_if_stat_control control;
+ struct {
+ __be64 IfRxFrames;
+ __be64 IfRxOctets;
+ __be64 IfTxFrames;
+ __be64 IfTxOctets;
+ } counters[];
};
+#define MLX4_IF_STAT_BSC_SZ(ports) (sizeof(struct mlx4_if_stat_basic) +\
+ sizeof(((struct mlx4_if_stat_basic *)0)->\
+ counters[0]) * ports)
+struct mlx4_if_stat_extended {
+ struct mlx4_if_stat_control control;
+ struct {
+ __be64 IfRxUnicastFrames;
+ __be64 IfRxUnicastOctets;
+ __be64 IfRxMulticastFrames;
+ __be64 IfRxMulticastOctets;
+ __be64 IfRxBroadcastFrames;
+ __be64 IfRxBroadcastOctets;
+ __be64 IfRxNoBufferFrames;
+ __be64 IfRxNoBufferOctets;
+ __be64 IfRxErrorFrames;
+ __be64 IfRxErrorOctets;
+ __be32 reserved[39];
+ __be64 IfTxUnicastFrames;
+ __be64 IfTxUnicastOctets;
+ __be64 IfTxMulticastFrames;
+ __be64 IfTxMulticastOctets;
+ __be64 IfTxBroadcastFrames;
+ __be64 IfTxBroadcastOctets;
+ __be64 IfTxDroppedFrames;
+ __be64 IfTxDroppedOctets;
+ __be64 IfTxRequestedFramesSent;
+ __be64 IfTxGeneratedFramesSent;
+ __be64 IfTxTsoOctets;
+ } __packed counters[];
+};
+#define MLX4_IF_STAT_EXT_SZ(ports) (sizeof(struct mlx4_if_stat_extended) +\
+ sizeof(((struct mlx4_if_stat_extended *)\
+ 0)->counters[0]) * ports)
+
+union mlx4_counter {
+ struct mlx4_if_stat_control control;
+ struct mlx4_if_stat_basic basic;
+ struct mlx4_if_stat_extended ext;
+};
+#define MLX4_IF_STAT_SZ(ports) MLX4_IF_STAT_EXT_SZ(ports)
+
+struct mlx4_quotas {
+ int qp;
+ int cq;
+ int srq;
+ int mpt;
+ int mtt;
+ int counter;
+ int xrcd;
+};
+
struct mlx4_dev {
struct pci_dev *pdev;
unsigned long flags;
+ unsigned long num_slaves;
struct mlx4_caps caps;
+ struct mlx4_phys_caps phys_caps;
+ struct mlx4_quotas quotas;
struct radix_tree_root qp_table_tree;
- struct radix_tree_root srq_table_tree;
- u32 rev_id;
+ u8 rev_id;
char board_id[MLX4_BOARD_ID_LEN];
+ u16 vsd_vendor_id;
+ char vsd[MLX4_VSD_LEN];
+ int num_vfs;
+ int numa_node;
+ int oper_log_mgm_entry_size;
+ u64 regid_promisc_array[MLX4_MAX_PORTS + 1];
+ u64 regid_allmulti_array[MLX4_MAX_PORTS + 1];
};
+struct mlx4_clock_params {
+ u64 offset;
+ u8 bar;
+ u8 size;
+};
+
+struct mlx4_eqe {
+ u8 reserved1;
+ u8 type;
+ u8 reserved2;
+ u8 subtype;
+ union {
+ u32 raw[6];
+ struct {
+ __be32 cqn;
+ } __packed comp;
+ struct {
+ u16 reserved1;
+ __be16 token;
+ u32 reserved2;
+ u8 reserved3[3];
+ u8 status;
+ __be64 out_param;
+ } __packed cmd;
+ struct {
+ __be32 qpn;
+ } __packed qp;
+ struct {
+ __be32 srqn;
+ } __packed srq;
+ struct {
+ __be32 cqn;
+ u32 reserved1;
+ u8 reserved2[3];
+ u8 syndrome;
+ } __packed cq_err;
+ struct {
+ u32 reserved1[2];
+ __be32 port;
+ } __packed port_change;
+ struct {
+ #define COMM_CHANNEL_BIT_ARRAY_SIZE 4
+ u32 reserved;
+ u32 bit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
+ } __packed comm_channel_arm;
+ struct {
+ u8 port;
+ u8 reserved[3];
+ __be64 mac;
+ } __packed mac_update;
+ struct {
+ __be32 slave_id;
+ } __packed flr_event;
+ struct {
+ __be16 current_temperature;
+ __be16 warning_threshold;
+ } __packed warming;
+ struct {
+ u8 reserved[3];
+ u8 port;
+ union {
+ struct {
+ __be16 mstr_sm_lid;
+ __be16 port_lid;
+ __be32 changed_attr;
+ u8 reserved[3];
+ u8 mstr_sm_sl;
+ __be64 gid_prefix;
+ } __packed port_info;
+ struct {
+ __be32 block_ptr;
+ __be32 tbl_entries_mask;
+ } __packed tbl_change_info;
+ } params;
+ } __packed port_mgmt_change;
+ struct {
+ u8 reserved[3];
+ u8 port;
+ u32 reserved1[5];
+ } __packed bad_cable;
+ } event;
+ u8 slave_id;
+ u8 reserved3[2];
+ u8 owner;
+} __packed;
+
struct mlx4_init_port_param {
int set_guid0;
int set_node_guid;
@@ -492,44 +899,107 @@
u64 si_guid;
};
-static inline void mlx4_query_steer_cap(struct mlx4_dev *dev, int *log_mac,
- int *log_vlan, int *log_prio)
-{
- *log_mac = dev->caps.log_num_macs;
- *log_vlan = dev->caps.log_num_vlans;
- *log_prio = dev->caps.log_num_prios;
-}
+#define MAD_IFC_DATA_SZ 192
+/* MAD IFC Mailbox */
+struct mlx4_mad_ifc {
+ u8 base_version;
+ u8 mgmt_class;
+ u8 class_version;
+ u8 method;
+ __be16 status;
+ __be16 class_specific;
+ __be64 tid;
+ __be16 attr_id;
+ __be16 resv;
+ __be32 attr_mod;
+ __be64 mkey;
+ __be16 dr_slid;
+ __be16 dr_dlid;
+ u8 reserved[28];
+ u8 data[MAD_IFC_DATA_SZ];
+} __packed;
#define mlx4_foreach_port(port, dev, type) \
for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if ((type) == (dev)->caps.port_mask[(port)])
+#define mlx4_foreach_non_ib_transport_port(port, dev) \
+ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
+ if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB))
+
#define mlx4_foreach_ib_transport_port(port, dev) \
- for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
+ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+#define MLX4_INVALID_SLAVE_ID 0xFF
+
+#define MLX4_SINK_COUNTER_INDEX 0xff
+
+void handle_port_mgmt_change_event(struct work_struct *work);
+
+static inline int mlx4_master_func_num(struct mlx4_dev *dev)
+{
+ return dev->caps.function;
+}
+
+static inline int mlx4_is_master(struct mlx4_dev *dev)
+{
+ return dev->flags & MLX4_FLAG_MASTER;
+}
+
+static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev)
+{
+ return dev->phys_caps.base_sqpn + 8 +
+ 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev);
+}
+
+static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn)
+{
+ return (qpn < dev->phys_caps.base_sqpn + 8 +
+ 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev));
+}
+
+static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn)
+{
+ int guest_proxy_base = dev->phys_caps.base_proxy_sqpn + slave * 8;
+
+ if (qpn >= guest_proxy_base && qpn < guest_proxy_base + 8)
+ return 1;
+
+ return 0;
+}
+
+static inline int mlx4_is_mfunc(struct mlx4_dev *dev)
+{
+ return dev->flags & (MLX4_FLAG_SLAVE | MLX4_FLAG_MASTER);
+}
+
+static inline int mlx4_is_slave(struct mlx4_dev *dev)
+{
+ return dev->flags & MLX4_FLAG_SLAVE;
+}
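
The intended decision tree for the new multi-function predicates, as a sketch:

    static int
    pick_role(struct mlx4_dev *dev)
    {
            if (!mlx4_is_mfunc(dev))
                    return (0);     /* single function: drive the HCA directly */
            if (mlx4_is_master(dev))
                    return (1);     /* PF: also proxies commands for slaves */
            return (2);             /* VF/slave: tunnels through the PF */
    }
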
+
int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
struct mlx4_buf *buf);
void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
{
- if (buf->direct.buf != NULL)
- return buf->direct.buf + offset;
+ if (BITS_PER_LONG == 64 || buf->nbufs == 1)
+ return (u8 *)buf->direct.buf + offset;
else
- return buf->page_list[offset >> PAGE_SHIFT].buf +
+ return (u8 *)buf->page_list[offset >> PAGE_SHIFT].buf +
(offset & (PAGE_SIZE - 1));
}
int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn);
void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);
-
int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn);
void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar);
void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar);
-int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf);
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node);
void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf);
int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
@@ -536,20 +1006,15 @@
struct mlx4_mtt *mtt);
void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
-int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port);
-int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
-
-int mlx4_mr_reserve_range(struct mlx4_dev *dev, int cnt, int align, u32 *base_mridx);
-void mlx4_mr_release_range(struct mlx4_dev *dev, u32 base_mridx, int cnt);
-int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
- u64 iova, u64 size, u32 access, int npages,
- int page_shift, struct mlx4_mr *mr);
int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
int npages, int page_shift, struct mlx4_mr *mr);
-void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr);
-void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr);
+int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr);
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr);
+int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type,
+ struct mlx4_mw *mw);
+void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw);
+int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw);
int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
int start_index, int npages, u64 *page_list);
int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
@@ -565,16 +1030,17 @@
int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
- unsigned vector, int collapsed);
+ unsigned vector, int collapsed, int timestamp_en);
void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
-int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);
+int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
+ int *base, u8 flags);
void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
-int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
+int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn,
struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq);
void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq);
int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark);
@@ -583,41 +1049,279 @@
int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
+int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ int block_mcast_loopback, enum mlx4_protocol prot);
+int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ enum mlx4_protocol prot);
int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
- int block_mcast_loopback, enum mlx4_mcast_prot prot);
+ u8 port, int block_mcast_loopback,
+ enum mlx4_protocol protocol, u64 *reg_id);
int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
- enum mlx4_mcast_prot prot);
+ enum mlx4_protocol protocol, u64 reg_id);
-int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index);
-void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index);
+enum {
+ MLX4_DOMAIN_UVERBS = 0x1000,
+ MLX4_DOMAIN_ETHTOOL = 0x2000,
+ MLX4_DOMAIN_RFS = 0x3000,
+ MLX4_DOMAIN_NIC = 0x5000,
+};
+enum mlx4_net_trans_rule_id {
+ MLX4_NET_TRANS_RULE_ID_ETH = 0,
+ MLX4_NET_TRANS_RULE_ID_IB,
+ MLX4_NET_TRANS_RULE_ID_IPV6,
+ MLX4_NET_TRANS_RULE_ID_IPV4,
+ MLX4_NET_TRANS_RULE_ID_TCP,
+ MLX4_NET_TRANS_RULE_ID_UDP,
+ MLX4_NET_TRANS_RULE_NUM, /* should be last */
+ MLX4_NET_TRANS_RULE_DUMMY = -1, /* force enum to be signed */
+};
+
+extern const u16 __sw_id_hw[];
+
+static inline int map_hw_to_sw_id(u16 header_id)
+{
+ int i;
+
+ for (i = 0; i < MLX4_NET_TRANS_RULE_NUM; i++) {
+ if (header_id == __sw_id_hw[i])
+ return i;
+ }
+ return -EINVAL;
+}
+
+enum mlx4_net_trans_promisc_mode {
+ MLX4_FS_REGULAR = 1,
+ MLX4_FS_ALL_DEFAULT,
+ MLX4_FS_MC_DEFAULT,
+ MLX4_FS_UC_SNIFFER,
+ MLX4_FS_MC_SNIFFER,
+ MLX4_FS_MODE_NUM, /* should be last */
+ MLX4_FS_MODE_DUMMY = -1, /* force enum to be signed */
+};
+
+struct mlx4_spec_eth {
+ u8 dst_mac[6];
+ u8 dst_mac_msk[6];
+ u8 src_mac[6];
+ u8 src_mac_msk[6];
+ u8 ether_type_enable;
+ __be16 ether_type;
+ __be16 vlan_id_msk;
+ __be16 vlan_id;
+};
+
+struct mlx4_spec_tcp_udp {
+ __be16 dst_port;
+ __be16 dst_port_msk;
+ __be16 src_port;
+ __be16 src_port_msk;
+};
+
+struct mlx4_spec_ipv4 {
+ __be32 dst_ip;
+ __be32 dst_ip_msk;
+ __be32 src_ip;
+ __be32 src_ip_msk;
+};
+
+struct mlx4_spec_ib {
+ __be32 l3_qpn;
+ __be32 qpn_msk;
+ u8 dst_gid[16];
+ u8 dst_gid_msk[16];
+};
+
+struct mlx4_spec_list {
+ struct list_head list;
+ enum mlx4_net_trans_rule_id id;
+ union {
+ struct mlx4_spec_eth eth;
+ struct mlx4_spec_ib ib;
+ struct mlx4_spec_ipv4 ipv4;
+ struct mlx4_spec_tcp_udp tcp_udp;
+ };
+};
+
+enum mlx4_net_trans_hw_rule_queue {
+ MLX4_NET_TRANS_Q_FIFO,
+ MLX4_NET_TRANS_Q_LIFO,
+};
+
+struct mlx4_net_trans_rule {
+ struct list_head list;
+ enum mlx4_net_trans_hw_rule_queue queue_mode;
+ bool exclusive;
+ bool allow_loopback;
+ enum mlx4_net_trans_promisc_mode promisc_mode;
+ u8 port;
+ u16 priority;
+ u32 qpn;
+};
+
+struct mlx4_net_trans_rule_hw_ctrl {
+ __be16 prio;
+ u8 type;
+ u8 flags;
+ u8 rsvd1;
+ u8 funcid;
+ u8 vep;
+ u8 port;
+ __be32 qpn;
+ __be32 rsvd2;
+};
+
+struct mlx4_net_trans_rule_hw_ib {
+ u8 size;
+ u8 rsvd1;
+ __be16 id;
+ u32 rsvd2;
+ __be32 l3_qpn;
+ __be32 qpn_mask;
+ u8 dst_gid[16];
+ u8 dst_gid_msk[16];
+} __packed;
+
+struct mlx4_net_trans_rule_hw_eth {
+ u8 size;
+ u8 rsvd;
+ __be16 id;
+ u8 rsvd1[6];
+ u8 dst_mac[6];
+ u16 rsvd2;
+ u8 dst_mac_msk[6];
+ u16 rsvd3;
+ u8 src_mac[6];
+ u16 rsvd4;
+ u8 src_mac_msk[6];
+ u8 rsvd5;
+ u8 ether_type_enable;
+ __be16 ether_type;
+ __be16 vlan_tag_msk;
+ __be16 vlan_tag;
+} __packed;
+
+struct mlx4_net_trans_rule_hw_tcp_udp {
+ u8 size;
+ u8 rsvd;
+ __be16 id;
+ __be16 rsvd1[3];
+ __be16 dst_port;
+ __be16 rsvd2;
+ __be16 dst_port_msk;
+ __be16 rsvd3;
+ __be16 src_port;
+ __be16 rsvd4;
+ __be16 src_port_msk;
+} __packed;
+
+struct mlx4_net_trans_rule_hw_ipv4 {
+ u8 size;
+ u8 rsvd;
+ __be16 id;
+ __be32 rsvd1;
+ __be32 dst_ip;
+ __be32 dst_ip_msk;
+ __be32 src_ip;
+ __be32 src_ip_msk;
+} __packed;
+
+struct _rule_hw {
+ union {
+ struct {
+ u8 size;
+ u8 rsvd;
+ __be16 id;
+ };
+ struct mlx4_net_trans_rule_hw_eth eth;
+ struct mlx4_net_trans_rule_hw_ib ib;
+ struct mlx4_net_trans_rule_hw_ipv4 ipv4;
+ struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp;
+ };
+};
+
+int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn,
+ enum mlx4_net_trans_promisc_mode mode);
+int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port,
+ enum mlx4_net_trans_promisc_mode mode);
+int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
+int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
+int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
+int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
+
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port);
+int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
+void mlx4_set_stats_bitmap(struct mlx4_dev *dev, unsigned long *stats_bitmap);
+int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
+ u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
+int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
+ u8 promisc);
+int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc);
+int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
+ u8 *pg, u16 *ratelimit);
int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index);
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
-int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
- u64 *page_list, int npages, u64 iova, u32 fbo,
- u32 len, u32 *lkey, u32 *rkey, int same_key);
int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
int npages, u64 iova, u32 *lkey, u32 *rkey);
-int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
- u32 access, int max_pages, int max_maps,
- u8 page_shift, struct mlx4_fmr *fmr);
int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
int max_maps, u8 page_shift, struct mlx4_fmr *fmr);
int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
u32 *lkey, u32 *rkey);
-int mlx4_fmr_free_reserved(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
int mlx4_SYNC_TPT(struct mlx4_dev *dev);
int mlx4_query_diag_counters(struct mlx4_dev *mlx4_dev, int array_length,
- u8 op_modifier, u32 in_offset[], u32 counter_out[]);
+ u8 op_modifier, u32 in_offset[],
+ u32 counter_out[]);
+
int mlx4_test_interrupts(struct mlx4_dev *dev);
+int mlx4_assign_eq(struct mlx4_dev *dev, char* name, int * vector);
+void mlx4_release_eq(struct mlx4_dev *dev, int vec);
-void mlx4_get_fc_t11_settings(struct mlx4_dev *dev, int *enable_pre_t11, int *t11_supported);
+int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port);
+int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
-int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
-void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
+int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx);
+void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx);
+int mlx4_flow_attach(struct mlx4_dev *dev,
+ struct mlx4_net_trans_rule *rule, u64 *reg_id);
+int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);
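
The structures above feed these two calls: a rule carries a list of
mlx4_spec_list entries, and mlx4_flow_attach() returns a registration id that
mlx4_flow_detach() later consumes. A hedged sketch steering TCP destination
port 80 on port 1 to a QP ('dev' and 'qpn' are assumed to exist; the priority
is drawn from the MLX4_DOMAIN_* enum above):

    struct mlx4_spec_list spec = { .id = MLX4_NET_TRANS_RULE_ID_TCP };
    struct mlx4_net_trans_rule rule = {
            .queue_mode     = MLX4_NET_TRANS_Q_FIFO,
            .exclusive      = false,
            .allow_loopback = true,
            .promisc_mode   = MLX4_FS_REGULAR,
            .port           = 1,
            .priority       = MLX4_DOMAIN_NIC,
            .qpn            = qpn,
    };
    u64 reg_id;

    spec.tcp_udp.dst_port     = cpu_to_be16(80);
    spec.tcp_udp.dst_port_msk = cpu_to_be16(0xffff);
    INIT_LIST_HEAD(&rule.list);
    list_add_tail(&spec.list, &rule.list);
    if (mlx4_flow_attach(dev, &rule, &reg_id) == 0)
            (void)mlx4_flow_detach(dev, reg_id);    /* remove when done */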
+int map_sw_to_hw_steering_mode(struct mlx4_dev *dev,
+ enum mlx4_net_trans_promisc_mode flow_type);
+int map_sw_to_hw_steering_id(struct mlx4_dev *dev,
+ enum mlx4_net_trans_rule_id id);
+int hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id);
+
+void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port,
+ int i, int val);
+
+int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey);
+
+int mlx4_is_slave_active(struct mlx4_dev *dev, int slave);
+int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port);
+int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port);
+int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr, u16 lid, u8 sl);
+int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change);
+enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port);
+int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event);
+
+void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid);
+__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave);
+int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id);
+int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid);
+
+int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn);
+
+int mlx4_read_clock(struct mlx4_dev *dev);
+int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
+ struct mlx4_clock_params *params);
+
+int mlx4_get_module_info(struct mlx4_dev *dev, u8 port,
+ u16 offset, u16 size, u8 *data);
+
#endif /* MLX4_DEVICE_H */
Modified: trunk/sys/ofed/include/linux/mlx4/doorbell.h
===================================================================
--- trunk/sys/ofed/include/linux/mlx4/doorbell.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/mlx4/doorbell.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -77,7 +77,7 @@
spin_lock_irqsave(doorbell_lock, flags);
__raw_writel((__force u32) val[0], dest);
- __raw_writel((__force u32) val[1], dest + 4);
+ __raw_writel((__force u32) val[1], (u8 *)dest + 4);
spin_unlock_irqrestore(doorbell_lock, flags);
}
Modified: trunk/sys/ofed/include/linux/mlx4/driver.h
===================================================================
--- trunk/sys/ofed/include/linux/mlx4/driver.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/mlx4/driver.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -33,50 +33,104 @@
#ifndef MLX4_DRIVER_H
#define MLX4_DRIVER_H
-#include <linux/device.h>
+#include <linux/mlx4/device.h>
struct mlx4_dev;
+#define MLX4_MAC_MASK 0xffffffffffffULL
+#define MLX4_BE_SHORT_MASK cpu_to_be16(0xffff)
+#define MLX4_BE_WORD_MASK cpu_to_be32(0xffffffff)
+
enum mlx4_dev_event {
MLX4_DEV_EVENT_CATASTROPHIC_ERROR,
MLX4_DEV_EVENT_PORT_UP,
MLX4_DEV_EVENT_PORT_DOWN,
MLX4_DEV_EVENT_PORT_REINIT,
+ MLX4_DEV_EVENT_PORT_MGMT_CHANGE,
+ MLX4_DEV_EVENT_SLAVE_INIT,
+ MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
};
-enum mlx4_query_reply {
- MLX4_QUERY_NOT_MINE = -1,
- MLX4_QUERY_MINE_NOPORT = 0
+struct mlx4_interface {
+ void * (*add) (struct mlx4_dev *dev);
+ void (*remove)(struct mlx4_dev *dev, void *context);
+ void (*event) (struct mlx4_dev *dev, void *context,
+ enum mlx4_dev_event event, unsigned long param);
+ void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port);
+ struct list_head list;
+ enum mlx4_protocol protocol;
};
-enum mlx4_prot {
- MLX4_PROT_IB,
- MLX4_PROT_EN,
+enum {
+ MLX4_MAX_DEVICES = 32,
+ MLX4_DEVS_TBL_SIZE = MLX4_MAX_DEVICES + 1,
+ MLX4_DBDF2VAL_STR_SIZE = 512,
+ MLX4_STR_NAME_SIZE = 64,
+ MLX4_MAX_BDF_VALS = 2,
+ MLX4_ENDOF_TBL = -1LL
};
-enum mlx4_mcast_prot {
- MLX4_MCAST_PROT_IB = 0,
- MLX4_MCAST_PROT_EN = 1,
+struct mlx4_dbdf2val {
+ u64 dbdf;
+ int val[MLX4_MAX_BDF_VALS];
};
-struct mlx4_interface {
- void * (*add) (struct mlx4_dev *dev);
- void (*remove)(struct mlx4_dev *dev, void *context);
- void (*event) (struct mlx4_dev *dev, void *context,
- enum mlx4_dev_event event, int port);
- void * (*get_prot_dev) (struct mlx4_dev *dev, void *context, u8 port);
- enum mlx4_prot protocol;
+struct mlx4_range {
+ int min;
+ int max;
+};
- enum mlx4_query_reply (*query) (void *context, void *);
- struct list_head list;
+/*
+ * The mlx4_dbdf2val_lst struct holds all the data needed to convert a
+ * dbdf-to-value-list string into a dbdf-to-value table.
+ * A dbdf-to-value-list string is a comma-separated list of dbdf-to-value
+ * strings. The format of a dbdf-to-value string is: "[mmmm:]bb:dd.f-v1[;v2]"
+ * mmmm - Domain number (optional)
+ * bb - Bus number
+ * dd - Device number
+ * f - Function number
+ * v1 - First value related to the domain-bus-device-function.
+ * v2 - Second value related to the domain-bus-device-function (optional).
+ * bb, dd - Two hexadecimal digits without a preceding 0x.
+ * mmmm - Four hexadecimal digits without a preceding 0x.
+ * f - One hexadecimal digit without a preceding 0x.
+ * v1,v2 - Numbers in the usual conventions (e.g. 100, 0xd3).
+ * dbdf-to-value-list string format:
+ * "[mmmm:]bb:dd.f-v1[;v2],[mmmm:]bb:dd.f-v1[;v2],..."
+ */
+struct mlx4_dbdf2val_lst {
+ char name[MLX4_STR_NAME_SIZE]; /* String name */
+ char str[MLX4_DBDF2VAL_STR_SIZE]; /* dbdf2val list str */
+ struct mlx4_dbdf2val tbl[MLX4_DEVS_TBL_SIZE];/* dbdf to value table */
+ int num_vals; /* # of vals per dbdf */
+ int def_val[MLX4_MAX_BDF_VALS]; /* Default values */
+ struct mlx4_range range; /* Valid values range */
};
+int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst);
+int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx,
+ int *val);
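
A hedged sketch of the parameter-list format documented above, mapping one
device (domain 0000, bus 04, slot 00, function 0) to the two values 1 and 2;
all field values are illustrative only:

    static struct mlx4_dbdf2val_lst example_lst = {
            .name     = "example_param",
            .str      = "0000:04:00.0-1;2",
            .num_vals = 2,
            .def_val  = { 0, 0 },
            .range    = { .min = 0, .max = 7 },
    };

    /* Parse .str into .tbl; a non-zero return indicates a malformed
     * string or a value outside [range.min, range.max]. */
    if (mlx4_fill_dbdf2val_tbl(&example_lst) != 0)
            return (-EINVAL);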
+
int mlx4_register_interface(struct mlx4_interface *intf);
void mlx4_unregister_interface(struct mlx4_interface *intf);
-void *mlx4_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int port);
-struct mlx4_dev *mlx4_query_interface(void *, int *port);
-void mlx4_set_iboe_counter(struct mlx4_dev *dev, int index, u8 port);
-int mlx4_get_iboe_counter(struct mlx4_dev *dev, u8 port);
+void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto,
+ int port);
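
The reshaped mlx4_interface above is the consumer-registration hook. A hedged
sketch of its minimal use (callback bodies elided; MLX4_PROT_ETH is assumed
to be a member of enum mlx4_protocol):

    static void *example_add(struct mlx4_dev *dev) { return (dev); }
    static void example_remove(struct mlx4_dev *dev, void *ctx) { }
    static void example_event(struct mlx4_dev *dev, void *ctx,
        enum mlx4_dev_event event, unsigned long param) { }

    static struct mlx4_interface example_intf = {
            .add      = example_add,
            .remove   = example_remove,
            .event    = example_event,
            .protocol = MLX4_PROT_ETH,
    };

    /* mlx4_register_interface(&example_intf) invokes .add for every
     * existing device; .event is called with the enum values above. */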
+#ifndef ETH_ALEN
+#define ETH_ALEN 6
+#endif
+static inline u64 mlx4_mac_to_u64(const u8 *addr)
+{
+ u64 mac = 0;
+ int i;
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ mac <<= 8;
+ mac |= addr[i];
+ }
+ return mac;
+}
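
mlx4_mac_to_u64() packs a big-endian Ethernet address into the low 48 bits of
a u64, the form the MAC-registration calls expect. A short sketch (the
address bytes are arbitrary):

    u8 addr[ETH_ALEN] = { 0x00, 0x02, 0xc9, 0x12, 0x34, 0x56 };
    u64 mac = mlx4_mac_to_u64(addr);    /* 0x0002c9123456ULL */

    mac &= MLX4_MAC_MASK;               /* no-op here; keeps the low 48 bits */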
+
#endif /* MLX4_DRIVER_H */
Modified: trunk/sys/ofed/include/linux/mlx4/qp.h
===================================================================
--- trunk/sys/ofed/include/linux/mlx4/qp.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/mlx4/qp.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -39,6 +39,21 @@
#define MLX4_INVALID_LKEY 0x100
+#define DS_SIZE_ALIGNMENT 16
+
+#define SET_BYTE_COUNT(byte_count) cpu_to_be32(byte_count)
+#define SET_LSO_MSS(mss_hdr_size) cpu_to_be32(mss_hdr_size)
+#define DS_BYTE_COUNT_MASK cpu_to_be32(0x7fffffff)
+
+enum ib_m_qp_attr_mask {
+ IB_M_EXT_CLASS_1 = 1 << 28,
+ IB_M_EXT_CLASS_2 = 1 << 29,
+ IB_M_EXT_CLASS_3 = 1 << 30,
+
+ IB_M_QP_MOD_VEND_MASK = (IB_M_EXT_CLASS_1 | IB_M_EXT_CLASS_2 |
+ IB_M_EXT_CLASS_3)
+};
+
enum mlx4_qp_optpar {
MLX4_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
MLX4_QP_OPTPAR_RRE = 1 << 1,
@@ -95,11 +110,42 @@
MLX4_QP_BIT_RWE = 1 << 14,
MLX4_QP_BIT_RAE = 1 << 13,
MLX4_QP_BIT_RIC = 1 << 4,
+ MLX4_QP_BIT_COLL_SYNC_RQ = 1 << 2,
+ MLX4_QP_BIT_COLL_SYNC_SQ = 1 << 1,
+ MLX4_QP_BIT_COLL_MASTER = 1 << 0
};
+enum {
+ MLX4_RSS_HASH_XOR = 0,
+ MLX4_RSS_HASH_TOP = 1,
+
+ MLX4_RSS_UDP_IPV6 = 1 << 0,
+ MLX4_RSS_UDP_IPV4 = 1 << 1,
+ MLX4_RSS_TCP_IPV6 = 1 << 2,
+ MLX4_RSS_IPV6 = 1 << 3,
+ MLX4_RSS_TCP_IPV4 = 1 << 4,
+ MLX4_RSS_IPV4 = 1 << 5,
+
+ /* offset of mlx4_rss_context within mlx4_qp_context.pri_path */
+ MLX4_RSS_OFFSET_IN_QPC_PRI_PATH = 0x24,
+ /* offset of the RSS-indirection-QP flag within mlx4_qp_context.flags */
+ MLX4_RSS_QPC_FLAG_OFFSET = 13,
+};
+
+struct mlx4_rss_context {
+ __be32 base_qpn;
+ __be32 default_qpn;
+ u16 reserved;
+ u8 hash_fn;
+ u8 flags;
+ __be32 rss_key[10];
+ __be32 base_qpn_udp;
+};
+
struct mlx4_qp_path {
u8 fl;
- u8 reserved1[2];
+ u8 vlan_control;
+ u8 disable_pkey_check;
u8 pkey_index;
u8 counter_index;
u8 grh_mylmc;
@@ -112,11 +158,36 @@
u8 rgid[16];
u8 sched_queue;
u8 vlan_index;
- u8 reserved3[2];
+ u8 feup;
+ u8 fvl_rx;
u8 reserved4[2];
u8 dmac[6];
};
+enum { /* fl */
+ MLX4_FL_CV = 1 << 6,
+ MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2,
+ MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1,
+ MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0,
+};
+enum { /* vlan_control */
+ MLX4_VLAN_CTRL_ETH_SRC_CHECK_IF_COUNTER = 1 << 7,
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED = 1 << 6,
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED = 1 << 2,
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED = 1 << 1, /* 802.1p priority tag */
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED = 1 << 0
+};
+
+enum { /* feup */
+ MLX4_FEUP_FORCE_ETH_UP = 1 << 6, /* force Eth UP */
+ MLX4_FSM_FORCE_ETH_SRC_MAC = 1 << 5, /* force Source MAC */
+ MLX4_FVL_FORCE_ETH_VLAN = 1 << 3 /* force Eth vlan */
+};
+
+enum { /* fvl_rx */
+ MLX4_FVL_RX_FORCE_ETH_VLAN = 1 << 0 /* enforce Eth rx vlan */
+};
+
struct mlx4_qp_context {
__be32 flags;
__be32 pd;
@@ -153,23 +224,57 @@
u8 reserved4[2];
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
- u8 VE;
- u8 reserved5;
- __be16 VFT_id_prio;
- u8 reserved6;
- u8 exch_size;
- __be16 exch_base;
- u8 VFT_hop_cnt;
- u8 my_fc_id_idx;
- __be16 reserved7;
- u32 reserved8[7];
+ u32 reserved5[10];
};
+struct mlx4_update_qp_context {
+ __be64 qp_mask;
+ __be64 primary_addr_path_mask;
+ __be64 secondary_addr_path_mask;
+ u64 reserved1;
+ struct mlx4_qp_context qp_context;
+ u64 reserved2[58];
+};
+
+enum {
+ MLX4_UPD_QP_MASK_PM_STATE = 32,
+ MLX4_UPD_QP_MASK_VSD = 33,
+};
+
+enum {
+ MLX4_UPD_QP_PATH_MASK_PKEY_INDEX = 0 + 32,
+ MLX4_UPD_QP_PATH_MASK_FSM = 1 + 32,
+ MLX4_UPD_QP_PATH_MASK_MAC_INDEX = 2 + 32,
+ MLX4_UPD_QP_PATH_MASK_FVL = 3 + 32,
+ MLX4_UPD_QP_PATH_MASK_CV = 4 + 32,
+ MLX4_UPD_QP_PATH_MASK_VLAN_INDEX = 5 + 32,
+ MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN = 6 + 32,
+ MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_UNTAGGED = 7 + 32,
+ MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_1P = 8 + 32,
+ MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_TAGGED = 9 + 32,
+ MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_UNTAGGED = 10 + 32,
+ MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_1P = 11 + 32,
+ MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_TAGGED = 12 + 32,
+ MLX4_UPD_QP_PATH_MASK_FEUP = 13 + 32,
+ MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE = 14 + 32,
+ MLX4_UPD_QP_PATH_MASK_IF_COUNTER_INDEX = 15 + 32,
+ MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32,
+ MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32,
+ MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32,
+};
+
+enum { /* param3 */
+ MLX4_STRIP_VLAN = 1 << 30
+};
+
+
/* Which firmware version adds support for NEC (NoErrorCompletion) bit */
#define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232)
enum {
+ MLX4_WQE_CTRL_OWN = 1 << 31,
MLX4_WQE_CTRL_NEC = 1 << 29,
+ MLX4_WQE_CTRL_RR = 1 << 6,
MLX4_WQE_CTRL_FENCE = 1 << 6,
MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2,
MLX4_WQE_CTRL_SOLICITED = 1 << 1,
@@ -192,8 +297,12 @@
* [4] IP checksum
* [3:2] C (generate completion queue entry)
* [1] SE (solicited event)
+ * [0] FL (force loopback)
*/
- __be32 srcrb_flags;
+ union {
+ __be32 srcrb_flags;
+ __be16 srcrb_flags16[2];
+ };
/*
* imm is immediate data for send/RDMA write w/ immediate;
* also invalidation key for send with invalidate; input
@@ -204,8 +313,7 @@
enum {
MLX4_WQE_MLX_VL15 = 1 << 17,
- MLX4_WQE_MLX_SLR = 1 << 16,
- MLX4_WQE_MLX_ICRC = 1 << 4
+ MLX4_WQE_MLX_SLR = 1 << 16
};
struct mlx4_wqe_mlx_seg {
@@ -212,7 +320,8 @@
u8 owner;
u8 reserved1[2];
u8 opcode;
- u8 reserved2[3];
+ __be16 sched_prio;
+ u8 reserved2;
u8 size;
/*
* [17] VL15
@@ -241,6 +350,11 @@
__be32 header[0];
};
+enum mlx4_wqe_bind_seg_flags2 {
+ MLX4_WQE_BIND_TYPE_2 = (1<<31),
+ MLX4_WQE_BIND_ZERO_BASED = (1<<30),
+};
+
struct mlx4_wqe_bind_seg {
__be32 flags1;
__be32 flags2;
@@ -253,9 +367,9 @@
enum {
MLX4_WQE_FMR_PERM_LOCAL_READ = 1 << 27,
MLX4_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28,
- MLX4_WQE_FMR_PERM_REMOTE_READ = 1 << 29,
- MLX4_WQE_FMR_PERM_REMOTE_WRITE = 1 << 30,
- MLX4_WQE_FMR_PERM_ATOMIC = 1 << 31
+ MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ = 1 << 29,
+ MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE = 1 << 30,
+ MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC = 1 << 31
};
struct mlx4_wqe_fmr_seg {
@@ -280,12 +394,10 @@
};
struct mlx4_wqe_local_inval_seg {
- __be32 flags;
- u32 reserved1;
+ u64 reserved1;
__be32 mem_key;
- u32 reserved2[2];
- __be32 guest_id;
- __be64 pa;
+ u32 reserved2;
+ u64 reserved3[2];
};
struct mlx4_wqe_raddr_seg {
@@ -338,9 +450,6 @@
return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1));
}
-struct mlx4_qp *mlx4_qp_lookup_lock(struct mlx4_dev *dev, u32 qpn);
void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);
-int mlx4_qp_get_region(struct mlx4_dev *dev, enum mlx4_qp_region region,
- int *base_qpn, int *cnt);
#endif /* MLX4_QP_H */
Modified: trunk/sys/ofed/include/linux/mlx4/srq.h
===================================================================
--- trunk/sys/ofed/include/linux/mlx4/srq.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/mlx4/srq.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -33,9 +33,6 @@
#ifndef MLX4_SRQ_H
#define MLX4_SRQ_H
-#include <linux/types.h>
-#include <linux/mlx4/device.h>
-
struct mlx4_wqe_srq_next_seg {
u16 reserved1;
__be16 next_wqe_index;
@@ -42,13 +39,6 @@
u32 reserved2[3];
};
-void mlx4_srq_invalidate(struct mlx4_dev *dev, struct mlx4_srq *srq);
-void mlx4_srq_remove(struct mlx4_dev *dev, struct mlx4_srq *srq);
+struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn);
-static inline struct mlx4_srq *__mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn)
-{
- return radix_tree_lookup(&dev->srq_table_tree,
- srqn & (dev->caps.num_srqs - 1));
-}
-
#endif /* MLX4_SRQ_H */
Modified: trunk/sys/ofed/include/linux/mm.h
===================================================================
--- trunk/sys/ofed/include/linux/mm.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/mm.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -39,6 +40,7 @@
vm_offset_t vm_end;
vm_offset_t vm_pgoff;
vm_paddr_t vm_pfn; /* PFN For mmap. */
+ vm_size_t vm_len; /* length for mmap. */
vm_memattr_t vm_page_prot;
};
@@ -77,6 +79,7 @@
{
vma->vm_page_prot = prot;
vma->vm_pfn = pfn;
+ vma->vm_len = size;
return (0);
}
Modified: trunk/sys/ofed/include/linux/module.h
===================================================================
--- trunk/sys/ofed/include/linux/module.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/module.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,13 +38,21 @@
#define MODULE_AUTHOR(name)
#define MODULE_DESCRIPTION(name)
#define MODULE_LICENSE(name)
-#define MODULE_VERSION(name)
+#ifndef MODULE_VERSION
+#define MODULE_VERSION(name)
+#endif
+
#define THIS_MODULE ((struct module *)0)
#define EXPORT_SYMBOL(name)
#define EXPORT_SYMBOL_GPL(name)
+/* OFED pre-module initialization */
+#define SI_SUB_OFED_PREINIT (SI_SUB_ROOT_CONF - 2)
+/* OFED default module initialization */
+#define SI_SUB_OFED_MODINIT (SI_SUB_ROOT_CONF - 1)
+
#include <sys/linker.h>
static inline void
@@ -68,17 +77,20 @@
}
#define module_init(fn) \
- SYSINIT(fn, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, _module_run, (fn))
+ SYSINIT(fn, SI_SUB_OFED_MODINIT, SI_ORDER_FIRST, _module_run, (fn))
+#define module_exit(fn) \
+ SYSUNINIT(fn, SI_SUB_OFED_MODINIT, SI_ORDER_SECOND, _module_run, (fn))
+
/*
- * XXX This is a freebsdism designed to work around not having a module
- * load order resolver built in.
+ * The following two macros are a workaround for not having a module
+ * load and unload order resolver:
*/
#define module_init_order(fn, order) \
- SYSINIT(fn, SI_SUB_RUN_SCHEDULER, (order), _module_run, (fn))
+ SYSINIT(fn, SI_SUB_OFED_MODINIT, (order), _module_run, (fn))
-#define module_exit(fn) \
- SYSUNINIT(fn, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, _module_run, (fn))
+#define module_exit_order(fn, order) \
+ SYSUNINIT(fn, SI_SUB_OFED_MODINIT, (order), _module_run, (fn))
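
A hedged sketch of the ordered variants (the init/exit bodies are
placeholders; SI_ORDER_THIRD is a standard FreeBSD sysinit order):

    static int example_init(void) { return (0); }
    static void example_exit(void) { }

    module_init_order(example_init, SI_ORDER_THIRD);
    module_exit_order(example_exit, SI_ORDER_THIRD);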
#define module_get(module)
#define module_put(module)
Modified: trunk/sys/ofed/include/linux/moduleparam.h
===================================================================
--- trunk/sys/ofed/include/linux/moduleparam.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/moduleparam.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,6 +26,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#ifndef _LINUX_MODULEPARAM_H_
#define _LINUX_MODULEPARAM_H_
@@ -81,6 +83,8 @@
SYSINIT(name##_param_sysinit, SI_SUB_DRIVERS, SI_ORDER_FIRST, \
param_sysinit, &__param_##name);
+#define module_param_string(name, string, len, perm)
+
#define module_param_named(name, var, type, mode) \
module_param_call(name, param_set_##type, param_get_##type, &var, mode)
@@ -87,6 +91,9 @@
#define module_param(var, type, mode) \
module_param_named(var, var, type, mode)
+#define module_param_array(var, type, addr_argc, mode) \
+ module_param_named(var, var, type, mode)
+
#define MODULE_PARM_DESC(name, desc)
static inline int
Modified: trunk/sys/ofed/include/linux/mutex.h
===================================================================
--- trunk/sys/ofed/include/linux/mutex.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/mutex.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/net.h
===================================================================
--- trunk/sys/ofed/include/linux/net.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/net.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -44,23 +45,23 @@
sock_getname(struct socket *so, struct sockaddr *addr, int *sockaddr_len,
int peer)
{
- struct sockaddr **nam;
+ struct sockaddr *nam;
int error;
nam = NULL;
- if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
- return (-ENOTCONN);
+ if (peer) {
+ if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
+ return (-ENOTCONN);
- if (peer)
- error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, nam);
- else
- error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, nam);
+ error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &nam);
+ } else
+ error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &nam);
if (error)
return (-error);
- *addr = **nam;
+ *addr = *nam;
*sockaddr_len = addr->sa_len;
- free(*nam, M_SONAME);
+ free(nam, M_SONAME);
return (0);
}
Modified: trunk/sys/ofed/include/linux/netdevice.h
===================================================================
--- trunk/sys/ofed/include/linux/netdevice.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/netdevice.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -39,7 +40,6 @@
#include <linux/completion.h>
#include <linux/device.h>
-#include <linux/ethtool.h>
#include <linux/workqueue.h>
#include <linux/net.h>
#include <linux/notifier.h>
@@ -97,6 +97,24 @@
nb->notifier_call(nb, NETDEV_UNREGISTER, ifp);
}
+static inline void
+_handle_iflladdr_event(void *arg, struct ifnet *ifp)
+{
+ struct notifier_block *nb;
+
+ nb = arg;
+ nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp);
+}
+
+static inline void
+_handle_ifaddr_event(void *arg, struct ifnet *ifp)
+{
+ struct notifier_block *nb;
+
+ nb = arg;
+ nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp);
+}
+
static inline int
register_netdevice_notifier(struct notifier_block *nb)
{
@@ -107,10 +125,22 @@
ifnet_arrival_event, _handle_ifnet_arrival_event, nb, 0);
nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER(
ifnet_departure_event, _handle_ifnet_departure_event, nb, 0);
+ nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER(
+ iflladdr_event, _handle_iflladdr_event, nb, 0);
+
return (0);
}
static inline int
+register_inetaddr_notifier(struct notifier_block *nb)
+{
+
+ nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER(
+ ifaddr_event, _handle_ifaddr_event, nb, 0);
+ return (0);
+}
+
+static inline int
unregister_netdevice_notifier(struct notifier_block *nb)
{
@@ -118,9 +148,23 @@
EVENTHANDLER_DEREGISTER(ifnet_arrival_event, nb->tags[NETDEV_REGISTER]);
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
nb->tags[NETDEV_UNREGISTER]);
+ EVENTHANDLER_DEREGISTER(iflladdr_event,
+ nb->tags[NETDEV_CHANGEADDR]);
+
return (0);
}
+static inline int
+unregister_inetaddr_notifier(struct notifier_block *nb)
+{
+
+ EVENTHANDLER_DEREGISTER(ifaddr_event,
+ nb->tags[NETDEV_CHANGEIFADDR]);
+
+ return (0);
+}
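
A hedged sketch of a consumer of the new CHANGEADDR/CHANGEIFADDR plumbing
(the handler body is a placeholder; 'data' is the affected struct ifnet):

    static int
    example_notify(struct notifier_block *unused, unsigned long event,
        void *data)
    {
            if (event == NETDEV_CHANGEADDR)
                    ;       /* react to the link-layer address change */
            return (NOTIFY_DONE);
    }

    static struct notifier_block example_nb = {
            .notifier_call = example_notify,
    };

    /* register_netdevice_notifier(&example_nb) hooks the FreeBSD
     * eventhandlers; unregister_netdevice_notifier() detaches them. */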
+
+
#define rtnl_lock()
#define rtnl_unlock()
Modified: trunk/sys/ofed/include/linux/notifier.h
===================================================================
--- trunk/sys/ofed/include/linux/notifier.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/notifier.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,7 +36,7 @@
* Max number of FreeBSD events to map to Linux events per notify type.
*/
#define NOTIFY_DONE 0
-#define _NOTIFY_COUNT 5
+#define _NOTIFY_COUNT 7
struct notifier_block {
int (*notifier_call)(struct notifier_block *, unsigned long, void *);
@@ -49,6 +50,8 @@
#define NETDEV_DOWN 0x0002
#define NETDEV_REGISTER 0x0003
#define NETDEV_UNREGISTER 0x0004
+#define NETDEV_CHANGEADDR 0x0005
+#define NETDEV_CHANGEIFADDR 0x0006
#endif /* _LINUX_NOTIFIER_H_ */
Modified: trunk/sys/ofed/include/linux/page.h
===================================================================
--- trunk/sys/ofed/include/linux/page.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/page.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/pci.h
===================================================================
--- trunk/sys/ofed/include/linux/pci.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/pci.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -43,7 +44,6 @@
#include <machine/resource.h>
-#include <linux/init.h>
#include <linux/list.h>
#include <linux/dmapool.h>
#include <linux/dma-mapping.h>
@@ -72,19 +72,49 @@
#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
#define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274
+#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
+#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
+#define PCI_FUNC(devfn) ((devfn) & 0x07)
-#define PCI_VDEVICE(vendor, device) \
- PCI_VENDOR_ID_##vendor, (device), PCI_ANY_ID, PCI_ANY_ID, 0, 0
-#define PCI_DEVICE(vendor, device) \
- (vendor), (device), PCI_ANY_ID, PCI_ANY_ID, 0, 0
+#define PCI_VDEVICE(_vendor, _device) \
+ .vendor = PCI_VENDOR_ID_##_vendor, .device = (_device), \
+ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
+#define PCI_DEVICE(_vendor, _device) \
+ .vendor = (_vendor), .device = (_device), \
+ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
#define to_pci_dev(n) container_of(n, struct pci_dev, dev)
-#define PCI_VENDOR_ID PCIR_DEVVENDOR
-#define PCI_COMMAND PCIR_COMMAND
-#define PCI_EXP_DEVCTL PCIR_EXPRESS_DEVICE_CTL
-#define PCI_EXP_LNKCTL PCIR_EXPRESS_LINK_CTL
+#define PCI_VENDOR_ID PCIR_DEVVENDOR
+#define PCI_COMMAND PCIR_COMMAND
+#define PCI_EXP_DEVCTL PCIER_DEVICE_CTL /* Device Control */
+#define PCI_EXP_LNKCTL PCIER_LINK_CTL /* Link Control */
+#define PCI_EXP_FLAGS_TYPE PCIEM_FLAGS_TYPE /* Device/Port type */
+#define PCI_EXP_DEVCAP PCIER_DEVICE_CAP /* Device capabilities */
+#define PCI_EXP_DEVSTA PCIER_DEVICE_STA /* Device Status */
+#define PCI_EXP_LNKCAP PCIER_LINK_CAP /* Link Capabilities */
+#define PCI_EXP_LNKSTA PCIER_LINK_STA /* Link Status */
+#define PCI_EXP_SLTCAP PCIER_SLOT_CAP /* Slot Capabilities */
+#define PCI_EXP_SLTCTL PCIER_SLOT_CTL /* Slot Control */
+#define PCI_EXP_SLTSTA PCIER_SLOT_STA /* Slot Status */
+#define PCI_EXP_RTCTL PCIER_ROOT_CTL /* Root Control */
+#define PCI_EXP_RTCAP PCIER_ROOT_CAP /* Root Capabilities */
+#define PCI_EXP_RTSTA PCIER_ROOT_STA /* Root Status */
+#define PCI_EXP_DEVCAP2 PCIER_DEVICE_CAP2 /* Device Capabilities 2 */
+#define PCI_EXP_DEVCTL2 PCIER_DEVICE_CTL2 /* Device Control 2 */
+#define PCI_EXP_LNKCAP2 PCIER_LINK_CAP2 /* Link Capabilities 2 */
+#define PCI_EXP_LNKCTL2 PCIER_LINK_CTL2 /* Link Control 2 */
+#define PCI_EXP_LNKSTA2 PCIER_LINK_STA2 /* Link Status 2 */
+#define PCI_EXP_FLAGS PCIER_FLAGS /* Capabilities register */
+#define PCI_EXP_FLAGS_VERS PCIEM_FLAGS_VERSION /* Capability version */
+#define PCI_EXP_TYPE_ROOT_PORT PCIEM_TYPE_ROOT_PORT /* Root Port */
+#define PCI_EXP_TYPE_ENDPOINT PCIEM_TYPE_ENDPOINT /* Express Endpoint */
+#define PCI_EXP_TYPE_LEG_END PCIEM_TYPE_LEGACY_ENDPOINT /* Legacy Endpoint */
+#define PCI_EXP_TYPE_DOWNSTREAM PCIEM_TYPE_DOWNSTREAM_PORT /* Downstream Port */
+#define PCI_EXP_FLAGS_SLOT PCIEM_FLAGS_SLOT /* Slot implemented */
+#define PCI_EXP_TYPE_RC_EC PCIEM_TYPE_ROOT_EC /* Root Complex Event Collector */
+
#define IORESOURCE_MEM SYS_RES_MEMORY
#define IORESOURCE_IO SYS_RES_IOPORT
#define IORESOURCE_IRQ SYS_RES_IRQ
@@ -91,14 +121,18 @@
struct pci_dev;
+
struct pci_driver {
struct list_head links;
char *name;
- struct pci_device_id *id_table;
+ const struct pci_device_id *id_table;
int (*probe)(struct pci_dev *dev, const struct pci_device_id *id);
void (*remove)(struct pci_dev *dev);
+ int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */
+ int (*resume) (struct pci_dev *dev); /* Device woken up */
driver_t driver;
devclass_t bsdclass;
+ const struct pci_error_handlers *err_handler;
};
extern struct list_head pci_drivers;
@@ -115,6 +149,8 @@
uint16_t device;
uint16_t vendor;
unsigned int irq;
+ unsigned int devfn;
+ u8 revision;
};
static inline struct resource_list_entry *
@@ -234,6 +270,14 @@
}
static inline int
+pci_clear_master(struct pci_dev *pdev)
+{
+
+ pci_disable_busmaster(pdev->dev.bsddev);
+ return (0);
+}
+
+static inline int
pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
{
int rid;
@@ -294,6 +338,7 @@
#define PCI_CAP_ID_EXP PCIY_EXPRESS
#define PCI_CAP_ID_PCIX PCIY_PCIX
+
static inline int
pci_find_capability(struct pci_dev *pdev, int capid)
{
@@ -304,6 +349,26 @@
return (reg);
}
+
+
+
+/**
+ * pci_pcie_cap - get the saved PCIe capability offset
+ * @dev: PCI device
+ *
+ * The PCIe capability offset is calculated at PCI device initialization
+ * time and saved in the data structure. This function returns the saved
+ * offset. Using it instead of pci_find_capability() avoids an
+ * unnecessary search of the PCI configuration space. If you need to
+ * calculate the PCIe capability offset from the raw device for some
+ * reason, use pci_find_capability() instead.
+ */
+static inline int pci_pcie_cap(struct pci_dev *dev)
+{
+ return pci_find_capability(dev, PCI_CAP_ID_EXP);
+}
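
A short usage sketch ('pdev' is assumed; note that in this shim the "saved
offset" is simply re-looked-up via pci_find_capability()):

    u16 lnkctl;
    int pos = pci_pcie_cap(pdev);

    if (pos != 0)
            pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &lnkctl);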
+
+
static inline int
pci_read_config_byte(struct pci_dev *pdev, int where, u8 *val)
{
@@ -353,9 +418,9 @@
}
static struct pci_driver *
-linux_pci_find(device_t dev, struct pci_device_id **idp)
+linux_pci_find(device_t dev, const struct pci_device_id **idp)
{
- struct pci_device_id *id;
+ const struct pci_device_id *id;
struct pci_driver *pdrv;
uint16_t vendor;
uint16_t device;
@@ -380,7 +445,7 @@
static inline int
linux_pci_probe(device_t dev)
{
- struct pci_device_id *id;
+ const struct pci_device_id *id;
struct pci_driver *pdrv;
if ((pdrv = linux_pci_find(dev, &id)) == NULL)
@@ -397,7 +462,7 @@
struct resource_list_entry *rle;
struct pci_dev *pdev;
struct pci_driver *pdrv;
- struct pci_device_id *id;
+ const struct pci_device_id *id;
int error;
pdrv = linux_pci_find(dev, &id);
@@ -519,6 +584,14 @@
avail = nreq;
if ((error = -pci_alloc_msix(pdev->dev.bsddev, &avail)) != 0)
return error;
+ /*
+ * Handle the case where pci_alloc_msix() may allocate fewer
+ * interrupts than requested and return without an error:
+ */
+ if (avail < nreq) {
+ pci_release_msi(pdev->dev.bsddev);
+ return avail;
+ }
rle = _pci_get_rle(pdev, SYS_RES_IRQ, 1);
pdev->dev.msix = rle->start;
pdev->dev.msix_max = rle->start + avail;
@@ -527,6 +600,54 @@
return (0);
}
+#define pci_enable_msix_range linux_pci_enable_msix_range
+static inline int
+pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
+ int minvec, int maxvec)
+{
+ int nvec = maxvec;
+ int rc;
+
+ if (maxvec < minvec)
+ return (-ERANGE);
+
+ do {
+ rc = pci_enable_msix(dev, entries, nvec);
+ if (rc < 0) {
+ return (rc);
+ } else if (rc > 0) {
+ if (rc < minvec)
+ return (-ENOSPC);
+ nvec = rc;
+ }
+ } while (rc);
+ return (nvec);
+}
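
A hedged usage sketch: ask for up to eight vectors but accept as few as two
('pdev' is assumed; msix_entry's .entry index field follows the Linux layout):

    struct msix_entry entries[8];
    int i, nvec;

    for (i = 0; i < 8; i++)
            entries[i].entry = i;
    nvec = pci_enable_msix_range(pdev, entries, 2, 8);
    if (nvec < 0)
            return (nvec);  /* -ERANGE, -ENOSPC, or an allocation error */
    /* otherwise nvec vectors in [2, 8] were granted */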
+
+static inline int pci_channel_offline(struct pci_dev *pdev)
+{
+ return false;
+}
+
+static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+{
+ return -ENODEV;
+}
+static inline void pci_disable_sriov(struct pci_dev *dev)
+{
+}
+
+/**
+ * DEFINE_PCI_DEVICE_TABLE - macro used to describe a PCI device table
+ * @_table: device table name
+ *
+ * This macro is used to create a struct pci_device_id array (a device table)
+ * in a generic manner.
+ */
+#define DEFINE_PCI_DEVICE_TABLE(_table) \
+ const struct pci_device_id _table[] __devinitdata
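
A hedged sketch of a table built with this macro (PCI_VENDOR_ID_MELLANOX is
assumed to be defined near the device IDs above; the second ID is made up):

    DEFINE_PCI_DEVICE_TABLE(example_pci_table) = {
            { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI) },
            { PCI_VDEVICE(MELLANOX, 0x1003) },      /* hypothetical ID */
            { 0, }                                  /* terminator */
    };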
+
+
/* XXX This should not be necessary. */
#define pcix_set_mmrbc(d, v) 0
#define pcix_get_max_mmrbc(d) 0
@@ -576,5 +697,173 @@
#define pci_unmap_len dma_unmap_len
#define pci_unmap_len_set dma_unmap_len_set
+typedef unsigned int __bitwise pci_channel_state_t;
+typedef unsigned int __bitwise pci_ers_result_t;
+enum pci_channel_state {
+ /* I/O channel is in normal state */
+ pci_channel_io_normal = (__force pci_channel_state_t) 1,
+
+ /* I/O to channel is blocked */
+ pci_channel_io_frozen = (__force pci_channel_state_t) 2,
+
+ /* PCI card is dead */
+ pci_channel_io_perm_failure = (__force pci_channel_state_t) 3,
+};
+
+enum pci_ers_result {
+ /* no result/none/not supported in device driver */
+ PCI_ERS_RESULT_NONE = (__force pci_ers_result_t) 1,
+
+ /* Device driver can recover without slot reset */
+ PCI_ERS_RESULT_CAN_RECOVER = (__force pci_ers_result_t) 2,
+
+ /* Device driver wants slot to be reset. */
+ PCI_ERS_RESULT_NEED_RESET = (__force pci_ers_result_t) 3,
+
+ /* Device has completely failed, is unrecoverable */
+ PCI_ERS_RESULT_DISCONNECT = (__force pci_ers_result_t) 4,
+
+ /* Device driver is fully recovered and operational */
+ PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5,
+};
+
+
+/* PCI bus error event callbacks */
+struct pci_error_handlers {
+ /* PCI bus error detected on this device */
+ pci_ers_result_t (*error_detected)(struct pci_dev *dev,
+ enum pci_channel_state error);
+
+ /* MMIO has been re-enabled, but not DMA */
+ pci_ers_result_t (*mmio_enabled)(struct pci_dev *dev);
+
+ /* PCI Express link has been reset */
+ pci_ers_result_t (*link_reset)(struct pci_dev *dev);
+
+ /* PCI slot has been reset */
+ pci_ers_result_t (*slot_reset)(struct pci_dev *dev);
+
+ /* Device driver may resume normal operations */
+ void (*resume)(struct pci_dev *dev);
+};
+
+/* FreeBSD does not support SR-IOV yet. */
+static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
+{
+ return dev;
+}
+
+static inline bool pci_is_pcie(struct pci_dev *dev)
+{
+ return !!pci_pcie_cap(dev);
+}
+
+static inline u16 pcie_flags_reg(struct pci_dev *dev)
+{
+ int pos;
+ u16 reg16;
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ if (!pos)
+ return 0;
+
+ pci_read_config_word(dev, pos + PCI_EXP_FLAGS, ®16);
+
+ return reg16;
+}
+
+
+static inline int pci_pcie_type(struct pci_dev *dev)
+{
+ return (pcie_flags_reg(dev) & PCI_EXP_FLAGS_TYPE) >> 4;
+}
+
+static inline int pcie_cap_version(struct pci_dev *dev)
+{
+ return pcie_flags_reg(dev) & PCI_EXP_FLAGS_VERS;
+}
+
+static inline bool pcie_cap_has_lnkctl(struct pci_dev *dev)
+{
+ int type = pci_pcie_type(dev);
+
+ return pcie_cap_version(dev) > 1 ||
+ type == PCI_EXP_TYPE_ROOT_PORT ||
+ type == PCI_EXP_TYPE_ENDPOINT ||
+ type == PCI_EXP_TYPE_LEG_END;
+}
+
+static inline bool pcie_cap_has_devctl(const struct pci_dev *dev)
+{
+ return true;
+}
+
+static inline bool pcie_cap_has_sltctl(struct pci_dev *dev)
+{
+ int type = pci_pcie_type(dev);
+
+ return pcie_cap_version(dev) > 1 ||
+ type == PCI_EXP_TYPE_ROOT_PORT ||
+ (type == PCI_EXP_TYPE_DOWNSTREAM &&
+ pcie_flags_reg(dev) & PCI_EXP_FLAGS_SLOT);
+}
+
+static inline bool pcie_cap_has_rtctl(struct pci_dev *dev)
+{
+ int type = pci_pcie_type(dev);
+
+ return pcie_cap_version(dev) > 1 ||
+ type == PCI_EXP_TYPE_ROOT_PORT ||
+ type == PCI_EXP_TYPE_RC_EC;
+}
+
+static bool pcie_capability_reg_implemented(struct pci_dev *dev, int pos)
+{
+ if (!pci_is_pcie(dev))
+ return false;
+
+ switch (pos) {
+ case PCI_EXP_FLAGS_TYPE:
+ return true;
+ case PCI_EXP_DEVCAP:
+ case PCI_EXP_DEVCTL:
+ case PCI_EXP_DEVSTA:
+ return pcie_cap_has_devctl(dev);
+ case PCI_EXP_LNKCAP:
+ case PCI_EXP_LNKCTL:
+ case PCI_EXP_LNKSTA:
+ return pcie_cap_has_lnkctl(dev);
+ case PCI_EXP_SLTCAP:
+ case PCI_EXP_SLTCTL:
+ case PCI_EXP_SLTSTA:
+ return pcie_cap_has_sltctl(dev);
+ case PCI_EXP_RTCTL:
+ case PCI_EXP_RTCAP:
+ case PCI_EXP_RTSTA:
+ return pcie_cap_has_rtctl(dev);
+ case PCI_EXP_DEVCAP2:
+ case PCI_EXP_DEVCTL2:
+ case PCI_EXP_LNKCAP2:
+ case PCI_EXP_LNKCTL2:
+ case PCI_EXP_LNKSTA2:
+ return pcie_cap_version(dev) > 1;
+ default:
+ return false;
+ }
+}
+
+
+static inline int pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val)
+{
+ if (pos & 1)
+ return -EINVAL;
+
+ if (!pcie_capability_reg_implemented(dev, pos))
+ return 0;
+
+ return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val);
+}
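
A short usage sketch ('pdev' is assumed): a write to a register the
capability version does not implement returns 0 without touching config
space, so callers need not special-case older devices:

    int err = pcie_capability_write_word(pdev, PCI_EXP_LNKCTL, 0);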
+
+
#endif /* _LINUX_PCI_H_ */
Modified: trunk/sys/ofed/include/linux/poll.h
===================================================================
--- trunk/sys/ofed/include/linux/poll.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/poll.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Added: trunk/sys/ofed/include/linux/printk.h
===================================================================
--- trunk/sys/ofed/include/linux/printk.h (rev 0)
+++ trunk/sys/ofed/include/linux/printk.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2010 iX Systems, Inc.
+ * Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _FBSD_PRINTK_H_
+#define _FBSD_PRINTK_H_
+
+/* GID printing macros */
+#define GID_PRINT_FMT "%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x"
+#define GID_PRINT_ARGS(gid_raw) htons(((u16 *)gid_raw)[0]), htons(((u16 *)gid_raw)[1]),\
+ htons(((u16 *)gid_raw)[2]), htons(((u16 *)gid_raw)[3]),\
+ htons(((u16 *)gid_raw)[4]), htons(((u16 *)gid_raw)[5]),\
+ htons(((u16 *)gid_raw)[6]), htons(((u16 *)gid_raw)[7])
+
+#endif /* _FBSD_PRINTK_H */
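
A short usage sketch of the new macros ('gid' is any raw 16-byte GID buffer):

    u8 gid[16] = { 0 };

    printf("GID " GID_PRINT_FMT "\n", GID_PRINT_ARGS(gid));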
Modified: trunk/sys/ofed/include/linux/radix-tree.h
===================================================================
--- trunk/sys/ofed/include/linux/radix-tree.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/radix-tree.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/random.h
===================================================================
--- trunk/sys/ofed/include/linux/random.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/random.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/rbtree.h
===================================================================
--- trunk/sys/ofed/include/linux/rbtree.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/rbtree.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/rwlock.h
===================================================================
--- trunk/sys/ofed/include/linux/rwlock.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/rwlock.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/rwsem.h
===================================================================
--- trunk/sys/ofed/include/linux/rwsem.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/rwsem.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/scatterlist.h
===================================================================
--- trunk/sys/ofed/include/linux/scatterlist.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/scatterlist.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,12 +26,27 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#ifndef _LINUX_SCATTERLIST_H_
#define _LINUX_SCATTERLIST_H_
-#include <linux/string.h>
#include <linux/page.h>
+#include <linux/slab.h>
+/*
+ * SG table design.
+ *
+ * If flags bit 0 is set, then the sg field contains a pointer to the next sg
+ * table list. Otherwise the next entry is at sg + 1; this can be determined
+ * using the sg_is_chain() function.
+ *
+ * If flags bit 1 is set, then this sg entry is the last element in a list;
+ * this can be determined using the sg_is_last() function.
+ *
+ * See sg_next().
+ *
+ */
+
struct scatterlist {
union {
struct page *page;
@@ -42,6 +58,18 @@
uint32_t flags;
};
+struct sg_table {
+ struct scatterlist *sgl; /* the list */
+ unsigned int nents; /* number of mapped entries */
+ unsigned int orig_nents; /* original size of list */
+};
+
+/*
+ * Maximum number of entries that will be allocated in one piece, if
+ * a list larger than this is required then chaining will be utilized.
+ */
+#define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist))
+
#define sg_dma_address(sg) (sg)->address
#define sg_dma_len(sg) (sg)->length
#define sg_page(sg) (sg)->sl_un.page
@@ -92,6 +120,212 @@
return sg_page(sg)->phys_addr + sg->offset;
}
+/**
+ * sg_chain - Chain two sglists together
+ * @prv: First scatterlist
+ * @prv_nents: Number of entries in prv
+ * @sgl: Second scatterlist
+ *
+ * Description:
+ * Links @prv@ and @sgl@ together to form a longer scatterlist.
+ *
+ **/
+static inline void
+sg_chain(struct scatterlist *prv, unsigned int prv_nents,
+ struct scatterlist *sgl)
+{
+/*
+ * offset and length are unused for chain entry. Clear them.
+ */
+ struct scatterlist *sg = &prv[prv_nents - 1];
+
+ sg->offset = 0;
+ sg->length = 0;
+
+ /*
+ * Indicate a link pointer, and set the link to the second list.
+ */
+ sg->flags = SG_CHAIN;
+ sg->sl_un.sg = sgl;
+}
+
+/**
+ * sg_mark_end - Mark the end of the scatterlist
+ * @sg: SG entry
+ *
+ * Description:
+ * Marks the passed in sg entry as the termination point for the sg
+ * table. A call to sg_next() on this entry will return NULL.
+ *
+ **/
+static inline void sg_mark_end(struct scatterlist *sg)
+{
+ sg->flags = SG_END;
+}
+
+/**
+ * __sg_free_table - Free a previously mapped sg table
+ * @table: The sg table header to use
+ * @max_ents: The maximum number of entries per single scatterlist
+ *
+ * Description:
+ * Free an sg table previously allocated and setup with
+ * __sg_alloc_table(). The @max_ents value must be identical to
+ * that previously used with __sg_alloc_table().
+ *
+ **/
+static inline void
+__sg_free_table(struct sg_table *table, unsigned int max_ents)
+{
+ struct scatterlist *sgl, *next;
+
+ if (unlikely(!table->sgl))
+ return;
+
+ sgl = table->sgl;
+ while (table->orig_nents) {
+ unsigned int alloc_size = table->orig_nents;
+ unsigned int sg_size;
+
+ /*
+ * If we have more than max_ents segments left,
+ * then assign 'next' to the sg table after the current one.
+ * sg_size is then one less than alloc size, since the last
+ * element is the chain pointer.
+ */
+ if (alloc_size > max_ents) {
+ next = sgl[max_ents - 1].sl_un.sg;
+ alloc_size = max_ents;
+ sg_size = alloc_size - 1;
+ } else {
+ sg_size = alloc_size;
+ next = NULL;
+ }
+
+ table->orig_nents -= sg_size;
+ kfree(sgl);
+ sgl = next;
+ }
+
+ table->sgl = NULL;
+}
+
+/**
+ * sg_free_table - Free a previously allocated sg table
+ * @table: The mapped sg table header
+ *
+ **/
+static inline void
+sg_free_table(struct sg_table *table)
+{
+ __sg_free_table(table, SG_MAX_SINGLE_ALLOC);
+}
+
+/**
+ * __sg_alloc_table - Allocate and initialize an sg table with given allocator
+ * @table: The sg table header to use
+ * @nents: Number of entries in sg list
+ * @max_ents: The maximum number of entries the allocator returns per call
+ * @gfp_mask: GFP allocation mask
+ *
+ * Description:
+ * This function returns a @table @nents long. The allocator is
+ * defined to return scatterlist chunks of maximum size @max_ents.
+ * Thus if @nents is bigger than @max_ents, the scatterlists will be
+ * chained in units of @max_ents.
+ *
+ * Notes:
+ *   If this function returns non-0 (e.g., failure), the caller must call
+ * __sg_free_table() to cleanup any leftover allocations.
+ *
+ **/
+static inline int
+__sg_alloc_table(struct sg_table *table, unsigned int nents,
+ unsigned int max_ents, gfp_t gfp_mask)
+{
+ struct scatterlist *sg, *prv;
+ unsigned int left;
+
+ memset(table, 0, sizeof(*table));
+
+ if (nents == 0)
+ return -EINVAL;
+ left = nents;
+ prv = NULL;
+ do {
+ unsigned int sg_size, alloc_size = left;
+
+ if (alloc_size > max_ents) {
+ alloc_size = max_ents;
+ sg_size = alloc_size - 1;
+ } else
+ sg_size = alloc_size;
+
+ left -= sg_size;
+
+ sg = kmalloc(alloc_size * sizeof(struct scatterlist), gfp_mask);
+ if (unlikely(!sg)) {
+ /*
+ * Adjust entry count to reflect that the last
+ * entry of the previous table won't be used for
+ * linkage. Without this, __sg_free_table() may get
+ * confused.
+ */
+ if (prv)
+ table->nents = ++table->orig_nents;
+
+ return -ENOMEM;
+ }
+
+ sg_init_table(sg, alloc_size);
+ table->nents = table->orig_nents += sg_size;
+
+ /*
+ * If this is the first mapping, assign the sg table header.
+ * If this is not the first mapping, chain previous part.
+ */
+ if (prv)
+ sg_chain(prv, max_ents, sg);
+ else
+ table->sgl = sg;
+
+ /*
+ * If no more entries after this one, mark the end
+ */
+ if (!left)
+ sg_mark_end(&sg[sg_size - 1]);
+
+ prv = sg;
+ } while (left);
+
+ return 0;
+}
+
+/**
+ * sg_alloc_table - Allocate and initialize an sg table
+ * @table: The sg table header to use
+ * @nents: Number of entries in sg list
+ * @gfp_mask: GFP allocation mask
+ *
+ * Description:
+ * Allocate and initialize an sg table. If @nents@ is larger than
+ * SG_MAX_SINGLE_ALLOC, a chained sg table will be set up.
+ *
+ **/
+
+static inline int
+sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
+{
+ int ret;
+
+ ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
+ gfp_mask);
+ if (unlikely(ret))
+ __sg_free_table(table, SG_MAX_SINGLE_ALLOC);
+
+ return ret;
+}
+
#define for_each_sg(sglist, sg, sgmax, _itr) \
for (_itr = 0, sg = (sglist); _itr < (sgmax); _itr++, sg = sg_next(sg))
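Putting the pieces together, a caller allocates a table, walks it with for_each_sg(), and frees it. A minimal usage sketch, with the per-entry initialization left as a placeholder:

static int
example_fill_table(unsigned int nents)
{
	struct sg_table table;
	struct scatterlist *sg;
	unsigned int i;
	int error;

	error = sg_alloc_table(&table, nents, GFP_KERNEL);
	if (error)
		return (error);	/* sg_alloc_table() already cleaned up */

	for_each_sg(table.sgl, sg, table.nents, i) {
		/* set page/offset/length on each entry here */
	}

	sg_free_table(&table);
	return (0);
}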
Modified: trunk/sys/ofed/include/linux/sched.h
===================================================================
--- trunk/sys/ofed/include/linux/sched.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/sched.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/semaphore.h
===================================================================
--- trunk/sys/ofed/include/linux/semaphore.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/semaphore.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/slab.h
===================================================================
--- trunk/sys/ofed/include/linux/slab.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/slab.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -38,11 +39,18 @@
MALLOC_DECLARE(M_KMALLOC);
-#define kmalloc(size, flags) malloc((size), M_KMALLOC, (flags))
-#define kzalloc(size, flags) kmalloc((size), (flags) | M_ZERO)
-#define kfree(ptr) free(__DECONST(void *, (ptr)), M_KMALLOC)
-#define krealloc(ptr, size, flags) realloc((ptr), (size), M_KMALLOC, (flags))
-#define kcalloc(n, size, flags) kmalloc((n) * (size), flags | M_ZERO)
+#define kmalloc(size, flags) malloc((size), M_KMALLOC, (flags))
+#define kvmalloc(size) kmalloc((size), 0)
+#define kzalloc(size, flags) kmalloc((size), (flags) | M_ZERO)
+#define kzalloc_node(size, flags, node) kzalloc(size, flags)
+#define kfree(ptr) free(__DECONST(void *, (ptr)), M_KMALLOC)
+#define krealloc(ptr, size, flags) realloc((ptr), (size), M_KMALLOC, (flags))
+#define kcalloc(n, size, flags) kmalloc((n) * (size), flags | M_ZERO)
+#define vzalloc(size) kzalloc(size, GFP_KERNEL | __GFP_NOWARN)
+#define vfree(arg) kfree(arg)
+#define kvfree(arg) kfree(arg)
+#define vmalloc(size) kmalloc(size, GFP_KERNEL)
+#define vmalloc_node(size, node) kmalloc(size, GFP_KERNEL)
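All of these spellings resolve to FreeBSD's malloc(9) with the M_KMALLOC type, so memory obtained through any of them is interchangeable and is released with kfree()/kvfree()/vfree() alike. A minimal sketch, assuming a sleepable kernel context:

struct example_softc {
	int	unit;
	char	name[16];
};

static struct example_softc *
example_alloc(void)
{
	struct example_softc *sc;

	sc = kzalloc(sizeof(*sc), GFP_KERNEL);	/* zeroed, may sleep */
	if (sc == NULL)
		return (NULL);
	return (sc);
}

static void
example_free(struct example_softc *sc)
{
	kfree(sc);	/* same backing allocator regardless of spelling */
}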
struct kmem_cache {
uma_zone_t cache_zone;
Modified: trunk/sys/ofed/include/linux/socket.h
===================================================================
--- trunk/sys/ofed/include/linux/socket.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/socket.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/spinlock.h
===================================================================
--- trunk/sys/ofed/include/linux/spinlock.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/spinlock.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,7 +36,6 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
-#include <linux/lockdep.h>
#include <linux/rwlock.h>
typedef struct {
Modified: trunk/sys/ofed/include/linux/string.h
===================================================================
--- trunk/sys/ofed/include/linux/string.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/string.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,6 +36,9 @@
#include <sys/libkern.h>
+#define strnicmp strncasecmp
+
+
static inline void *
kmemdup(const void *src, size_t len, gfp_t gfp)
{
Modified: trunk/sys/ofed/include/linux/sysfs.h
===================================================================
--- trunk/sys/ofed/include/linux/sysfs.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/sysfs.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -75,39 +76,45 @@
struct kobject *kobj;
struct attribute *attr;
const struct sysfs_ops *ops;
- void *buf;
+ char *buf;
int error;
ssize_t len;
kobj = arg1;
attr = (struct attribute *)arg2;
- buf = (void *)get_zeroed_page(GFP_KERNEL);
- len = 1; /* Copy out a NULL byte at least. */
if (kobj->ktype == NULL || kobj->ktype->sysfs_ops == NULL)
return (ENODEV);
- ops = kobj->ktype->sysfs_ops;
+ buf = (char *)get_zeroed_page(GFP_KERNEL);
if (buf == NULL)
return (ENOMEM);
+ ops = kobj->ktype->sysfs_ops;
if (ops->show) {
len = ops->show(kobj, attr, buf);
/*
- * It's valid not to have a 'show' so we just return 1 byte
- * of NULL.
+ * It's valid not to have a 'show', so just return an
+ * empty string.
*/
if (len < 0) {
error = -len;
- len = 1;
if (error != EIO)
goto out;
+ buf[0] = '\0';
+ } else if (len) {
+ len--;
+ if (len >= PAGE_SIZE)
+ len = PAGE_SIZE - 1;
+ /* Trim trailing newline. */
+ buf[len] = '\0';
}
}
- error = SYSCTL_OUT(req, buf, len);
- if (error || !req->newptr || ops->store == NULL)
+
+ /* Leave one trailing byte to append a newline. */
+ error = sysctl_handle_string(oidp, buf, PAGE_SIZE - 1, req);
+ if (error != 0 || req->newptr == NULL || ops->store == NULL)
goto out;
- error = SYSCTL_IN(req, buf, PAGE_SIZE);
- if (error)
- goto out;
- len = ops->store(kobj, attr, buf, req->newlen);
+ len = strlcat(buf, "\n", PAGE_SIZE);
+ KASSERT(len < PAGE_SIZE, ("new attribute truncated"));
+ len = ops->store(kobj, attr, buf, len);
if (len < 0)
error = -len;
out:
@@ -179,4 +186,6 @@
sysctl_remove_oid(kobj->oidp, 1, 1);
}
+#define sysfs_attr_init(attr) do {} while(0)
+
#endif /* _LINUX_SYSFS_H_ */
Modified: trunk/sys/ofed/include/linux/timer.h
===================================================================
--- trunk/sys/ofed/include/linux/timer.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/timer.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,7 +27,7 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _LINUX_TIMER_H_
-#define _LINUX_TIMER_H_
+#define _LINUX_TIMER_H_
#include <linux/types.h>
@@ -35,22 +36,14 @@
#include <sys/callout.h>
struct timer_list {
- struct callout timer_callout;
- void (*function)(unsigned long);
- unsigned long data;
+ struct callout timer_callout;
+ void (*function) (unsigned long);
+ unsigned long data;
+ unsigned long expires;
};
-#define expires timer_callout.c_time
+extern unsigned long linux_timer_hz_mask;
-static inline void
-_timer_fn(void *context)
-{
- struct timer_list *timer;
-
- timer = context;
- timer->function(timer->data);
-}
-
#define setup_timer(timer, func, dat) \
do { \
(timer)->function = (func); \
@@ -65,23 +58,15 @@
callout_init(&(timer)->timer_callout, CALLOUT_MPSAFE); \
} while (0)
-#define mod_timer(timer, expire) \
- callout_reset(&(timer)->timer_callout, (expire) - jiffies, \
- _timer_fn, (timer))
+extern void mod_timer(struct timer_list *, unsigned long);
+extern void add_timer(struct timer_list *);
-#define add_timer(timer) \
- callout_reset(&(timer)->timer_callout, \
- (timer)->timer_callout.c_time - jiffies, _timer_fn, (timer))
-
#define del_timer(timer) callout_stop(&(timer)->timer_callout)
#define del_timer_sync(timer) callout_drain(&(timer)->timer_callout)
-
#define timer_pending(timer) callout_pending(&(timer)->timer_callout)
+#define round_jiffies(j) \
+ ((unsigned long)(((j) + linux_timer_hz_mask) & ~linux_timer_hz_mask))
+#define round_jiffies_relative(j) \
+ round_jiffies(j)
-static inline unsigned long
-round_jiffies(unsigned long j)
-{
- return roundup(j, hz);
-}
-
-#endif /* _LINUX_TIMER_H_ */
+#endif /* _LINUX_TIMER_H_ */
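The new round_jiffies() rounds a jiffies value up to the next multiple of (linux_timer_hz_mask + 1) via the usual mask-and-add idiom. A standalone sketch of the arithmetic, assuming an illustrative mask of 0x3ff (a 1024-tick granularity):

#include <stdio.h>

int
main(void)
{
	unsigned long mask = 0x3ff;	/* assumed: granularity minus one */
	unsigned long j = 1500;

	/* same expression the round_jiffies() macro expands to */
	unsigned long rounded = (j + mask) & ~mask;

	printf("%lu -> %lu\n", j, rounded);	/* prints 1500 -> 2048 */
	return (0);
}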
Modified: trunk/sys/ofed/include/linux/types.h
===================================================================
--- trunk/sys/ofed/include/linux/types.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/types.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,27 +31,35 @@
#include <sys/cdefs.h>
#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
#include <linux/compiler.h>
#include <asm/types.h>
-typedef __u16 __le16;
-typedef __u16 __be16;
-typedef __u32 __le32;
-typedef __u32 __be32;
-typedef __u64 __le64;
-typedef __u64 __be64;
-#ifndef __bool_true_false_are_defined
-typedef _Bool bool;
-#define true TRUE
-#define false FALSE
+#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+
+#ifndef __bitwise__
+#ifdef __CHECKER__
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __bitwise__
#endif
+#endif
-typedef unsigned long kernel_ulong_t;
+typedef uint16_t __le16;
+typedef uint16_t __be16;
+typedef uint32_t __le32;
+typedef uint32_t __be32;
+typedef uint64_t __le64;
+typedef uint64_t __be64;
+
typedef unsigned int uint;
typedef unsigned gfp_t;
typedef uint64_t loff_t;
typedef vm_paddr_t resource_size_t;
+typedef u64 phys_addr_t;
+
#define DECLARE_BITMAP(n, bits) \
unsigned long n[howmany(bits, sizeof(long) * 8)]
Modified: trunk/sys/ofed/include/linux/uaccess.h
===================================================================
--- trunk/sys/ofed/include/linux/uaccess.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/uaccess.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/vmalloc.h
===================================================================
--- trunk/sys/ofed/include/linux/vmalloc.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/vmalloc.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,7 +30,7 @@
#ifndef _LINUX_VMALLOC_H_
#define _LINUX_VMALLOC_H_
-#include <asm/page.h>
+#include <linux/page.h>
#define VM_MAP 0x0000
#define PAGE_KERNEL 0x0000
Modified: trunk/sys/ofed/include/linux/wait.h
===================================================================
--- trunk/sys/ofed/include/linux/wait.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/wait.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Modified: trunk/sys/ofed/include/linux/workqueue.h
===================================================================
--- trunk/sys/ofed/include/linux/workqueue.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/linux/workqueue.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -80,7 +81,7 @@
callout_init(&(_work)->timer, CALLOUT_MPSAFE); \
} while (0)
-#define INIT_DELAYED_WORK_DEFERRABLE INIT_DELAYED_WORK
+#define INIT_DEFERRABLE_WORK INIT_DELAYED_WORK
#define schedule_work(work) \
do { \
@@ -90,11 +91,12 @@
#define flush_scheduled_work() flush_taskqueue(taskqueue_thread)
-#define queue_work(q, work) \
-do { \
- (work)->taskqueue = (q)->taskqueue; \
- taskqueue_enqueue((q)->taskqueue, &(work)->work_task); \
-} while (0)
+static inline int queue_work(struct workqueue_struct *q, struct work_struct *work)
+{
+ (work)->taskqueue = (q)->taskqueue;
+ /* Invert taskqueue_enqueue()'s return value to match the Linux convention */
+ return !taskqueue_enqueue((q)->taskqueue, &(work)->work_task);
+}
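queue_work() now returns non-zero when taskqueue_enqueue() reports success, mirroring the Linux boolean convention. A minimal caller sketch, assuming wq and work were set up with the surrounding macros:

static void
example_kick(struct workqueue_struct *wq, struct work_struct *work)
{
	if (!queue_work(wq, work)) {
		/* enqueue was refused; handle or retry as appropriate */
	}
}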
static inline void
_delayed_work_fn(void *arg)
@@ -121,6 +123,14 @@
return (!pending);
}
+static inline bool schedule_delayed_work(struct delayed_work *dwork,
+ unsigned long delay)
+{
+ struct workqueue_struct wq;
+ wq.taskqueue = taskqueue_thread;
+ return queue_delayed_work(&wq, dwork, delay);
+}
+
static inline struct workqueue_struct *
_create_workqueue_common(char *name, int cpus)
{
@@ -129,7 +139,7 @@
wq = kmalloc(sizeof(*wq), M_WAITOK);
wq->taskqueue = taskqueue_create((name), M_WAITOK,
taskqueue_thread_enqueue, &wq->taskqueue);
- taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, (name));
+ taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name);
return (wq);
}
@@ -184,10 +194,30 @@
{
callout_stop(&work->timer);
- if (work->work.taskqueue &&
- taskqueue_cancel(work->work.taskqueue, &work->work.work_task, NULL))
- taskqueue_drain(work->work.taskqueue, &work->work.work_task);
+ if (work->work.taskqueue)
+ return (taskqueue_cancel(work->work.taskqueue,
+ &work->work.work_task, NULL) == 0);
return 0;
}
+static inline int
+cancel_delayed_work_sync(struct delayed_work *work)
+{
+
+ callout_drain(&work->timer);
+ if (work->work.taskqueue &&
+ taskqueue_cancel(work->work.taskqueue, &work->work.work_task, NULL))
+ taskqueue_drain(work->work.taskqueue, &work->work.work_task);
+ return 0;
+}
+
+static inline bool
+mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork,
+ unsigned long delay)
+{
+ cancel_delayed_work(dwork);
+ queue_delayed_work(wq, dwork, delay);
+ return false;
+}
+
#endif /* _LINUX_WORKQUEUE_H_ */
Added: trunk/sys/ofed/include/net/if_inet6.h
===================================================================
--- trunk/sys/ofed/include/net/if_inet6.h (rev 0)
+++ trunk/sys/ofed/include/net/if_inet6.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,47 @@
+/*-
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2010 iX Systems, Inc.
+ * Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_INET6_H_
+#define _NET_IF_INET6_H_
+
+static inline void ipv6_eth_mc_map(const struct in6_addr *addr, char *buf)
+{
+/*
+ * +-------+-------+-------+-------+-------+-------+
+ * | 33 | 33 | DST13 | DST14 | DST15 | DST16 |
+ * +-------+-------+-------+-------+-------+-------+
+ */
+
+ buf[0]= 0x33;
+ buf[1]= 0x33;
+
+ memcpy(buf + 2, &addr->s6_addr32[3], sizeof(__u32));
+}
+
+#endif /* _NET_IF_INET6_H_ */
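ipv6_eth_mc_map() implements the standard RFC 2464 mapping: 33:33 followed by the low-order 32 bits of the IPv6 group address, so the all-nodes group ff02::1 maps to 33:33:00:00:00:01. A userland sketch of the same computation:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

int
main(void)
{
	struct in6_addr group;
	unsigned char mac[6] = { 0x33, 0x33 };

	inet_pton(AF_INET6, "ff02::1", &group);	/* all-nodes group */
	memcpy(mac + 2, &group.s6_addr[12], 4);	/* low 32 bits of the group */

	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	    mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	/* prints 33:33:00:00:00:01 */
	return (0);
}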
Property changes on: trunk/sys/ofed/include/net/if_inet6.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/ofed/include/net/ip.h
===================================================================
--- trunk/sys/ofed/include/net/ip.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/net/ip.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,11 +42,17 @@
#include <netinet/in.h>
#include <netinet/in_pcb.h>
-#ifdef INET
static inline void inet_get_local_port_range(int *low, int *high)
{
+#ifdef INET
+ CURVNET_SET_QUIET(TD_TO_VNET(curthread));
*low = V_ipport_firstauto;
*high = V_ipport_lastauto;
+ CURVNET_RESTORE();
+#else
+ *low = IPPORT_EPHEMERALFIRST; /* 10000 */
+ *high = IPPORT_EPHEMERALLAST; /* 65535 */
+#endif
}
static inline void
@@ -71,11 +78,10 @@
buf[13] = 0;
buf[14] = 0;
buf[15] = 0;
- buf[16] = (addr >> 24) & 0x0f;
+ buf[16] = (addr >> 24) & 0xff;
buf[17] = (addr >> 16) & 0xff;
buf[18] = (addr >> 8) & 0xff;
buf[19] = addr & 0xff;
}
-#endif
#endif /* _LINUX_NET_IP_H_ */
Modified: trunk/sys/ofed/include/net/ipv6.h
===================================================================
--- trunk/sys/ofed/include/net/ipv6.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/net/ipv6.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -57,4 +58,53 @@
}
#endif
+static inline void __ipv6_addr_set_half(__be32 *addr,
+ __be32 wh, __be32 wl)
+{
+#if BITS_PER_LONG == 64
+#if defined(__BIG_ENDIAN)
+ if (__builtin_constant_p(wh) && __builtin_constant_p(wl)) {
+ *(__force u64 *)addr = ((__force u64)(wh) << 32 | (__force u64)(wl));
+ return;
+ }
+#elif defined(__LITTLE_ENDIAN)
+ if (__builtin_constant_p(wl) && __builtin_constant_p(wh)) {
+ *(__force u64 *)addr = ((__force u64)(wl) << 32 | (__force u64)(wh));
+ return;
+ }
+#endif
+#endif
+ addr[0] = wh;
+ addr[1] = wl;
+}
+
+static inline void ipv6_addr_set(struct in6_addr *addr,
+ __be32 w1, __be32 w2,
+ __be32 w3, __be32 w4)
+{
+ __ipv6_addr_set_half(&addr->s6_addr32[0], w1, w2);
+ __ipv6_addr_set_half(&addr->s6_addr32[2], w3, w4);
+}
+
+static inline void ipv6_addr_set_v4mapped(const __be32 addr,
+ struct in6_addr *v4mapped)
+{
+ ipv6_addr_set(v4mapped,
+ 0, 0,
+ htonl(0x0000FFFF),
+ addr);
+}
+
+static inline int ipv6_addr_v4mapped(const struct in6_addr *a)
+{
+ return ((a->s6_addr32[0] | a->s6_addr32[1] |
+ (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0);
+}
+
+static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2)
+{
+ return memcmp(a1, a2, sizeof(struct in6_addr));
+}
+
+
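ipv6_addr_set_v4mapped() builds the standard ::ffff:a.b.c.d form, and ipv6_addr_v4mapped() recognizes it by checking that the first two words are zero and the third equals 0x0000ffff. A userland sketch of the layout it produces:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

int
main(void)
{
	struct in6_addr a;
	uint32_t v4;
	char txt[INET6_ADDRSTRLEN];

	inet_pton(AF_INET, "192.0.2.1", &v4);
	memset(&a, 0, sizeof(a));
	a.s6_addr[10] = 0xff;			/* words 0-1 zero, word 2 = ffff */
	a.s6_addr[11] = 0xff;
	memcpy(&a.s6_addr[12], &v4, 4);		/* word 3 carries the v4 address */

	inet_ntop(AF_INET6, &a, txt, sizeof(txt));
	printf("%s\n", txt);			/* prints ::ffff:192.0.2.1 */
	return (0);
}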
#endif /* _LINUX_NET_IPV6_H_ */
Modified: trunk/sys/ofed/include/net/netevent.h
===================================================================
--- trunk/sys/ofed/include/net/netevent.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/net/netevent.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -42,7 +43,7 @@
struct llentry;
static inline void
-_handle_arp_update_event(void *arg, struct llentry *lle)
+_handle_arp_update_event(void *arg, struct llentry *lle, int evt __unused)
{
struct notifier_block *nb;
@@ -54,7 +55,7 @@
register_netevent_notifier(struct notifier_block *nb)
{
nb->tags[NETEVENT_NEIGH_UPDATE] = EVENTHANDLER_REGISTER(
- arp_update_event, _handle_arp_update_event, nb, 0);
+ lle_event, _handle_arp_update_event, nb, 0);
return (0);
}
@@ -62,8 +63,7 @@
unregister_netevent_notifier(struct notifier_block *nb)
{
- EVENTHANDLER_DEREGISTER(arp_update_event,
- nb->tags[NETEVENT_NEIGH_UPDATE]);
+ EVENTHANDLER_DEREGISTER(lle_event, nb->tags[NETEVENT_NEIGH_UPDATE]);
return (0);
}
Modified: trunk/sys/ofed/include/net/tcp.h
===================================================================
--- trunk/sys/ofed/include/net/tcp.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/net/tcp.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -2,6 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Index: trunk/sys/ofed/include/rdma/Kbuild
===================================================================
--- trunk/sys/ofed/include/rdma/Kbuild 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/Kbuild 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/rdma/Kbuild
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/rdma/ib_addr.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_addr.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_addr.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -41,7 +41,6 @@
#include <linux/socket.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
-#include <linux/ethtool.h>
#include <linux/if_vlan.h>
struct rdma_addr_client {
Property changes on: trunk/sys/ofed/include/rdma/ib_addr.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/rdma/ib_cache.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_cache.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_cache.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/rdma/ib_cache.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/rdma/ib_cm.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_cm.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_cm.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -38,6 +38,9 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_sa.h>
+/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */
+extern struct class cm_class;
+
enum ib_cm_state {
IB_CM_IDLE,
IB_CM_LISTEN,
@@ -259,6 +262,18 @@
void *private_data;
};
+#define CM_REQ_ATTR_ID cpu_to_be16(0x0010)
+#define CM_MRA_ATTR_ID cpu_to_be16(0x0011)
+#define CM_REJ_ATTR_ID cpu_to_be16(0x0012)
+#define CM_REP_ATTR_ID cpu_to_be16(0x0013)
+#define CM_RTU_ATTR_ID cpu_to_be16(0x0014)
+#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015)
+#define CM_DREP_ATTR_ID cpu_to_be16(0x0016)
+#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017)
+#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018)
+#define CM_LAP_ATTR_ID cpu_to_be16(0x0019)
+#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
+
/**
* ib_cm_handler - User-defined callback to process communication events.
* @cm_id: Communication identifier associated with the reported event.
Property changes on: trunk/sys/ofed/include/rdma/ib_cm.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/rdma/ib_fmr_pool.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_fmr_pool.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_fmr_pool.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/rdma/ib_fmr_pool.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/rdma/ib_mad.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_mad.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_mad.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -151,7 +151,7 @@
typedef u64 __bitwise ib_sa_comp_mask;
-#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n))
+#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << (n)))
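The added parentheses around n are a hygiene fix; a short illustration of why they matter:

/*
 * Without (n), an expression argument mis-associates, because << binds
 * tighter than the bitwise OR a caller might pass:
 *
 *   IB_SA_COMP_MASK(a | b)  ->  1ull << a | b  ==  (1ull << a) | b   (wrong)
 *   with (n):                   1ull << (a | b)                      (intended)
 */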
/*
* ib_sa_hdr and ib_sa_mad structures must be packed because they have
Property changes on: trunk/sys/ofed/include/rdma/ib_mad.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/rdma/ib_marshall.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_marshall.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_marshall.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/rdma/ib_marshall.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/rdma/ib_pack.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_pack.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_pack.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/rdma/ib_pack.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Added: trunk/sys/ofed/include/rdma/ib_pma.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_pma.h (rev 0)
+++ trunk/sys/ofed/include/rdma/ib_pma.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(IB_PMA_H)
+#define IB_PMA_H
+
+#include <rdma/ib_mad.h>
+
+/*
+ * PMA class portinfo capability mask bits
+ */
+#define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8)
+#define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9)
+#define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12)
+
+#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001)
+#define IB_PMA_PORT_SAMPLES_CONTROL cpu_to_be16(0x0010)
+#define IB_PMA_PORT_SAMPLES_RESULT cpu_to_be16(0x0011)
+#define IB_PMA_PORT_COUNTERS cpu_to_be16(0x0012)
+#define IB_PMA_PORT_COUNTERS_EXT cpu_to_be16(0x001D)
+#define IB_PMA_PORT_SAMPLES_RESULT_EXT cpu_to_be16(0x001E)
+
+struct ib_pma_mad {
+ struct ib_mad_hdr mad_hdr;
+ u8 reserved[40];
+ u8 data[192];
+} __packed;
+
+struct ib_pma_portsamplescontrol {
+ u8 opcode;
+ u8 port_select;
+ u8 tick;
+ u8 counter_width; /* resv: 7:3, counter width: 2:0 */
+ __be32 counter_mask0_9; /* 2, 10 3-bit fields */
+ __be16 counter_mask10_14; /* 1, 5 3-bit fields */
+ u8 sample_mechanisms;
+ u8 sample_status; /* only lower 2 bits */
+ __be64 option_mask;
+ __be64 vendor_mask;
+ __be32 sample_start;
+ __be32 sample_interval;
+ __be16 tag;
+ __be16 counter_select[15];
+ __be32 reserved1;
+ __be64 samples_only_option_mask;
+ __be32 reserved2[28];
+};
+
+struct ib_pma_portsamplesresult {
+ __be16 tag;
+ __be16 sample_status; /* only lower 2 bits */
+ __be32 counter[15];
+};
+
+struct ib_pma_portsamplesresult_ext {
+ __be16 tag;
+ __be16 sample_status; /* only lower 2 bits */
+ __be32 extended_width; /* only upper 2 bits */
+ __be64 counter[15];
+};
+
+struct ib_pma_portcounters {
+ u8 reserved;
+ u8 port_select;
+ __be16 counter_select;
+ __be16 symbol_error_counter;
+ u8 link_error_recovery_counter;
+ u8 link_downed_counter;
+ __be16 port_rcv_errors;
+ __be16 port_rcv_remphys_errors;
+ __be16 port_rcv_switch_relay_errors;
+ __be16 port_xmit_discards;
+ u8 port_xmit_constraint_errors;
+ u8 port_rcv_constraint_errors;
+ u8 reserved1;
+ u8 link_overrun_errors; /* LocalLink: 7:4, BufferOverrun: 3:0 */
+ __be16 reserved2;
+ __be16 vl15_dropped;
+ __be32 port_xmit_data;
+ __be32 port_rcv_data;
+ __be32 port_xmit_packets;
+ __be32 port_rcv_packets;
+ __be32 port_xmit_wait;
+} __packed;
+
+
+#define IB_PMA_SEL_SYMBOL_ERROR cpu_to_be16(0x0001)
+#define IB_PMA_SEL_LINK_ERROR_RECOVERY cpu_to_be16(0x0002)
+#define IB_PMA_SEL_LINK_DOWNED cpu_to_be16(0x0004)
+#define IB_PMA_SEL_PORT_RCV_ERRORS cpu_to_be16(0x0008)
+#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS cpu_to_be16(0x0010)
+#define IB_PMA_SEL_PORT_XMIT_DISCARDS cpu_to_be16(0x0040)
+#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS cpu_to_be16(0x0200)
+#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS cpu_to_be16(0x0400)
+#define IB_PMA_SEL_PORT_VL15_DROPPED cpu_to_be16(0x0800)
+#define IB_PMA_SEL_PORT_XMIT_DATA cpu_to_be16(0x1000)
+#define IB_PMA_SEL_PORT_RCV_DATA cpu_to_be16(0x2000)
+#define IB_PMA_SEL_PORT_XMIT_PACKETS cpu_to_be16(0x4000)
+#define IB_PMA_SEL_PORT_RCV_PACKETS cpu_to_be16(0x8000)
+
+struct ib_pma_portcounters_ext {
+ u8 reserved;
+ u8 port_select;
+ __be16 counter_select;
+ __be32 reserved1;
+ __be64 port_xmit_data;
+ __be64 port_rcv_data;
+ __be64 port_xmit_packets;
+ __be64 port_rcv_packets;
+ __be64 port_unicast_xmit_packets;
+ __be64 port_unicast_rcv_packets;
+ __be64 port_multicast_xmit_packets;
+ __be64 port_multicast_rcv_packets;
+} __packed;
+
+#define IB_PMA_SELX_PORT_XMIT_DATA cpu_to_be16(0x0001)
+#define IB_PMA_SELX_PORT_RCV_DATA cpu_to_be16(0x0002)
+#define IB_PMA_SELX_PORT_XMIT_PACKETS cpu_to_be16(0x0004)
+#define IB_PMA_SELX_PORT_RCV_PACKETS cpu_to_be16(0x0008)
+#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS cpu_to_be16(0x0010)
+#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS cpu_to_be16(0x0020)
+#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS cpu_to_be16(0x0040)
+#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS cpu_to_be16(0x0080)
+
+#endif /* IB_PMA_H */
Property changes on: trunk/sys/ofed/include/rdma/ib_pma.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/ofed/include/rdma/ib_sa.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_sa.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_sa.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -372,6 +372,28 @@
u8 padding[49];
};
+#define IB_SA_GUIDINFO_REC_LID IB_SA_COMP_MASK(0)
+#define IB_SA_GUIDINFO_REC_BLOCK_NUM IB_SA_COMP_MASK(1)
+#define IB_SA_GUIDINFO_REC_RES1 IB_SA_COMP_MASK(2)
+#define IB_SA_GUIDINFO_REC_RES2 IB_SA_COMP_MASK(3)
+#define IB_SA_GUIDINFO_REC_GID0 IB_SA_COMP_MASK(4)
+#define IB_SA_GUIDINFO_REC_GID1 IB_SA_COMP_MASK(5)
+#define IB_SA_GUIDINFO_REC_GID2 IB_SA_COMP_MASK(6)
+#define IB_SA_GUIDINFO_REC_GID3 IB_SA_COMP_MASK(7)
+#define IB_SA_GUIDINFO_REC_GID4 IB_SA_COMP_MASK(8)
+#define IB_SA_GUIDINFO_REC_GID5 IB_SA_COMP_MASK(9)
+#define IB_SA_GUIDINFO_REC_GID6 IB_SA_COMP_MASK(10)
+#define IB_SA_GUIDINFO_REC_GID7 IB_SA_COMP_MASK(11)
+
+struct ib_sa_guidinfo_rec {
+ __be16 lid;
+ u8 block_num;
+ /* reserved */
+ u8 res1;
+ __be32 res2;
+ u8 guid_info_list[64];
+};
+
struct ib_sa_client {
atomic_t users;
struct completion comp;
@@ -556,4 +578,16 @@
*/
void ib_sa_unregister_inform_info(struct ib_inform_info *info);
+int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_guidinfo_rec *rec,
+ ib_sa_comp_mask comp_mask, u8 method,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_guidinfo_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query);
+
+
#endif /* IB_SA_H */
Property changes on: trunk/sys/ofed/include/rdma/ib_sa.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/rdma/ib_smi.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_smi.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_smi.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -38,6 +38,7 @@
#define IB_SMI_H
#include <rdma/ib_mad.h>
+#include <asm/byteorder.h>
#define IB_SMP_DATA_SIZE 64
#define IB_SMP_MAX_PATH_HOPS 64
Property changes on: trunk/sys/ofed/include/rdma/ib_smi.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/rdma/ib_umem.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_umem.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_umem.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -39,6 +39,7 @@
#include <linux/dma-attrs.h>
struct ib_ucontext;
+struct vm_area_struct;
struct ib_umem {
struct ib_ucontext *context;
@@ -57,6 +58,24 @@
unsigned long diff;
};
+struct ib_cmem {
+
+ struct ib_ucontext *context;
+ size_t length;
+ /* Linked list of the contiguous blocks that make up this cmem */
+ struct list_head ib_cmem_block;
+
+ /* Order of each cmem block; 2^block_order equals the number
+    of physical pages per block.
+  */
+ unsigned long block_order;
+ /* Reference counter for this memory area; when it reaches 0,
+    the pages are returned to the kernel.
+  */
+ struct kref refcount;
+};
+
+
struct ib_umem_chunk {
struct list_head list;
int nents;
@@ -70,4 +89,14 @@
void ib_umem_release(struct ib_umem *umem);
int ib_umem_page_count(struct ib_umem *umem);
+int ib_cmem_map_contiguous_pages_to_vma(struct ib_cmem *ib_cmem,
+ struct vm_area_struct *vma);
+struct ib_cmem *ib_cmem_alloc_contiguous_pages(struct ib_ucontext *context,
+ unsigned long total_size,
+ unsigned long page_size_order);
+void ib_cmem_release_contiguous_pages(struct ib_cmem *cmem);
+int ib_umem_map_to_vma(struct ib_umem *umem,
+ struct vm_area_struct *vma);
+
+
#endif /* IB_UMEM_H */
Property changes on: trunk/sys/ofed/include/rdma/ib_umem.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/rdma/ib_user_cm.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_user_cm.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_user_cm.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -34,6 +34,7 @@
#ifndef IB_USER_CM_H
#define IB_USER_CM_H
+#include <linux/types.h>
#include <rdma/ib_user_sa.h>
#define IB_USER_CM_ABI_VERSION 5
Property changes on: trunk/sys/ofed/include/rdma/ib_user_cm.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/rdma/ib_user_mad.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_user_mad.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_user_mad.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/rdma/ib_user_mad.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/rdma/ib_user_sa.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_user_sa.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_user_sa.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/rdma/ib_user_sa.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/rdma/ib_user_verbs.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_user_verbs.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_user_verbs.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -82,9 +82,13 @@
IB_USER_VERBS_CMD_QUERY_SRQ,
IB_USER_VERBS_CMD_DESTROY_SRQ,
IB_USER_VERBS_CMD_POST_SRQ_RECV,
+ IB_USER_VERBS_CMD_OPEN_XRCD,
+ IB_USER_VERBS_CMD_CLOSE_XRCD,
+ IB_USER_VERBS_CMD_CREATE_XSRQ,
+ IB_USER_VERBS_CMD_OPEN_QP,
+ IB_USER_VERBS_CMD_ATTACH_FLOW,
+ IB_USER_VERBS_CMD_DETACH_FLOW,
IB_USER_VERBS_CMD_CREATE_XRC_SRQ,
- IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN,
- IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN,
IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP,
IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP,
IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP,
@@ -230,6 +234,21 @@
__u32 pd_handle;
};
+struct ib_uverbs_open_xrcd {
+ __u64 response;
+ __u32 fd;
+ __u32 oflags;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_open_xrcd_resp {
+ __u32 xrcd_handle;
+};
+
+struct ib_uverbs_close_xrcd {
+ __u32 xrcd_handle;
+};
+
struct ib_uverbs_reg_mr {
__u64 response;
__u64 start;
@@ -412,6 +431,17 @@
__u64 driver_data[0];
};
+struct ib_uverbs_open_qp {
+ __u64 response;
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 qpn;
+ __u8 qp_type;
+ __u8 reserved[7];
+ __u64 driver_data[0];
+};
+
+/* also used for open response */
struct ib_uverbs_create_qp_resp {
__u32 qp_handle;
__u32 qpn;
@@ -569,6 +599,16 @@
} wr;
};
+struct ibv_uverbs_flow_spec {
+ __u32 type;
+ __be32 src_ip;
+ __be32 dst_ip;
+ __be16 src_port;
+ __be16 dst_port;
+ __u8 l4_protocol;
+ __u8 block_mc_loopback;
+};
+
struct ib_uverbs_post_send {
__u64 response;
__u32 qp_handle;
@@ -646,6 +686,45 @@
__u64 driver_data[0];
};
+struct ibv_kern_flow_spec {
+ __u32 type;
+ __u32 reserved1;
+ union {
+ struct {
+ __be16 ethertype;
+ __be16 vlan;
+ __u8 vlan_present;
+ __u8 mac[6];
+ __u8 port;
+ } eth;
+ struct {
+ __be32 qpn;
+ } ib_uc;
+ struct {
+ __u8 mgid[16];
+ } ib_mc;
+ } l2_id;
+ __be32 src_ip;
+ __be32 dst_ip;
+ __be16 src_port;
+ __be16 dst_port;
+ __u8 l4_protocol;
+ __u8 block_mc_loopback;
+ __u8 reserved[2];
+};
+
+struct ib_uverbs_attach_flow {
+ __u32 qp_handle;
+ __u32 priority;
+ struct ibv_kern_flow_spec spec;
+};
+
+struct ib_uverbs_detach_flow {
+ __u32 qp_handle;
+ __u32 priority;
+ struct ibv_kern_flow_spec spec;
+};
+
struct ib_uverbs_create_srq {
__u64 response;
__u64 user_handle;
@@ -656,15 +735,17 @@
__u64 driver_data[0];
};
-struct ib_uverbs_create_xrc_srq {
+struct ib_uverbs_create_xsrq {
__u64 response;
__u64 user_handle;
+ __u32 srq_type;
__u32 pd_handle;
__u32 max_wr;
__u32 max_sge;
__u32 srq_limit;
+ __u32 reserved;
__u32 xrcd_handle;
- __u32 xrc_cq;
+ __u32 cq_handle;
__u64 driver_data[0];
};
@@ -672,7 +753,7 @@
__u32 srq_handle;
__u32 max_wr;
__u32 max_sge;
- __u32 reserved;
+ __u32 srqn;
};
struct ib_uverbs_modify_srq {
Property changes on: trunk/sys/ofed/include/rdma/ib_user_verbs.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/rdma/ib_verbs.h
===================================================================
--- trunk/sys/ofed/include/rdma/ib_verbs.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/ib_verbs.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -47,12 +47,14 @@
#include <linux/list.h>
#include <linux/rwsem.h>
#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
-#include <asm/atomic.h>
#include <asm/uaccess.h>
#include <linux/rbtree.h>
#include <linux/mutex.h>
+extern struct workqueue_struct *ib_wq;
+
union ib_gid {
u8 raw[16];
struct {
@@ -114,6 +116,11 @@
IB_DEVICE_XRC = (1<<20),
IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21),
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
+ IB_DEVICE_MR_ALLOCATE = (1<<23),
+ IB_DEVICE_SHARED_MR = (1<<24),
+ IB_DEVICE_QPG = (1<<25),
+ IB_DEVICE_UD_RSS = (1<<26),
+ IB_DEVICE_UD_TSS = (1<<27)
};
enum ib_atomic_cap {
@@ -161,6 +168,7 @@
int max_srq_wr;
int max_srq_sge;
unsigned int max_fast_reg_page_list_len;
+ int max_rss_tbl_sz;
u16 max_pkeys;
u8 local_ca_ack_delay;
};
@@ -207,6 +215,7 @@
IB_PORT_SM_DISABLED = 1 << 10,
IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+ IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
IB_PORT_CM_SUP = 1 << 16,
IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
IB_PORT_REINIT_SUP = 1 << 18,
@@ -237,6 +246,15 @@
}
}
+enum ib_port_speed {
+ IB_SPEED_SDR = 1,
+ IB_SPEED_DDR = 2,
+ IB_SPEED_QDR = 4,
+ IB_SPEED_FDR10 = 8,
+ IB_SPEED_FDR = 16,
+ IB_SPEED_EDR = 32
+};
+
struct ib_protocol_stats {
/* TBD... */
};
@@ -421,7 +439,15 @@
IB_RATE_40_GBPS = 7,
IB_RATE_60_GBPS = 8,
IB_RATE_80_GBPS = 9,
- IB_RATE_120_GBPS = 10
+ IB_RATE_120_GBPS = 10,
+ IB_RATE_14_GBPS = 11,
+ IB_RATE_56_GBPS = 12,
+ IB_RATE_112_GBPS = 13,
+ IB_RATE_168_GBPS = 14,
+ IB_RATE_25_GBPS = 15,
+ IB_RATE_100_GBPS = 16,
+ IB_RATE_200_GBPS = 17,
+ IB_RATE_300_GBPS = 18
};
/**
@@ -433,6 +459,13 @@
int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
/**
+ * ib_rate_to_mbps - Convert the IB rate enum to Mbps.
+ * For example, IB_RATE_2_5_GBPS will be converted to 2500.
+ * @rate: rate to convert.
+ */
+int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__;
+
+/**
* mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
* enum.
* @mult: multiple to convert.
@@ -498,6 +531,7 @@
IB_WC_GRH = 1,
IB_WC_WITH_IMM = (1<<1),
IB_WC_WITH_INVALIDATE = (1<<2),
+ IB_WC_IP_CSUM_OK = (1<<3),
};
struct ib_wc {
@@ -528,6 +562,11 @@
IB_CQ_REPORT_MISSED_EVENTS = 1 << 2,
};
+enum ib_srq_type {
+ IB_SRQT_BASIC,
+ IB_SRQT_XRC
+};
+
enum ib_srq_attr_mask {
IB_SRQ_MAX_WR = 1 << 0,
IB_SRQ_LIMIT = 1 << 1,
@@ -543,6 +582,14 @@
void (*event_handler)(struct ib_event *, void *);
void *srq_context;
struct ib_srq_attr attr;
+ enum ib_srq_type srq_type;
+
+ union {
+ struct {
+ struct ib_xrcd *xrcd;
+ struct ib_cq *cq;
+ } xrc;
+ } ext;
};
struct ib_qp_cap {
@@ -551,6 +598,7 @@
u32 max_send_sge;
u32 max_recv_sge;
u32 max_inline_data;
+ u32 qpg_tss_mask_sz;
};
enum ib_sig_type {
@@ -572,15 +620,34 @@
IB_QPT_UD,
IB_QPT_XRC,
IB_QPT_RAW_IPV6,
- IB_QPT_RAW_ETY,
- IB_QPT_RAW_ETH
+ IB_QPT_RAW_ETHERTYPE,
+ IB_QPT_RAW_PACKET = 8,
+ IB_QPT_XRC_INI = 9,
+ IB_QPT_XRC_TGT,
+ IB_QPT_MAX,
};
enum ib_qp_create_flags {
IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+ IB_QP_CREATE_NETIF_QP = 1 << 2,
+ /* reserve bits 26-31 for low level drivers' internal use */
+ IB_QP_CREATE_RESERVED_START = 1 << 26,
+ IB_QP_CREATE_RESERVED_END = 1 << 31,
};
+enum ib_qpg_type {
+ IB_QPG_NONE = 0,
+ IB_QPG_PARENT = (1<<0),
+ IB_QPG_CHILD_RX = (1<<1),
+ IB_QPG_CHILD_TX = (1<<2)
+};
+
+struct ib_qpg_init_attrib {
+ u32 tss_child_count;
+ u32 rss_child_count;
+};
+
struct ib_qp_init_attr {
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
@@ -587,14 +654,26 @@
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
struct ib_srq *srq;
+ struct ib_xrcd *xrcd; /* XRC TGT QPs only */
struct ib_qp_cap cap;
+ union {
+ struct ib_qp *qpg_parent; /* see qpg_type */
+ struct ib_qpg_init_attrib parent_attrib;
+ } pp;
enum ib_sig_type sq_sig_type;
enum ib_qp_type qp_type;
enum ib_qp_create_flags create_flags;
- struct ib_xrcd *xrc_domain; /* XRC qp's only */
+ enum ib_qpg_type qpg_type;
u8 port_num; /* special QP types only */
};
+struct ib_qp_open_attr {
+ void (*event_handler)(struct ib_event *, void *);
+ void *qp_context;
+ u32 qp_num;
+ enum ib_qp_type qp_type;
+};
+
enum ib_rnr_timeout {
IB_RNR_TIMER_655_36 = 0,
IB_RNR_TIMER_000_01 = 1,
@@ -651,7 +730,8 @@
IB_QP_MAX_DEST_RD_ATOMIC = (1<<17),
IB_QP_PATH_MIG_STATE = (1<<18),
IB_QP_CAP = (1<<19),
- IB_QP_DEST_QPN = (1<<20)
+ IB_QP_DEST_QPN = (1<<20),
+ IB_QP_GROUP_RSS = (1<<21)
};
enum ib_qp_state {
@@ -724,6 +804,20 @@
IB_SEND_IP_CSUM = (1<<4)
};
+enum ib_flow_types {
+ IB_FLOW_ETH = 0,
+ IB_FLOW_IB_UC = 1,
+ IB_FLOW_IB_MC_IPV4 = 2,
+ IB_FLOW_IB_MC_IPV6 = 3
+};
+
+enum {
+ IB_FLOW_L4_NONE = 0,
+ IB_FLOW_L4_OTHER = 3,
+ IB_FLOW_L4_UDP = 5,
+ IB_FLOW_L4_TCP = 6
+};
+
struct ib_sge {
u64 addr;
u32 length;
@@ -785,7 +879,7 @@
u8 static_rate;
} raw_ety;
} wr;
- u32 xrc_remote_srq_num; /* valid for XRC sends only */
+ u32 xrc_remote_srq_num; /* XRC TGT QPs only */
};
struct ib_recv_wr {
@@ -800,7 +894,15 @@
IB_ACCESS_REMOTE_WRITE = (1<<1),
IB_ACCESS_REMOTE_READ = (1<<2),
IB_ACCESS_REMOTE_ATOMIC = (1<<3),
- IB_ACCESS_MW_BIND = (1<<4)
+ IB_ACCESS_MW_BIND = (1<<4),
+ IB_ACCESS_ALLOCATE_MR = (1<<5),
+ IB_ACCESS_SHARED_MR_USER_READ = (1<<6),
+ IB_ACCESS_SHARED_MR_USER_WRITE = (1<<7),
+ IB_ACCESS_SHARED_MR_GROUP_READ = (1<<8),
+ IB_ACCESS_SHARED_MR_GROUP_WRITE = (1<<9),
+ IB_ACCESS_SHARED_MR_OTHER_READ = (1<<10),
+ IB_ACCESS_SHARED_MR_OTHER_WRITE = (1<<11)
+
};
struct ib_phys_buf {
@@ -847,7 +949,7 @@
struct list_head qp_list;
struct list_head srq_list;
struct list_head ah_list;
- struct list_head xrc_domain_list;
+ struct list_head xrcd_list;
int closing;
};
@@ -884,12 +986,14 @@
struct ib_xrcd {
struct ib_device *device;
struct ib_uobject *uobject;
+ atomic_t usecnt; /* count all exposed resources */
struct inode *inode;
struct rb_node node;
- atomic_t usecnt; /* count all resources */
+
+ struct mutex tgt_qp_mutex;
+ struct list_head tgt_qp_list;
};
-
struct ib_ah {
struct ib_device *device;
struct ib_pd *pd;
@@ -911,13 +1015,19 @@
struct ib_srq {
struct ib_device *device;
struct ib_pd *pd;
- struct ib_cq *xrc_cq;
- struct ib_xrcd *xrcd;
struct ib_uobject *uobject;
void (*event_handler)(struct ib_event *, void *);
void *srq_context;
+ enum ib_srq_type srq_type;
atomic_t usecnt;
- u32 xrc_srq_num;
+
+ union {
+ struct {
+ struct ib_xrcd *xrcd;
+ struct ib_cq *cq;
+ u32 srq_num;
+ } xrc;
+ } ext;
};
struct ib_qp {
@@ -926,12 +1036,17 @@
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
struct ib_srq *srq;
+ struct ib_xrcd *xrcd; /* XRC TGT QPs only */
+ struct list_head xrcd_list;
+ atomic_t usecnt; /* count times opened, mcast attaches */
+ struct list_head open_list;
+ struct ib_qp *real_qp;
struct ib_uobject *uobject;
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
u32 qp_num;
enum ib_qp_type qp_type;
- struct ib_xrcd *xrcd; /* XRC QPs only */
+ enum ib_qpg_type qpg_type;
};
struct ib_mr {
@@ -958,6 +1073,32 @@
u32 rkey;
};
+struct ib_flow_spec {
+ enum ib_flow_types type;
+ union {
+ struct {
+ __be16 ethertype;
+ __be16 vlan;
+ u8 vlan_present;
+ u8 mac[6];
+ u8 port;
+ } eth;
+ struct {
+ __be32 qpn;
+ } ib_uc;
+ struct {
+ u8 mgid[16];
+ } ib_mc;
+ } l2_id;
+ __be32 src_ip;
+ __be32 dst_ip;
+ __be16 src_port;
+ __be16 dst_port;
+ u8 l4_protocol;
+ u8 block_mc_loopback;
+ u8 rule_type;
+};
+
struct ib_mad;
struct ib_grh;
@@ -1037,9 +1178,9 @@
struct list_head event_handler_list;
spinlock_t event_handler_lock;
+ spinlock_t client_data_lock;
struct list_head core_list;
struct list_head client_data_list;
- spinlock_t client_data_lock;
struct ib_cache cache;
int *pkey_tbl_len;
@@ -1143,7 +1284,8 @@
u64 start, u64 length,
u64 virt_addr,
int mr_access_flags,
- struct ib_udata *udata);
+ struct ib_udata *udata,
+ int mr_id);
int (*query_mr)(struct ib_mr *mr,
struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
@@ -1191,7 +1333,7 @@
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device,
- struct ib_ucontext *context,
+ struct ib_ucontext *ucontext,
struct ib_udata *udata);
int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
int (*create_xrc_rcv_qp)(struct ib_qp_init_attr *init_attr,
@@ -1211,7 +1353,17 @@
int (*unreg_xrc_rcv_qp)(struct ib_xrcd *xrcd,
void *context,
u32 qp_num);
+ int (*attach_flow)(struct ib_qp *qp,
+ struct ib_flow_spec *spec,
+ int priority);
+ int (*detach_flow)(struct ib_qp *qp,
+ struct ib_flow_spec *spec,
+ int priority);
+ unsigned long (*get_unmapped_area)(struct file *file,
+ unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
struct ib_dma_mapping_ops *dma_ops;
struct module *owner;
@@ -1225,8 +1377,8 @@
IB_DEV_UNREGISTERED
} reg_state;
+ int uverbs_abi_ver;
u64 uverbs_cmd_mask;
- int uverbs_abi_ver;
char node_desc[64];
__be64 node_guid;
@@ -1248,7 +1400,9 @@
struct ib_device *ib_alloc_device(size_t size);
void ib_dealloc_device(struct ib_device *device);
-int ib_register_device (struct ib_device *device);
+int ib_register_device(struct ib_device *device,
+ int (*port_callback)(struct ib_device *,
+ u8, struct kobject *));
void ib_unregister_device(struct ib_device *device);
int ib_register_client (struct ib_client *client);
@@ -1269,15 +1423,6 @@
}
/**
- * ib_sysfs_create_port_files - iterate over port sysfs directories
- * @device: the IB device
- * @create: a function to create sysfs files in each port directory
- */
-int ib_sysfs_create_port_files(struct ib_device *device,
- int (*create)(struct ib_device *dev, u8 port_num,
- struct kobject *kobj));
-
-/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
* the given QP state transition.
@@ -1427,8 +1572,8 @@
struct ib_srq_init_attr *srq_init_attr);
/**
- * ib_create_srq - Creates an SRQ associated with the specified
- * protection domain.
+ * ib_create_srq - Creates an SRQ associated with the specified protection
+ * domain.
* @pd: The protection domain associated with the SRQ.
* @srq_init_attr: A list of initial attributes required to create the
* SRQ. If SRQ creation succeeds, then the attributes are updated to
@@ -1534,6 +1679,25 @@
int ib_destroy_qp(struct ib_qp *qp);
/**
+ * ib_open_qp - Obtain a reference to an existing sharable QP.
+ * @xrcd - XRC domain
+ * @qp_open_attr: Attributes identifying the QP to open.
+ *
+ * Returns a reference to a sharable QP.
+ */
+struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
+ struct ib_qp_open_attr *qp_open_attr);
+
+/**
+ * ib_close_qp - Release an external reference to a QP.
+ * @qp: The QP handle to release
+ *
+ * The opened QP handle is released by the caller. The underlying
+ * shared QP is not destroyed until all internal references are released.
+ */
+int ib_close_qp(struct ib_qp *qp);
+
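A hedged kernel-side sketch of the new open/close pairing for a shared (XRC target) QP, assuming the usual IS_ERR()/PTR_ERR() conventions; the attribute values and error handling are illustrative:

static int
example_share_qp(struct ib_xrcd *xrcd, u32 qpn)
{
	struct ib_qp_open_attr attr;
	struct ib_qp *qp;

	memset(&attr, 0, sizeof(attr));
	attr.qp_num = qpn;
	attr.qp_type = IB_QPT_XRC_TGT;

	qp = ib_open_qp(xrcd, &attr);
	if (IS_ERR(qp))
		return (PTR_ERR(qp));

	/* use the shared QP; qp->real_qp points at the underlying QP */

	return (ib_close_qp(qp));
}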
+/**
* ib_post_send - Posts a list of work requests to the send queue of
* the specified QP.
* @qp: The QP to post the work request on.
@@ -1540,6 +1704,11 @@
* @send_wr: A list of work requests to post on the send queue.
* @bad_send_wr: On an immediate failure, this parameter will reference
* the work request that failed to be posted on the QP.
+ *
+ * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
+ * error is returned, the QP state shall not be affected,
+ * ib_post_send() will return an immediate error after queueing any
+ * earlier work requests in the list.
*/
static inline int ib_post_send(struct ib_qp *qp,
struct ib_send_wr *send_wr,
@@ -1581,8 +1750,7 @@
* the associated completion and event handlers.
* @cqe: The minimum size of the CQ.
* @comp_vector - Completion vector used to signal completion events.
- * Must be >= 0 and < context->num_comp_vectors
- * or IB_CQ_VECTOR_LEAST_ATTACHED.
+ * Must be >= 0 and < context->num_comp_vectors.
*
* Users can examine the cq structure to determine the actual CQ size.
*/
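With IB_CQ_VECTOR_LEAST_ATTACHED gone, callers must now choose an explicit completion vector in [0, num_comp_vectors). A simple round-robin sketch; my_index and the handler names are illustrative:

    int vec = my_index % device->num_comp_vectors;

    cq = ib_create_cq(device, my_comp_handler, my_event_handler,
        my_ctx, cqe, vec);
    if (IS_ERR(cq))
            return (PTR_ERR(cq));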
@@ -2154,17 +2322,19 @@
*/
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-
/**
- * ib_dealloc_xrcd - Deallocates an extended reliably connected domain.
- * @xrcd: The xrc domain to deallocate.
+ * ib_alloc_xrcd - Allocates an XRC domain.
+ * @device: The device on which to allocate the XRC domain.
*/
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
/**
- * ib_alloc_xrcd - Allocates an extended reliably connected domain.
- * @device: The device on which to allocate the xrcd.
+ * ib_dealloc_xrcd - Deallocates an XRC domain.
+ * @xrcd: The XRC domain to deallocate.
*/
-struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+int ib_attach_flow(struct ib_qp *qp, struct ib_flow_spec *spec, int priority);
+int ib_detach_flow(struct ib_qp *qp, struct ib_flow_spec *spec, int priority);
+
#endif /* IB_VERBS_H */
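Besides swapping the order of the XRCD helpers' documentation, the final hunk exports ib_attach_flow()/ib_detach_flow() to consumers. A hedged end-to-end sketch; the contents of ib_flow_spec are device-specific and left zeroed here purely for illustration:

    struct ib_xrcd *xrcd;
    struct ib_flow_spec spec;
    int ret;

    xrcd = ib_alloc_xrcd(device);
    if (IS_ERR(xrcd))
            return (PTR_ERR(xrcd));

    memset(&spec, 0, sizeof(spec));
    /* ... fill in the match fields for the traffic class to steer ... */
    ret = ib_attach_flow(qp, &spec, 0);
    if (ret == 0)
            ib_detach_flow(qp, &spec, 0);

    ib_dealloc_xrcd(xrcd);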
Property changes on: trunk/sys/ofed/include/rdma/ib_verbs.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/rdma/iw_cm.h
===================================================================
--- trunk/sys/ofed/include/rdma/iw_cm.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/iw_cm.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -63,6 +63,7 @@
void *private_data;
u8 private_data_len;
void *provider_data;
+ struct socket *so;
};
/**
@@ -98,6 +99,7 @@
/* Used by provider to add and remove refs on IW cm_id */
void (*add_ref)(struct iw_cm_id *);
void (*rem_ref)(struct iw_cm_id *);
+ struct socket *so;
};
struct iw_cm_conn_param {
@@ -139,7 +141,7 @@
* returned IW CM identifier.
* @context: User specified context associated with the id.
*/
-struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device, struct socket *so,
iw_cm_handler cm_handler, void *context);
/**
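iw_create_cm_id() now carries the socket through to the provider, matching the new 'so' members added to iw_cm_id and iw_cm_verbs above. A sketch of a caller passing an already-established socket; 'so' and my_iw_handler are assumptions, not part of this commit:

    struct iw_cm_id *cm_id;

    cm_id = iw_create_cm_id(device, so, my_iw_handler, my_ctx);
    if (IS_ERR(cm_id))
            return (PTR_ERR(cm_id));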
Property changes on: trunk/sys/ofed/include/rdma/iw_cm.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/rdma/rdma_cm.h
===================================================================
--- trunk/sys/ofed/include/rdma/rdma_cm.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/rdma_cm.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/rdma/rdma_cm.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/rdma/rdma_cm_ib.h
===================================================================
--- trunk/sys/ofed/include/rdma/rdma_cm_ib.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/rdma_cm_ib.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/rdma/rdma_cm_ib.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Index: trunk/sys/ofed/include/rdma/rdma_user_cm.h
===================================================================
--- trunk/sys/ofed/include/rdma/rdma_user_cm.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/rdma_user_cm.h 2016-09-14 19:35:22 UTC (rev 7911)
Property changes on: trunk/sys/ofed/include/rdma/rdma_user_cm.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property
Modified: trunk/sys/ofed/include/rdma/sdp_socket.h
===================================================================
--- trunk/sys/ofed/include/rdma/sdp_socket.h 2016-09-14 19:30:19 UTC (rev 7910)
+++ trunk/sys/ofed/include/rdma/sdp_socket.h 2016-09-14 19:35:22 UTC (rev 7911)
@@ -3,11 +3,6 @@
#ifndef SDP_SOCKET_H
#define SDP_SOCKET_H
-#ifndef AF_INET_SDP
-#define AF_INET_SDP 27
-#define PF_INET_SDP AF_INET_SDP
-#endif
-
#ifndef SDP_ZCOPY_THRESH
#define SDP_ZCOPY_THRESH 80
#endif
Property changes on: trunk/sys/ofed/include/rdma/sdp_socket.h
___________________________________________________________________
Deleted: cvs2svn:cvs-rev
## -1 +0,0 ##
-1.1.1.1
\ No newline at end of property