[Midnightbsd-cvs] src [10090] trunk/sys/dev/ntb: add ntb

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Sun May 27 19:30:18 EDT 2018


Revision: 10090
          http://svnweb.midnightbsd.org/src/?rev=10090
Author:   laffer1
Date:     2018-05-27 19:30:17 -0400 (Sun, 27 May 2018)
Log Message:
-----------
add ntb

Added Paths:
-----------
    trunk/sys/dev/ntb/
    trunk/sys/dev/ntb/if_ntb/
    trunk/sys/dev/ntb/if_ntb/if_ntb.c
    trunk/sys/dev/ntb/ntb.c
    trunk/sys/dev/ntb/ntb.h
    trunk/sys/dev/ntb/ntb_hw/
    trunk/sys/dev/ntb/ntb_hw/ntb_hw.c
    trunk/sys/dev/ntb/ntb_hw/ntb_regs.h
    trunk/sys/dev/ntb/ntb_if.m
    trunk/sys/dev/ntb/ntb_transport.c
    trunk/sys/dev/ntb/ntb_transport.h

Added: trunk/sys/dev/ntb/if_ntb/if_ntb.c
===================================================================
--- trunk/sys/dev/ntb/if_ntb/if_ntb.c	                        (rev 0)
+++ trunk/sys/dev/ntb/if_ntb/if_ntb.c	2018-05-27 23:30:17 UTC (rev 10090)
@@ -0,0 +1,522 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav at FreeBSD.org>
+ * Copyright (C) 2013 Intel Corporation
+ * Copyright (C) 2015 EMC Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The Non-Transparent Bridge (NTB) is a device that allows you to connect
+ * two or more systems using PCI-e links, providing remote memory access.
+ *
+ * This module contains a driver for a simulated Ethernet device, using the
+ * underlying NTB Transport device.
+ *
+ * NOTE: Much of the code in this module is shared with Linux. Any patches may
+ * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/ntb/if_ntb/if_ntb.c 312742 2017-01-25 07:51:53Z mav $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/buf_ring.h>
+#include <sys/bus.h>
+#include <sys/limits.h>
+#include <sys/module.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+
+#include <net/if.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/bpf.h>
+#include <net/ethernet.h>
+
+#include <machine/bus.h>
+
+#include "../ntb_transport.h"
+
+#define KTR_NTB KTR_SPARE3
+#define NTB_MEDIATYPE		 (IFM_ETHER | IFM_AUTO | IFM_FDX)
+
+#define	NTB_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
+#define	NTB_CSUM_FEATURES6	(CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
+#define	NTB_CSUM_SET		(CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
+				    CSUM_PSEUDO_HDR | \
+				    CSUM_IP_CHECKED | CSUM_IP_VALID | \
+				    CSUM_SCTP_VALID)
+
+static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb");
+
+static unsigned g_if_ntb_num_queues = UINT_MAX;
+SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN,
+    &g_if_ntb_num_queues, 0, "Number of queues per interface");
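+
+/*
+ * Being declared with CTLFLAG_RWTUN, the knob above may also be set as a
+ * boot-time tunable, e.g. in /boot/loader.conf (the value is hypothetical):
+ *
+ *	hw.if_ntb.num_queues=4
+ */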
+
+struct ntb_net_queue {
+	struct ntb_net_ctx	*sc;
+	struct ifnet		*ifp;
+	struct ntb_transport_qp *qp;
+	struct buf_ring		*br;
+	struct task		 tx_task;
+	struct taskqueue	*tx_tq;
+	struct mtx		 tx_lock;
+	struct callout		 queue_full;
+};
+
+struct ntb_net_ctx {
+	struct ifnet 		*ifp;
+	struct ifmedia		 media;
+	u_char			 eaddr[ETHER_ADDR_LEN];
+	int			 num_queues;
+	struct ntb_net_queue	*queues;
+	int			 mtu;
+};
+
+static int ntb_net_probe(device_t dev);
+static int ntb_net_attach(device_t dev);
+static int ntb_net_detach(device_t dev);
+static void ntb_net_init(void *arg);
+static int ntb_ifmedia_upd(struct ifnet *);
+static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *);
+static int ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
+static int ntb_transmit(struct ifnet *ifp, struct mbuf *m);
+static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
+    void *data, int len);
+static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
+    void *data, int len);
+static void ntb_net_event_handler(void *data, enum ntb_link_event status);
+static void ntb_handle_tx(void *arg, int pending);
+static void ntb_qp_full(void *arg);
+static void ntb_qflush(struct ifnet *ifp);
+static void create_random_local_eui48(u_char *eaddr);
+
+static int
+ntb_net_probe(device_t dev)
+{
+
+	device_set_desc(dev, "NTB Network Interface");
+	return (0);
+}
+
+static int
+ntb_net_attach(device_t dev)
+{
+	struct ntb_net_ctx *sc = device_get_softc(dev);
+	struct ntb_net_queue *q;
+	struct ifnet *ifp;
+	struct ntb_queue_handlers handlers = { ntb_net_rx_handler,
+	    ntb_net_tx_handler, ntb_net_event_handler };
+	int i;
+
+	ifp = sc->ifp = if_alloc(IFT_ETHER);
+	if (ifp == NULL) {
+		printf("ntb: Cannot allocate ifnet structure\n");
+		return (ENOMEM);
+	}
+	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+
+	sc->num_queues = min(g_if_ntb_num_queues,
+	    ntb_transport_queue_count(dev));
+	sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue),
+	    M_DEVBUF, M_WAITOK | M_ZERO);
+	sc->mtu = INT_MAX;
+	for (i = 0; i < sc->num_queues; i++) {
+		q = &sc->queues[i];
+		q->sc = sc;
+		q->ifp = ifp;
+		q->qp = ntb_transport_create_queue(dev, i, &handlers, q);
+		if (q->qp == NULL)
+			break;
+		sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp));
+		mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF);
+		q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock);
+		TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q);
+		q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT,
+		    taskqueue_thread_enqueue, &q->tx_tq);
+		taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d",
+		    device_get_nameunit(dev), i);
+		callout_init(&q->queue_full, 1);
+	}
+	sc->num_queues = i;
+	device_printf(dev, "%d queue(s)\n", sc->num_queues);
+
+	ifp->if_init = ntb_net_init;
+	ifp->if_softc = sc;
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_ioctl = ntb_ioctl;
+	ifp->if_transmit = ntb_transmit;
+	ifp->if_qflush = ntb_qflush;
+	create_random_local_eui48(sc->eaddr);
+	ether_ifattach(ifp, sc->eaddr);
+	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
+	    IFCAP_JUMBO_MTU | IFCAP_LINKSTATE;
+	ifp->if_capenable = IFCAP_JUMBO_MTU | IFCAP_LINKSTATE;
+	ifp->if_mtu = sc->mtu - ETHER_HDR_LEN;
+
+	ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd,
+	    ntb_ifmedia_sts);
+	ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL);
+	ifmedia_set(&sc->media, NTB_MEDIATYPE);
+
+	for (i = 0; i < sc->num_queues; i++)
+		ntb_transport_link_up(sc->queues[i].qp);
+	return (0);
+}
+
+static int
+ntb_net_detach(device_t dev)
+{
+	struct ntb_net_ctx *sc = device_get_softc(dev);
+	struct ntb_net_queue *q;
+	int i;
+
+	for (i = 0; i < sc->num_queues; i++)
+		ntb_transport_link_down(sc->queues[i].qp);
+	ether_ifdetach(sc->ifp);
+	if_free(sc->ifp);
+	ifmedia_removeall(&sc->media);
+	for (i = 0; i < sc->num_queues; i++) {
+		q = &sc->queues[i];
+		ntb_transport_free_queue(q->qp);
+		buf_ring_free(q->br, M_DEVBUF);
+		callout_drain(&q->queue_full);
+		taskqueue_drain_all(q->tx_tq);
+		mtx_destroy(&q->tx_lock);
+	}
+	free(sc->queues, M_DEVBUF);
+	return (0);
+}
+
+/* Network device interface */
+
+static void
+ntb_net_init(void *arg)
+{
+	struct ntb_net_ctx *sc = arg;
+	struct ifnet *ifp = sc->ifp;
+
+	ifp->if_drv_flags |= IFF_DRV_RUNNING;
+	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+	if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ?
+	    LINK_STATE_UP : LINK_STATE_DOWN);
+}
+
+static int
+ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+{
+	struct ntb_net_ctx *sc = ifp->if_softc;
+	struct ifreq *ifr = (struct ifreq *)data;
+	int error = 0;
+
+	switch (command) {
+	case SIOCSIFFLAGS:
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		break;
+
+	case SIOCSIFMTU:
+	    {
+		if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) {
+			error = EINVAL;
+			break;
+		}
+
+		ifp->if_mtu = ifr->ifr_mtu;
+		break;
+	    }
+
+	case SIOCSIFMEDIA:
+	case SIOCGIFMEDIA:
+		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
+		break;
+
+	case SIOCSIFCAP:
+		if (ifr->ifr_reqcap & IFCAP_RXCSUM)
+			ifp->if_capenable |= IFCAP_RXCSUM;
+		else
+			ifp->if_capenable &= ~IFCAP_RXCSUM;
+		if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
+			ifp->if_capenable |= IFCAP_TXCSUM;
+			ifp->if_hwassist |= NTB_CSUM_FEATURES;
+		} else {
+			ifp->if_capenable &= ~IFCAP_TXCSUM;
+			ifp->if_hwassist &= ~NTB_CSUM_FEATURES;
+		}
+		if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6)
+			ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
+		else
+			ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
+		if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) {
+			ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
+			ifp->if_hwassist |= NTB_CSUM_FEATURES6;
+		} else {
+			ifp->if_capenable &= ~IFCAP_TXCSUM_IPV6;
+			ifp->if_hwassist &= ~NTB_CSUM_FEATURES6;
+		}
+		break;
+
+	default:
+		error = ether_ioctl(ifp, command, data);
+		break;
+	}
+
+	return (error);
+}
+
+static int
+ntb_ifmedia_upd(struct ifnet *ifp)
+{
+	struct ntb_net_ctx *sc = ifp->if_softc;
+	struct ifmedia *ifm = &sc->media;
+
+	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
+		return (EINVAL);
+
+	return (0);
+}
+
+static void
+ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
+{
+	struct ntb_net_ctx *sc = ifp->if_softc;
+
+	ifmr->ifm_status = IFM_AVALID;
+	ifmr->ifm_active = NTB_MEDIATYPE;
+	if (ntb_transport_link_query(sc->queues[0].qp))
+		ifmr->ifm_status |= IFM_ACTIVE;
+}
+
+static void
+ntb_transmit_locked(struct ntb_net_queue *q)
+{
+	struct ifnet *ifp = q->ifp;
+	struct mbuf *m;
+	int rc, len;
+	short mflags;
+
+	CTR0(KTR_NTB, "TX: ntb_transmit_locked");
+	while ((m = drbr_peek(ifp, q->br)) != NULL) {
+		CTR1(KTR_NTB, "TX: start mbuf %p", m);
+		ETHER_BPF_MTAP(ifp, m);
+		len = m->m_pkthdr.len;
+		mflags = m->m_flags;
+		rc = ntb_transport_tx_enqueue(q->qp, m, m, len);
+		if (rc != 0) {
+			CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc);
+			if (rc == EAGAIN) {
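+				/*
+				 * The transport ring is full: keep the mbuf
+				 * at the head of the ring and poll for free
+				 * entries again in ~250us (SBT_1MS / 4).
+				 */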
+				drbr_putback(ifp, q->br, m);
+				callout_reset_sbt(&q->queue_full,
+				    SBT_1MS / 4, SBT_1MS / 4,
+				    ntb_qp_full, q, 0);
+			} else {
+				m_freem(m);
+				drbr_advance(ifp, q->br);
+				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+			}
+			break;
+		}
+		drbr_advance(ifp, q->br);
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
+		if (mflags & M_MCAST)
+			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+	}
+}
+
+static int
+ntb_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+	struct ntb_net_ctx *sc = ifp->if_softc;
+	struct ntb_net_queue *q;
+	int error, i;
+
+	CTR0(KTR_NTB, "TX: ntb_transmit");
+	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+		i = m->m_pkthdr.flowid % sc->num_queues;
+	else
+		i = curcpu % sc->num_queues;
+	q = &sc->queues[i];
+
+	error = drbr_enqueue(ifp, q->br, m);
+	if (error)
+		return (error);
+
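+	/*
+	 * If the TX lock is already held, another thread is draining this
+	 * ring; hand the work to the per-queue taskqueue instead of blocking.
+	 */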
+	if (mtx_trylock(&q->tx_lock)) {
+		ntb_transmit_locked(q);
+		mtx_unlock(&q->tx_lock);
+	} else
+		taskqueue_enqueue(q->tx_tq, &q->tx_task);
+	return (0);
+}
+
+static void
+ntb_handle_tx(void *arg, int pending)
+{
+	struct ntb_net_queue *q = arg;
+
+	mtx_lock(&q->tx_lock);
+	ntb_transmit_locked(q);
+	mtx_unlock(&q->tx_lock);
+}
+
+static void
+ntb_qp_full(void *arg)
+{
+	struct ntb_net_queue *q = arg;
+
+	CTR0(KTR_NTB, "TX: qp_full callout");
+	if (ntb_transport_tx_free_entry(q->qp) > 0)
+		taskqueue_enqueue(q->tx_tq, &q->tx_task);
+	else
+		callout_schedule_sbt(&q->queue_full,
+		    SBT_1MS / 4, SBT_1MS / 4, 0);
+}
+
+static void
+ntb_qflush(struct ifnet *ifp)
+{
+	struct ntb_net_ctx *sc = ifp->if_softc;
+	struct ntb_net_queue *q;
+	struct mbuf *m;
+	int i;
+
+	for (i = 0; i < sc->num_queues; i++) {
+		q = &sc->queues[i];
+		mtx_lock(&q->tx_lock);
+		while ((m = buf_ring_dequeue_sc(q->br)) != NULL)
+			m_freem(m);
+		mtx_unlock(&q->tx_lock);
+	}
+	if_qflush(ifp);
+}
+
+/* Network Device Callbacks */
+static void
+ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
+    int len)
+{
+
+	m_freem(data);
+	CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
+}
+
+static void
+ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
+    int len)
+{
+	struct ntb_net_queue *q = qp_data;
+	struct ntb_net_ctx *sc = q->sc;
+	struct mbuf *m = data;
+	struct ifnet *ifp = q->ifp;
+	uint16_t proto;
+
+	CTR1(KTR_NTB, "RX: rx handler (%d)", len);
+	if (len < 0) {
+		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+		return;
+	}
+
+	m->m_pkthdr.rcvif = ifp;
+	if (sc->num_queues > 1) {
+		m->m_pkthdr.flowid = q - sc->queues;
+		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
+	}
+	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
+		m_copydata(m, 12, 2, (void *)&proto);
+		switch (ntohs(proto)) {
+		case ETHERTYPE_IP:
+			if (ifp->if_capenable & IFCAP_RXCSUM) {
+				m->m_pkthdr.csum_data = 0xffff;
+				m->m_pkthdr.csum_flags = NTB_CSUM_SET;
+			}
+			break;
+		case ETHERTYPE_IPV6:
+			if (ifp->if_capenable & IFCAP_RXCSUM_IPV6) {
+				m->m_pkthdr.csum_data = 0xffff;
+				m->m_pkthdr.csum_flags = NTB_CSUM_SET;
+			}
+			break;
+		}
+	}
+	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+	ifp->if_input(ifp, m);
+}
+
+static void
+ntb_net_event_handler(void *data, enum ntb_link_event status)
+{
+	struct ntb_net_queue *q = data;
+	int new_state;
+
+	switch (status) {
+	case NTB_LINK_DOWN:
+		new_state = LINK_STATE_DOWN;
+		break;
+	case NTB_LINK_UP:
+		new_state = LINK_STATE_UP;
+		break;
+	default:
+		new_state = LINK_STATE_UNKNOWN;
+		break;
+	}
+	if_link_state_change(q->ifp, new_state);
+}
+
+/* Helper functions */
+/* TODO: This too should really be part of the kernel */
+#define EUI48_MULTICAST			(1 << 0)
+#define EUI48_LOCALLY_ADMINISTERED	(1 << 1)
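+/*
+ * Generate a random Ethernet address with the locally-administered bit set:
+ * byte 0 carries only EUI48_LOCALLY_ADMINISTERED, bytes 1-4 are random, and
+ * byte 5 is a per-host counter so multiple interfaces get distinct addresses.
+ */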
+static void
+create_random_local_eui48(u_char *eaddr)
+{
+	static uint8_t counter = 0;
+
+	eaddr[0] = EUI48_LOCALLY_ADMINISTERED;
+	arc4rand(&eaddr[1], 4, 0);
+	eaddr[5] = counter++;
+}
+
+static device_method_t ntb_net_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,     ntb_net_probe),
+	DEVMETHOD(device_attach,    ntb_net_attach),
+	DEVMETHOD(device_detach,    ntb_net_detach),
+	DEVMETHOD_END
+};
+
+devclass_t ntb_net_devclass;
+static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods,
+    sizeof(struct ntb_net_ctx));
+DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass,
+    NULL, NULL);
+MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1);
+MODULE_VERSION(if_ntb, 1);


Property changes on: trunk/sys/dev/ntb/if_ntb/if_ntb.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/ntb/ntb.c
===================================================================
--- trunk/sys/dev/ntb/ntb.c	                        (rev 0)
+++ trunk/sys/dev/ntb/ntb.c	2018-05-27 23:30:17 UTC (rev 10090)
@@ -0,0 +1,464 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/ntb/ntb.c 304404 2016-08-18 10:59:12Z mav $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <machine/bus.h>
+#include <sys/rmlock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+
+#include "ntb.h"
+
+devclass_t ntb_hw_devclass;
+SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
+
+struct ntb_child {
+	device_t	dev;
+	int		enabled;
+	int		mwoff;
+	int		mwcnt;
+	int		spadoff;
+	int		spadcnt;
+	int		dboff;
+	int		dbmask;
+	void		*ctx;
+	const struct ntb_ctx_ops *ctx_ops;
+	struct rmlock	ctx_lock;
+	struct ntb_child *next;
+};
+
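+/*
+ * The split of NTB resources among child devices is driven by a device hint
+ * parsed below: a comma-separated list of "name:mw:spad:db" entries, where
+ * an empty name matches any driver and empty counts take all remaining
+ * resources.  A hypothetical /boot/loader.conf example (assuming the
+ * hardware device instance is named ntb_hw0):
+ *
+ *	hint.ntb_hw.0.config="ntb_transport:1:2:4,:1:2:4"
+ */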
+int
+ntb_register_device(device_t dev)
+{
+	struct ntb_child **cpp = device_get_softc(dev);
+	struct ntb_child *nc;
+	int i, mw, mwu, mwt, spad, spadu, spadt, db, dbu, dbt;
+	char cfg[128] = "";
+	char buf[32];
+	char *n, *np, *c, *p, *name;
+
+	mwu = 0;
+	mwt = NTB_MW_COUNT(dev);
+	spadu = 0;
+	spadt = NTB_SPAD_COUNT(dev);
+	dbu = 0;
+	dbt = flsll(NTB_DB_VALID_MASK(dev));
+
+	device_printf(dev, "%d memory windows, %d scratchpads, "
+	    "%d doorbells\n", mwt, spadt, dbt);
+
+	snprintf(buf, sizeof(buf), "hint.%s.%d.config", device_get_name(dev),
+	    device_get_unit(dev));
+	TUNABLE_STR_FETCH(buf, cfg, sizeof(cfg));
+	n = cfg;
+	i = 0;
+	while ((c = strsep(&n, ",")) != NULL) {
+		np = c;
+		name = strsep(&np, ":");
+		if (name != NULL && name[0] == 0)
+			name = NULL;
+		p = strsep(&np, ":");
+		mw = (p && p[0] != 0) ? strtol(p, NULL, 10) : mwt - mwu;
+		p = strsep(&np, ":");
+		spad = (p && p[0] != 0) ? strtol(p, NULL, 10) : spadt - spadu;
+		db = (np && np[0] != 0) ? strtol(np, NULL, 10) : dbt - dbu;
+
+		if (mw > mwt - mwu || spad > spadt - spadu || db > dbt - dbu) {
+			device_printf(dev, "Not enough resources for config\n");
+			break;
+		}
+
+		nc = malloc(sizeof(*nc), M_DEVBUF, M_WAITOK | M_ZERO);
+		nc->mwoff = mwu;
+		nc->mwcnt = mw;
+		nc->spadoff = spadu;
+		nc->spadcnt = spad;
+		nc->dboff = dbu;
+		nc->dbmask = (db == 0) ? 0 : (0xffffffffffffffff >> (64 - db));
+		rm_init(&nc->ctx_lock, "ntb ctx");
+		nc->dev = device_add_child(dev, name, -1);
+		if (nc->dev == NULL) {
+			ntb_unregister_device(dev);
+			return (ENOMEM);
+		}
+		device_set_ivars(nc->dev, nc);
+		*cpp = nc;
+		cpp = &nc->next;
+
+		if (bootverbose) {
+			device_printf(dev, "%d \"%s\":", i, name);
+			if (mw > 0) {
+				printf(" memory windows %d", mwu);
+				if (mw > 1)
+					printf("-%d", mwu + mw - 1);
+			}
+			if (spad > 0) {
+				printf(" scratchpads %d", spadu);
+				if (spad > 1)
+					printf("-%d", spadu + spad - 1);
+			}
+			if (db > 0) {
+				printf(" doorbells %d", dbu);
+				if (db > 1)
+					printf("-%d", dbu + db - 1);
+			}
+			printf("\n");
+		}
+
+		mwu += mw;
+		spadu += spad;
+		dbu += db;
+		i++;
+	}
+
+	bus_generic_attach(dev);
+	return (0);
+}
+
+int
+ntb_unregister_device(device_t dev)
+{
+	struct ntb_child **cpp = device_get_softc(dev);
+	struct ntb_child *nc;
+	int error = 0;
+
+	while ((nc = *cpp) != NULL) {
+		*cpp = (*cpp)->next;
+		error = device_delete_child(dev, nc->dev);
+		if (error)
+			break;
+		rm_destroy(&nc->ctx_lock);
+		free(nc, M_DEVBUF);
+	}
+	return (error);
+}
+
+void
+ntb_link_event(device_t dev)
+{
+	struct ntb_child **cpp = device_get_softc(dev);
+	struct ntb_child *nc;
+	struct rm_priotracker ctx_tracker;
+
+	for (nc = *cpp; nc != NULL; nc = nc->next) {
+		rm_rlock(&nc->ctx_lock, &ctx_tracker);
+		if (nc->ctx_ops != NULL && nc->ctx_ops->link_event != NULL)
+			nc->ctx_ops->link_event(nc->ctx);
+		rm_runlock(&nc->ctx_lock, &ctx_tracker);
+	}
+}
+
+void
+ntb_db_event(device_t dev, uint32_t vec)
+{
+	struct ntb_child **cpp = device_get_softc(dev);
+	struct ntb_child *nc;
+	struct rm_priotracker ctx_tracker;
+
+	for (nc = *cpp; nc != NULL; nc = nc->next) {
+		rm_rlock(&nc->ctx_lock, &ctx_tracker);
+		if (nc->ctx_ops != NULL && nc->ctx_ops->db_event != NULL)
+			nc->ctx_ops->db_event(nc->ctx, vec);
+		rm_runlock(&nc->ctx_lock, &ctx_tracker);
+	}
+}
+
+bool
+ntb_link_is_up(device_t ntb, enum ntb_speed *speed, enum ntb_width *width)
+{
+
+	return (NTB_LINK_IS_UP(device_get_parent(ntb), speed, width));
+}
+
+int
+ntb_link_enable(device_t ntb, enum ntb_speed speed, enum ntb_width width)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+	struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev));
+	struct ntb_child *nc1;
+
+	for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) {
+		if (nc1->enabled) {
+			nc->enabled = 1;
+			return (0);
+		}
+	}
+	nc->enabled = 1;
+	return (NTB_LINK_ENABLE(device_get_parent(ntb), speed, width));
+}
+
+int
+ntb_link_disable(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+	struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev));
+	struct ntb_child *nc1;
+
+	if (!nc->enabled)
+		return (0);
+	nc->enabled = 0;
+	for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) {
+		if (nc1->enabled)
+			return (0);
+	}
+	return (NTB_LINK_DISABLE(device_get_parent(ntb)));
+}
+
+bool
+ntb_link_enabled(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (nc->enabled && NTB_LINK_ENABLED(device_get_parent(ntb)));
+}
+
+int
+ntb_set_ctx(device_t ntb, void *ctx, const struct ntb_ctx_ops *ctx_ops)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	if (ctx == NULL || ctx_ops == NULL)
+		return (EINVAL);
+
+	rm_wlock(&nc->ctx_lock);
+	if (nc->ctx_ops != NULL) {
+		rm_wunlock(&nc->ctx_lock);
+		return (EINVAL);
+	}
+	nc->ctx = ctx;
+	nc->ctx_ops = ctx_ops;
+	rm_wunlock(&nc->ctx_lock);
+
+	return (0);
+}
+
+void *
+ntb_get_ctx(device_t ntb, const struct ntb_ctx_ops **ctx_ops)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	KASSERT(nc->ctx != NULL && nc->ctx_ops != NULL, ("bogus"));
+	if (ctx_ops != NULL)
+		*ctx_ops = nc->ctx_ops;
+	return (nc->ctx);
+}
+
+void
+ntb_clear_ctx(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	rm_wlock(&nc->ctx_lock);
+	nc->ctx = NULL;
+	nc->ctx_ops = NULL;
+	rm_wunlock(&nc->ctx_lock);
+}
+
+uint8_t
+ntb_mw_count(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (nc->mwcnt);
+}
+
+int
+ntb_mw_get_range(device_t ntb, unsigned mw_idx, vm_paddr_t *base,
+    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
+    bus_addr_t *plimit)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_MW_GET_RANGE(device_get_parent(ntb), mw_idx + nc->mwoff,
+	    base, vbase, size, align, align_size, plimit));
+}
+
+int
+ntb_mw_set_trans(device_t ntb, unsigned mw_idx, bus_addr_t addr, size_t size)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_MW_SET_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff,
+	    addr, size));
+}
+
+int
+ntb_mw_clear_trans(device_t ntb, unsigned mw_idx)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_MW_CLEAR_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff));
+}
+
+int
+ntb_mw_get_wc(device_t ntb, unsigned mw_idx, vm_memattr_t *mode)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_MW_GET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode));
+}
+
+int
+ntb_mw_set_wc(device_t ntb, unsigned mw_idx, vm_memattr_t mode)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_MW_SET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode));
+}
+
+uint8_t
+ntb_spad_count(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (nc->spadcnt);
+}
+
+void
+ntb_spad_clear(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+	unsigned i;
+
+	for (i = 0; i < nc->spadcnt; i++)
+		NTB_SPAD_WRITE(device_get_parent(ntb), i + nc->spadoff, 0);
+}
+
+int
+ntb_spad_write(device_t ntb, unsigned int idx, uint32_t val)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff, val));
+}
+
+int
+ntb_spad_read(device_t ntb, unsigned int idx, uint32_t *val)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff, val));
+}
+
+int
+ntb_peer_spad_write(device_t ntb, unsigned int idx, uint32_t val)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_PEER_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff,
+	    val));
+}
+
+int
+ntb_peer_spad_read(device_t ntb, unsigned int idx, uint32_t *val)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_PEER_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff,
+	    val));
+}
+
+uint64_t
+ntb_db_valid_mask(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (nc->dbmask);
+}
+
+int
+ntb_db_vector_count(device_t ntb)
+{
+
+	return (NTB_DB_VECTOR_COUNT(device_get_parent(ntb)));
+}
+
+uint64_t
+ntb_db_vector_mask(device_t ntb, uint32_t vector)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return ((NTB_DB_VECTOR_MASK(device_get_parent(ntb), vector)
+	    >> nc->dboff) & nc->dbmask);
+}
+
+int
+ntb_peer_db_addr(device_t ntb, bus_addr_t *db_addr, vm_size_t *db_size)
+{
+
+	return (NTB_PEER_DB_ADDR(device_get_parent(ntb), db_addr, db_size));
+}
+
+void
+ntb_db_clear(device_t ntb, uint64_t bits)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_DB_CLEAR(device_get_parent(ntb), bits << nc->dboff));
+}
+
+void
+ntb_db_clear_mask(device_t ntb, uint64_t bits)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_DB_CLEAR_MASK(device_get_parent(ntb), bits << nc->dboff));
+}
+
+uint64_t
+ntb_db_read(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return ((NTB_DB_READ(device_get_parent(ntb)) >> nc->dboff)
+	    & nc->dbmask);
+}
+
+void
+ntb_db_set_mask(device_t ntb, uint64_t bits)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_DB_SET_MASK(device_get_parent(ntb), bits << nc->dboff));
+}
+
+void
+ntb_peer_db_set(device_t ntb, uint64_t bits)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_PEER_DB_SET(device_get_parent(ntb), bits << nc->dboff));
+}
+
+MODULE_VERSION(ntb, 1);


Property changes on: trunk/sys/dev/ntb/ntb.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/ntb/ntb.h
===================================================================
--- trunk/sys/dev/ntb/ntb.h	                        (rev 0)
+++ trunk/sys/dev/ntb/ntb.h	2018-05-27 23:30:17 UTC (rev 10090)
@@ -0,0 +1,410 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/ntb/ntb.h 304404 2016-08-18 10:59:12Z mav $
+ */
+
+#ifndef _NTB_H_
+#define _NTB_H_
+
+#include "ntb_if.h"
+
+extern devclass_t ntb_hw_devclass;
+SYSCTL_DECL(_hw_ntb);
+
+int ntb_register_device(device_t ntb);
+int ntb_unregister_device(device_t ntb);
+
+/*
+ * ntb_link_event() - notify driver context of a change in link status
+ * @ntb:        NTB device context
+ *
+ * Notify the driver context that the link status may have changed.  The driver
+ * should call ntb_link_is_up() to get the current status.
+ */
+void ntb_link_event(device_t ntb);
+
+/*
+ * ntb_db_event() - notify driver context of a doorbell event
+ * @ntb:        NTB device context
+ * @vector:     Interrupt vector number
+ *
+ * Notify the driver context of a doorbell event.  If hardware supports
+ * multiple interrupt vectors for doorbells, the vector number indicates which
+ * vector received the interrupt.  The vector number is relative to the first
+ * vector used for doorbells, starting at zero, and must be less than
+ * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
+ * doorbell bits need service, and ntb_db_vector_mask() to determine which of
+ * those bits are associated with the vector number.
+ */
+void ntb_db_event(device_t ntb, uint32_t vec);
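+
+/*
+ * A minimal service sketch for a client's db_event callback (hypothetical
+ * code; "sc" stands for the client context passed to ntb_set_ctx()):
+ *
+ *	uint64_t db_bits;
+ *
+ *	db_bits = ntb_db_read(sc->dev) & ntb_db_vector_mask(sc->dev, vec);
+ *	ntb_db_clear(sc->dev, db_bits);
+ *	... service each set bit in db_bits ...
+ */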
+
+/*
+ * ntb_link_is_up() - get the current ntb link state
+ * @ntb:        NTB device context
+ * @speed:      OUT - The link speed expressed as PCIe generation number
+ * @width:      OUT - The link width expressed as the number of PCIe lanes
+ *
+ * RETURNS: true or false based on the hardware link state
+ */
+bool ntb_link_is_up(device_t ntb, enum ntb_speed *speed, enum ntb_width *width);
+
+/*
+ * ntb_link_enable() - enable the link on the secondary side of the ntb
+ * @ntb:        NTB device context
+ * @max_speed:  The maximum link speed expressed as PCIe generation number[0]
+ * @max_width:  The maximum link width expressed as the number of PCIe lanes[0]
+ *
+ * Enable the link on the secondary side of the ntb.  This can only be done
+ * from the primary side of the ntb in primary or b2b topology.  The ntb device
+ * should train the link to its maximum speed and width, or the requested speed
+ * and width, whichever is smaller, if supported.
+ *
+ * Return: Zero on success, otherwise an error number.
+ *
+ * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
+ *      and width input will be ignored.
+ */
+int ntb_link_enable(device_t ntb, enum ntb_speed speed, enum ntb_width width);
+
+/*
+ * ntb_link_disable() - disable the link on the secondary side of the ntb
+ * @ntb:        NTB device context
+ *
+ * Disable the link on the secondary side of the ntb.  This can only be done
+ * from the primary side of the ntb in primary or b2b topology.  The ntb device
+ * should disable the link.  Returning from this call must indicate that a
+ * barrier has passed and no more writes may pass in either direction
+ * across the link, except if this call returns an error number.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_link_disable(device_t ntb);
+
+/*
+ * get enable status of the link on the secondary side of the ntb
+ */
+bool ntb_link_enabled(device_t ntb);
+
+/*
+ * ntb_set_ctx() - associate a driver context with an ntb device
+ * @ntb:        NTB device context
+ * @ctx:        Driver context
+ * @ctx_ops:    Driver context operations
+ *
+ * Associate a driver context and operations with an ntb device.  The context is
+ * provided by the client driver, and the driver may associate a different
+ * context with each ntb device.
+ *
+ * Return: Zero if the context is associated, otherwise an error number.
+ */
+int ntb_set_ctx(device_t ntb, void *ctx, const struct ntb_ctx_ops *ctx_ops);
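+
+/*
+ * A minimal sketch of a client associating its context (assumes the client
+ * defines the two callbacks; ntb_transport.c in this commit is the in-tree
+ * consumer of this KPI):
+ *
+ *	static const struct ntb_ctx_ops foo_ops = {
+ *		.link_event = foo_link_event,
+ *		.db_event = foo_db_event,
+ *	};
+ *
+ *	error = ntb_set_ctx(ntb, sc, &foo_ops);
+ */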
+
+/*
+ * ntb_get_ctx() - get the driver context associated with an ntb device
+ * @ntb:        NTB device context
+ * @ctx_ops:    Driver context operations
+ *
+ * Get the driver context and operations associated with an ntb device.
+ */
+void * ntb_get_ctx(device_t ntb, const struct ntb_ctx_ops **ctx_ops);
+
+/*
+ * ntb_clear_ctx() - disassociate any driver context from an ntb device
+ * @ntb:        NTB device context
+ *
+ * Clear any association that may exist between a driver context and the ntb
+ * device.
+ */
+void ntb_clear_ctx(device_t ntb);
+
+/*
+ * ntb_mw_count() - Get the number of memory windows available for KPI
+ * consumers.
+ *
+ * (Excludes any MW wholly reserved for register access.)
+ */
+uint8_t ntb_mw_count(device_t ntb);
+
+/*
+ * ntb_mw_get_range() - get the range of a memory window
+ * @ntb:        NTB device context
+ * @idx:        Memory window number
+ * @base:       OUT - the base address for mapping the memory window
+ * @vbase:      OUT - the KVA for mapping the memory window
+ * @size:       OUT - the size for mapping the memory window
+ * @align:      OUT - the base alignment for translating the memory window
+ * @align_size: OUT - the size alignment for translating the memory window
+ * @plimit:     OUT - the maximum address that may be translated through the
+ *              window
+ *
+ * Get the range of a memory window.  NULL may be given for any output
+ * parameter if the value is not needed.  The base and size may be used for
+ * mapping the memory window, to access the peer memory.  The alignment and
+ * size may be used for translating the memory window, for the peer to access
+ * memory on the local system.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_mw_get_range(device_t ntb, unsigned mw_idx, vm_paddr_t *base,
+    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
+    bus_addr_t *plimit);
+
+/*
+ * ntb_mw_set_trans() - set the translation of a memory window
+ * @ntb:        NTB device context
+ * @idx:        Memory window number
+ * @addr:       The dma address local memory to expose to the peer
+ * @size:       The size of the local memory to expose to the peer
+ *
+ * Set the translation of a memory window.  The peer may access local memory
+ * through the window starting at the address, up to the size.  The address
+ * must be aligned to the alignment specified by ntb_mw_get_range().  The size
+ * must be aligned to the size alignment specified by ntb_mw_get_range().  The
+ * address must be below the plimit specified by ntb_mw_get_range() (i.e. for
+ * 32-bit BARs).
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_mw_set_trans(device_t ntb, unsigned mw_idx, bus_addr_t addr,
+    size_t size);
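+
+/*
+ * A typical sequence for exposing local memory through window 0 (a sketch;
+ * allocation and error handling omitted, "dma_addr" is hypothetical):
+ *
+ *	vm_paddr_t base;
+ *	caddr_t vbase;
+ *	size_t size, align, align_size;
+ *	bus_addr_t plimit;
+ *
+ *	ntb_mw_get_range(ntb, 0, &base, &vbase, &size, &align,
+ *	    &align_size, &plimit);
+ *	... allocate DMA memory honoring align, align_size and plimit ...
+ *	ntb_mw_set_trans(ntb, 0, dma_addr, size);
+ */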
+
+/*
+ * ntb_mw_clear_trans() - clear the translation of a memory window
+ * @ntb:	NTB device context
+ * @idx:	Memory window number
+ *
+ * Clear the translation of a memory window.  The peer may no longer access
+ * local memory through the window.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_mw_clear_trans(device_t ntb, unsigned mw_idx);
+
+/*
+ * ntb_mw_get_wc - Get the write-combine status of a memory window
+ *
+ * Returns:  Zero on success, setting *mode; otherwise an error number (e.g. if
+ * idx is an invalid memory window).
+ *
+ * Mode is a VM_MEMATTR_* type.
+ */
+int ntb_mw_get_wc(device_t ntb, unsigned mw_idx, vm_memattr_t *mode);
+
+/*
+ * ntb_mw_set_wc - Set the write-combine status of a memory window
+ *
+ * If 'mode' matches the current status, this does nothing and succeeds.  Mode
+ * is a VM_MEMATTR_* type.
+ *
+ * Returns:  Zero on success, setting the caching attribute on the virtual
+ * mapping of the BAR; otherwise an error number (e.g. if idx is an invalid
+ * memory window, or if changing the caching attribute fails).
+ */
+int ntb_mw_set_wc(device_t ntb, unsigned mw_idx, vm_memattr_t mode);
+
+/*
+ * ntb_spad_count() - get the total scratch regs usable
+ * @ntb: pointer to ntb_softc instance
+ *
+ * This function returns the maximum number of 32bit scratchpad registers
+ * usable by the upper layer.
+ *
+ * RETURNS: total number of scratch pad registers available
+ */
+uint8_t ntb_spad_count(device_t ntb);
+
+/*
+ * ntb_spad_clear() - zero local scratchpad registers
+ * @ntb: pointer to ntb_softc instance
+ *
+ * This function overwrites all local scratchpad registers with zeroes.
+ */
+void ntb_spad_clear(device_t ntb);
+
+/*
+ * ntb_spad_write() - write to the local scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register. The register resides on the primary (internal) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_spad_write(device_t ntb, unsigned int idx, uint32_t val);
+
+/*
+ * ntb_spad_read() - read from the primary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to scratchpad register, 0 based
+ * @val: pointer to 32bit integer for storing the register value
+ *
+ * This function allows reading of the 32bit scratchpad register on
+ * the primary (internal) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_spad_read(device_t ntb, unsigned int idx, uint32_t *val);
+
+/*
+ * ntb_peer_spad_write() - write to the secondary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register. The register resides on the secondary (external) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_peer_spad_write(device_t ntb, unsigned int idx, uint32_t val);
+
+/*
+ * ntb_peer_spad_read() - read from the secondary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to scratchpad register, 0 based
+ * @val: pointer to 32bit integer for storing the register value
+ *
+ * This function allows reading of the 32bit scratchpad register on
+ * the secondary (external) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_peer_spad_read(device_t ntb, unsigned int idx, uint32_t *val);
+
+/*
+ * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
+ * @ntb:	NTB device context
+ *
+ * Hardware may support a different number or arrangement of doorbell bits.
+ *
+ * Return: A mask of doorbell bits supported by the ntb.
+ */
+uint64_t ntb_db_valid_mask(device_t ntb);
+
+/*
+ * ntb_db_vector_count() - get the number of doorbell interrupt vectors
+ * @ntb:	NTB device context.
+ *
+ * Hardware may support a different number of interrupt vectors.
+ *
+ * Return: The number of doorbell interrupt vectors.
+ */
+int ntb_db_vector_count(device_t ntb);
+
+/*
+ * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
+ * @ntb:	NTB device context
+ * @vector:	Doorbell vector number
+ *
+ * Each interrupt vector may have a different number or arrangement of bits.
+ *
+ * Return: A mask of doorbell bits serviced by a vector.
+ */
+uint64_t ntb_db_vector_mask(device_t ntb, uint32_t vector);
+
+/*
+ * ntb_peer_db_addr() - address and size of the peer doorbell register
+ * @ntb:	NTB device context.
+ * @db_addr:	OUT - The address of the peer doorbell register.
+ * @db_size:	OUT - The number of bytes to write to the peer doorbell register.
+ *
+ * Return the address of the peer doorbell register.  This may be used, for
+ * example, by drivers that offload memory copy operations to a dma engine.
+ * The drivers may wish to ring the peer doorbell at the completion of memory
+ * copy operations.  For efficiency, and to simplify ordering of operations
+ * between the dma memory copies and ringing the doorbell, the driver may
+ * append one additional dma memory copy with the doorbell register as the
+ * destination, after the memory copy operations.
+ *
+ * Return: Zero on success, otherwise an error number.
+ *
+ * Note that writing the peer doorbell via a memory window will *not* generate
+ * an interrupt on the remote host; that must be done separately.
+ */
+int ntb_peer_db_addr(device_t ntb, bus_addr_t *db_addr, vm_size_t *db_size);
+
+/*
+ * ntb_db_clear() - clear bits in the local doorbell register
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell bits to clear.
+ *
+ * Clear bits in the local doorbell register, arming the bits for the next
+ * doorbell.
+ */
+void ntb_db_clear(device_t ntb, uint64_t bits);
+
+/*
+ * ntb_db_clear_mask() - clear bits in the local doorbell mask
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell bits to clear.
+ *
+ * Clear bits in the local doorbell mask register, allowing doorbell interrupts
+ * to be generated for those doorbell bits.  If a doorbell bit is already
+ * set at the time the mask is cleared, and the corresponding mask bit is
+ * changed from set to clear, then the ntb driver must ensure that
+ * ntb_db_event() is called.  If the hardware does not generate the interrupt
+ * on clearing the mask bit, then the driver must call ntb_db_event() anyway.
+ */
+void ntb_db_clear_mask(device_t ntb, uint64_t bits);
+
+/*
+ * ntb_db_read() - read the local doorbell register
+ * @ntb:	NTB device context.
+ *
+ * Read the local doorbell register, and return the bits that are set.
+ *
+ * Return: The bits currently set in the local doorbell register.
+ */
+uint64_t ntb_db_read(device_t ntb);
+
+/*
+ * ntb_db_set_mask() - set bits in the local doorbell mask
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell mask bits to set.
+ *
+ * Set bits in the local doorbell mask register, preventing doorbell interrupts
+ * from being generated for those doorbell bits.  Bits that were already set
+ * must remain set.
+ */
+void ntb_db_set_mask(device_t ntb, uint64_t bits);
+
+/*
+ * ntb_peer_db_set() - Set the doorbell on the secondary/external side
+ * @ntb: pointer to ntb_softc instance
+ * @bits: doorbell bits to ring
+ *
+ * This function allows triggering of a doorbell on the secondary/external
+ * side that will initiate an interrupt on the remote host.
+ */
+void ntb_peer_db_set(device_t ntb, uint64_t bits);
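+
+/*
+ * For example (a sketch), to ring the peer's doorbell bit 0 after queueing
+ * work for it:
+ *
+ *	ntb_peer_db_set(ntb, 1ull << 0);
+ */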
+
+#endif /* _NTB_H_ */


Property changes on: trunk/sys/dev/ntb/ntb.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/ntb/ntb_hw/ntb_hw.c
===================================================================
--- trunk/sys/dev/ntb/ntb_hw/ntb_hw.c	                        (rev 0)
+++ trunk/sys/dev/ntb/ntb_hw/ntb_hw.c	2018-05-27 23:30:17 UTC (rev 10090)
@@ -0,0 +1,3135 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav at FreeBSD.org>
+ * Copyright (C) 2013 Intel Corporation
+ * Copyright (C) 2015 EMC Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The Non-Transparent Bridge (NTB) is a device that allows you to connect
+ * two or more systems using PCI-e links, providing remote memory access.
+ *
+ * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
+ *
+ * NOTE: Much of the code in this module is shared with Linux. Any patches may
+ * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/ntb/ntb_hw/ntb_hw.c 314667 2017-03-04 13:03:31Z avg $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/endian.h>
+#include <sys/interrupt.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/pciio.h>
+#include <sys/queue.h>
+#include <sys/rman.h>
+#include <sys/sbuf.h>
+#include <sys/sysctl.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/bus.h>
+#include <machine/intr_machdep.h>
+#include <machine/pmap.h>
+#include <machine/resource.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include "ntb_regs.h"
+#include "../ntb.h"
+
+#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)
+
+#define NTB_HB_TIMEOUT		1 /* second */
+#define ATOM_LINK_RECOVERY_TIME	500 /* ms */
+#define BAR_HIGH_MASK		(~((1ull << 12) - 1))
+
+#define	NTB_MSIX_VER_GUARD	0xaabbccdd
+#define	NTB_MSIX_RECEIVED	0xe0f0e0f0
+
+/*
+ * PCI constants could be somewhere more generic, but aren't defined/used in
+ * pci.c.
+ */
+#define	PCI_MSIX_ENTRY_SIZE		16
+#define	PCI_MSIX_ENTRY_LOWER_ADDR	0
+#define	PCI_MSIX_ENTRY_UPPER_ADDR	4
+#define	PCI_MSIX_ENTRY_DATA		8
+
+enum ntb_device_type {
+	NTB_XEON,
+	NTB_ATOM
+};
+
+/* ntb_conn_type are hardware numbers, cannot change. */
+enum ntb_conn_type {
+	NTB_CONN_TRANSPARENT = 0,
+	NTB_CONN_B2B = 1,
+	NTB_CONN_RP = 2,
+};
+
+enum ntb_b2b_direction {
+	NTB_DEV_USD = 0,
+	NTB_DEV_DSD = 1,
+};
+
+enum ntb_bar {
+	NTB_CONFIG_BAR = 0,
+	NTB_B2B_BAR_1,
+	NTB_B2B_BAR_2,
+	NTB_B2B_BAR_3,
+	NTB_MAX_BARS
+};
+
+enum {
+	NTB_MSIX_GUARD = 0,
+	NTB_MSIX_DATA0,
+	NTB_MSIX_DATA1,
+	NTB_MSIX_DATA2,
+	NTB_MSIX_OFS0,
+	NTB_MSIX_OFS1,
+	NTB_MSIX_OFS2,
+	NTB_MSIX_DONE,
+	NTB_MAX_MSIX_SPAD
+};
+
+/* Device features and workarounds */
+#define HAS_FEATURE(ntb, feature)	\
+	(((ntb)->features & (feature)) != 0)
+
+struct ntb_hw_info {
+	uint32_t		device_id;
+	const char		*desc;
+	enum ntb_device_type	type;
+	uint32_t		features;
+};
+
+struct ntb_pci_bar_info {
+	bus_space_tag_t		pci_bus_tag;
+	bus_space_handle_t	pci_bus_handle;
+	int			pci_resource_id;
+	struct resource		*pci_resource;
+	vm_paddr_t		pbase;
+	caddr_t			vbase;
+	vm_size_t		size;
+	vm_memattr_t		map_mode;
+
+	/* Configuration register offsets */
+	uint32_t		psz_off;
+	uint32_t		ssz_off;
+	uint32_t		pbarxlat_off;
+};
+
+struct ntb_int_info {
+	struct resource	*res;
+	int		rid;
+	void		*tag;
+};
+
+struct ntb_vec {
+	struct ntb_softc	*ntb;
+	uint32_t		num;
+	unsigned		masked;
+};
+
+struct ntb_reg {
+	uint32_t	ntb_ctl;
+	uint32_t	lnk_sta;
+	uint8_t		db_size;
+	unsigned	mw_bar[NTB_MAX_BARS];
+};
+
+struct ntb_alt_reg {
+	uint32_t	db_bell;
+	uint32_t	db_mask;
+	uint32_t	spad;
+};
+
+struct ntb_xlat_reg {
+	uint32_t	bar0_base;
+	uint32_t	bar2_base;
+	uint32_t	bar4_base;
+	uint32_t	bar5_base;
+
+	uint32_t	bar2_xlat;
+	uint32_t	bar4_xlat;
+	uint32_t	bar5_xlat;
+
+	uint32_t	bar2_limit;
+	uint32_t	bar4_limit;
+	uint32_t	bar5_limit;
+};
+
+struct ntb_b2b_addr {
+	uint64_t	bar0_addr;
+	uint64_t	bar2_addr64;
+	uint64_t	bar4_addr64;
+	uint64_t	bar4_addr32;
+	uint64_t	bar5_addr32;
+};
+
+struct ntb_msix_data {
+	uint32_t	nmd_ofs;
+	uint32_t	nmd_data;
+};
+
+struct ntb_softc {
+	/* ntb.c context. Do not move! Must go first! */
+	void			*ntb_store;
+
+	device_t		device;
+	enum ntb_device_type	type;
+	uint32_t		features;
+
+	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
+	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
+	uint32_t		allocated_interrupts;
+
+	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
+	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
+	bool			peer_msix_good;
+	bool			peer_msix_done;
+	struct ntb_pci_bar_info	*peer_lapic_bar;
+	struct callout		peer_msix_work;
+
+	struct callout		heartbeat_timer;
+	struct callout		lr_timer;
+
+	struct ntb_vec		*msix_vec;
+
+	uint32_t		ppd;
+	enum ntb_conn_type	conn_type;
+	enum ntb_b2b_direction	dev_type;
+
+	/* Offset of peer bar0 in B2B BAR */
+	uint64_t			b2b_off;
+	/* Memory window used to access peer bar0 */
+#define B2B_MW_DISABLED			UINT8_MAX
+	uint8_t				b2b_mw_idx;
+	uint32_t			msix_xlat;
+	uint8_t				msix_mw_idx;
+
+	uint8_t				mw_count;
+	uint8_t				spad_count;
+	uint8_t				db_count;
+	uint8_t				db_vec_count;
+	uint8_t				db_vec_shift;
+
+	/* Protects local db_mask. */
+#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
+#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
+#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
+	struct mtx			db_mask_lock;
+
+	volatile uint32_t		ntb_ctl;
+	volatile uint32_t		lnk_sta;
+
+	uint64_t			db_valid_mask;
+	uint64_t			db_link_mask;
+	uint64_t			db_mask;
+	uint64_t			fake_db_bell;	/* NTB_SB01BASE_LOCKUP */
+
+	int				last_ts;	/* ticks @ last irq */
+
+	const struct ntb_reg		*reg;
+	const struct ntb_alt_reg	*self_reg;
+	const struct ntb_alt_reg	*peer_reg;
+	const struct ntb_xlat_reg	*xlat_reg;
+};
+
+#ifdef __i386__
+static __inline uint64_t
+bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
+    bus_size_t offset)
+{
+
+	return (bus_space_read_4(tag, handle, offset) |
+	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
+}
+
+static __inline void
+bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
+    bus_size_t offset, uint64_t val)
+{
+
+	bus_space_write_4(tag, handle, offset, val);
+	bus_space_write_4(tag, handle, offset + 4, val >> 32);
+}
+#endif
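+
+/*
+ * Note: the i386 helpers above synthesize a 64-bit access from two 32-bit
+ * bus_space operations, so such reads and writes are not atomic.
+ */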
+
+#define intel_ntb_bar_read(SIZE, bar, offset) \
+	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
+	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
+#define intel_ntb_bar_write(SIZE, bar, offset, val) \
+	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
+	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
+#define intel_ntb_reg_read(SIZE, offset) \
+	    intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
+#define intel_ntb_reg_write(SIZE, offset, val) \
+	    intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
+#define intel_ntb_mw_read(SIZE, offset) \
+	    intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
+		offset)
+#define intel_ntb_mw_write(SIZE, offset, val) \
+	    intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
+		offset, val)
+
+static int intel_ntb_probe(device_t device);
+static int intel_ntb_attach(device_t device);
+static int intel_ntb_detach(device_t device);
+static uint64_t intel_ntb_db_valid_mask(device_t dev);
+static void intel_ntb_spad_clear(device_t dev);
+static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
+static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
+    enum ntb_width *width);
+static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
+    enum ntb_width width);
+static int intel_ntb_link_disable(device_t dev);
+static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
+static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);
+
+static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
+static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
+static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
+static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
+    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
+static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
+static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
+    vm_memattr_t);
+static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
+    const char *);
+static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
+static int map_memory_window_bar(struct ntb_softc *ntb,
+    struct ntb_pci_bar_info *bar);
+static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
+static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
+static int intel_ntb_init_isr(struct ntb_softc *ntb);
+static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
+static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
+static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
+static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
+static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
+static void ndev_vec_isr(void *arg);
+static void ndev_irq_isr(void *arg);
+static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
+static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
+static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
+static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
+static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
+static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
+static void intel_ntb_exchange_msix(void *);
+static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
+static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
+static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
+static int intel_ntb_detect_atom(struct ntb_softc *ntb);
+static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
+static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
+static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
+static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
+static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
+    enum ntb_bar regbar);
+static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
+    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
+static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
+    enum ntb_bar idx);
+static int xeon_setup_b2b_mw(struct ntb_softc *,
+    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
+static inline bool link_is_up(struct ntb_softc *ntb);
+static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
+static inline bool atom_link_is_err(struct ntb_softc *ntb);
+static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
+static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
+static void atom_link_hb(void *arg);
+static void recover_atom_link(void *arg);
+static bool intel_ntb_poll_link(struct ntb_softc *ntb);
+static void save_bar_parameters(struct ntb_pci_bar_info *bar);
+static void intel_ntb_sysctl_init(struct ntb_softc *);
+static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
+static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
+static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
+static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
+static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
+
+static unsigned g_ntb_hw_debug_level;
+TUNABLE_INT("hw.ntb.debug_level", &g_ntb_hw_debug_level);
+SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
+    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
+#define intel_ntb_printf(lvl, ...) do {				\
+	if ((lvl) <= g_ntb_hw_debug_level) {			\
+		device_printf(ntb->device, __VA_ARGS__);	\
+	}							\
+} while (0)
+
+#define	_NTB_PAT_UC	0
+#define	_NTB_PAT_WC	1
+#define	_NTB_PAT_WT	4
+#define	_NTB_PAT_WP	5
+#define	_NTB_PAT_WB	6
+#define	_NTB_PAT_UCM	7
+static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
+TUNABLE_INT("hw.ntb.default_mw_pat", &g_ntb_mw_pat);
+SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
+    &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
+    "UC: "  __XSTRING(_NTB_PAT_UC) ", "
+    "WC: "  __XSTRING(_NTB_PAT_WC) ", "
+    "WT: "  __XSTRING(_NTB_PAT_WT) ", "
+    "WP: "  __XSTRING(_NTB_PAT_WP) ", "
+    "WB: "  __XSTRING(_NTB_PAT_WB) ", "
+    "UC-: " __XSTRING(_NTB_PAT_UCM));
+
+static inline vm_memattr_t
+intel_ntb_pat_flags(void)
+{
+
+	switch (g_ntb_mw_pat) {
+	case _NTB_PAT_WC:
+		return (VM_MEMATTR_WRITE_COMBINING);
+	case _NTB_PAT_WT:
+		return (VM_MEMATTR_WRITE_THROUGH);
+	case _NTB_PAT_WP:
+		return (VM_MEMATTR_WRITE_PROTECTED);
+	case _NTB_PAT_WB:
+		return (VM_MEMATTR_WRITE_BACK);
+	case _NTB_PAT_UCM:
+		return (VM_MEMATTR_WEAK_UNCACHEABLE);
+	case _NTB_PAT_UC:
+		/* FALLTHROUGH */
+	default:
+		return (VM_MEMATTR_UNCACHEABLE);
+	}
+}
+
+/*
+ * Well, this obviously doesn't belong here, but it doesn't seem to exist
+ * anywhere better yet.
+ */
+static inline const char *
+intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
+{
+
+	switch (pat) {
+	case VM_MEMATTR_WRITE_COMBINING:
+		return ("WRITE_COMBINING");
+	case VM_MEMATTR_WRITE_THROUGH:
+		return ("WRITE_THROUGH");
+	case VM_MEMATTR_WRITE_PROTECTED:
+		return ("WRITE_PROTECTED");
+	case VM_MEMATTR_WRITE_BACK:
+		return ("WRITE_BACK");
+	case VM_MEMATTR_WEAK_UNCACHEABLE:
+		return ("UNCACHED");
+	case VM_MEMATTR_UNCACHEABLE:
+		return ("UNCACHEABLE");
+	default:
+		return ("UNKNOWN");
+	}
+}
+
+static int g_ntb_msix_idx = 1;
+TUNABLE_INT("hw.ntb.msix_mw_idx", &g_ntb_msix_idx);
+SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
+    0, "Use this memory window to access the peer MSIX message complex on "
+    "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
+    "Like b2b_mw_idx, negative values index from the last available memory "
+    "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");
+
+static int g_ntb_mw_idx = -1;
+TUNABLE_INT("hw.ntb.b2b_mw_idx", &g_ntb_mw_idx);
+SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
+    0, "Use this memory window to access the peer NTB registers.  A "
+    "non-negative value starts from the first MW index; a negative value "
+    "starts from the last MW index.  The default is -1, i.e., the last "
+    "available memory window.  Both sides of the NTB MUST set the same "
+    "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
+
+/* Hardware owns the low 16 bits of features. */
+#define NTB_BAR_SIZE_4K		(1 << 0)
+#define NTB_SDOORBELL_LOCKUP	(1 << 1)
+#define NTB_SB01BASE_LOCKUP	(1 << 2)
+#define NTB_B2BDOORBELL_BIT14	(1 << 3)
+/* Software/configuration owns the top 16 bits. */
+#define NTB_SPLIT_BAR		(1ull << 16)
+
+#define NTB_FEATURES_STR \
+    "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
+    "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"
+
+static struct ntb_hw_info pci_ids[] = {
+	/* XXX: PS/SS IDs left out until they are supported. */
+	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
+		NTB_ATOM, 0 },
+
+	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
+		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
+	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
+		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
+	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
+		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
+		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
+	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
+		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
+		    NTB_SB01BASE_LOCKUP },
+	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
+		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
+		    NTB_SB01BASE_LOCKUP },
+
+	{ 0x00000000, NULL, NTB_ATOM, 0 }
+};
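+
+/*
+ * Note: each device_id above packs the PCI device ID in the upper 16 bits
+ * and the vendor ID (0x8086, Intel) in the lower 16, matching the value
+ * returned by pci_get_devid(9); e.g., 0x37258086 is device 0x3725, vendor
+ * 0x8086.
+ */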
+
+static const struct ntb_reg atom_reg = {
+	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
+	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
+	.db_size = sizeof(uint64_t),
+	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
+};
+
+static const struct ntb_alt_reg atom_pri_reg = {
+	.db_bell = ATOM_PDOORBELL_OFFSET,
+	.db_mask = ATOM_PDBMSK_OFFSET,
+	.spad = ATOM_SPAD_OFFSET,
+};
+
+static const struct ntb_alt_reg atom_b2b_reg = {
+	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
+	.spad = ATOM_B2B_SPAD_OFFSET,
+};
+
+static const struct ntb_xlat_reg atom_sec_xlat = {
+#if 0
+	/* "FIXME" says the Linux driver. */
+	.bar0_base = ATOM_SBAR0BASE_OFFSET,
+	.bar2_base = ATOM_SBAR2BASE_OFFSET,
+	.bar4_base = ATOM_SBAR4BASE_OFFSET,
+
+	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
+	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
+#endif
+
+	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
+	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
+};
+
+static const struct ntb_reg xeon_reg = {
+	.ntb_ctl = XEON_NTBCNTL_OFFSET,
+	.lnk_sta = XEON_LINK_STATUS_OFFSET,
+	.db_size = sizeof(uint16_t),
+	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
+};
+
+static const struct ntb_alt_reg xeon_pri_reg = {
+	.db_bell = XEON_PDOORBELL_OFFSET,
+	.db_mask = XEON_PDBMSK_OFFSET,
+	.spad = XEON_SPAD_OFFSET,
+};
+
+static const struct ntb_alt_reg xeon_b2b_reg = {
+	.db_bell = XEON_B2B_DOORBELL_OFFSET,
+	.spad = XEON_B2B_SPAD_OFFSET,
+};
+
+static const struct ntb_xlat_reg xeon_sec_xlat = {
+	.bar0_base = XEON_SBAR0BASE_OFFSET,
+	.bar2_base = XEON_SBAR2BASE_OFFSET,
+	.bar4_base = XEON_SBAR4BASE_OFFSET,
+	.bar5_base = XEON_SBAR5BASE_OFFSET,
+
+	.bar2_limit = XEON_SBAR2LMT_OFFSET,
+	.bar4_limit = XEON_SBAR4LMT_OFFSET,
+	.bar5_limit = XEON_SBAR5LMT_OFFSET,
+
+	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
+	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
+	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
+};
+
+static struct ntb_b2b_addr xeon_b2b_usd_addr = {
+	.bar0_addr = XEON_B2B_BAR0_ADDR,
+	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
+	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
+	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
+	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
+};
+
+static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
+	.bar0_addr = XEON_B2B_BAR0_ADDR,
+	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
+	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
+	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
+	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
+};
+
+SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0,
+    "B2B MW segment overrides -- MUST be the same on both sides");
+
+TUNABLE_QUAD("hw.ntb.usd_bar2_addr64", &xeon_b2b_usd_addr.bar2_addr64);
+SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
+    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
+    "hardware, use this 64-bit address on the bus between the NTB devices for "
+    "the window at BAR2, on the upstream side of the link.  MUST be the same "
+    "address on both sides.");
+TUNABLE_QUAD("hw.ntb.usd_bar4_addr64", &xeon_b2b_usd_addr.bar4_addr64);
+SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
+    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
+TUNABLE_QUAD("hw.ntb.usd_bar4_addr32", &xeon_b2b_usd_addr.bar4_addr32);
+SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
+    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
+    "(split-BAR mode).");
+TUNABLE_QUAD("hw.ntb.usd_bar5_addr32", &xeon_b2b_usd_addr.bar5_addr32);
+SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
+    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
+    "(split-BAR mode).");
+
+TUNABLE_QUAD("hw.ntb.dsd_bar2_addr64", &xeon_b2b_dsd_addr.bar2_addr64);
+SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
+    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
+    "hardware, use this 64-bit address on the bus between the NTB devices for "
+    "the window at BAR2, on the downstream side of the link.  MUST be the same"
+    " address on both sides.");
+TUNABLE_QUAD("hw.ntb.dsd_bar4_addr64", &xeon_b2b_dsd_addr.bar4_addr64);
+SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
+    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
+TUNABLE_QUAD("hw.ntb.dsd_bar4_addr32", &xeon_b2b_dsd_addr.bar4_addr32);
+SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
+    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
+    "(split-BAR mode).");
+TUNABLE_QUAD("hw.ntb.dsd_bar5_addr32", &xeon_b2b_dsd_addr.bar5_addr32);
+SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
+    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
+    "(split-BAR mode).");
+
+/*
+ * OS <-> Driver interface structures
+ */
+MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
+
+/*
+ * OS <-> Driver linkage functions
+ */
+static int
+intel_ntb_probe(device_t device)
+{
+	struct ntb_hw_info *p;
+
+	p = intel_ntb_get_device_info(pci_get_devid(device));
+	if (p == NULL)
+		return (ENXIO);
+
+	device_set_desc(device, p->desc);
+	return (0);
+}
+
+static int
+intel_ntb_attach(device_t device)
+{
+	struct ntb_softc *ntb;
+	struct ntb_hw_info *p;
+	int error;
+
+	ntb = device_get_softc(device);
+	p = intel_ntb_get_device_info(pci_get_devid(device));
+
+	ntb->device = device;
+	ntb->type = p->type;
+	ntb->features = p->features;
+	ntb->b2b_mw_idx = B2B_MW_DISABLED;
+	ntb->msix_mw_idx = B2B_MW_DISABLED;
+
+	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
+	callout_init(&ntb->heartbeat_timer, 1);
+	callout_init(&ntb->lr_timer, 1);
+	callout_init(&ntb->peer_msix_work, 1);
+	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
+
+	if (ntb->type == NTB_ATOM)
+		error = intel_ntb_detect_atom(ntb);
+	else
+		error = intel_ntb_detect_xeon(ntb);
+	if (error != 0)
+		goto out;
+
+	intel_ntb_detect_max_mw(ntb);
+
+	pci_enable_busmaster(ntb->device);
+
+	error = intel_ntb_map_pci_bars(ntb);
+	if (error != 0)
+		goto out;
+	if (ntb->type == NTB_ATOM)
+		error = intel_ntb_atom_init_dev(ntb);
+	else
+		error = intel_ntb_xeon_init_dev(ntb);
+	if (error != 0)
+		goto out;
+
+	intel_ntb_spad_clear(device);
+
+	intel_ntb_poll_link(ntb);
+
+	intel_ntb_sysctl_init(ntb);
+
+	/* Attach children to this controller */
+	error = ntb_register_device(device);
+
+out:
+	if (error != 0)
+		intel_ntb_detach(device);
+	return (error);
+}
+
+static int
+intel_ntb_detach(device_t device)
+{
+	struct ntb_softc *ntb;
+
+	ntb = device_get_softc(device);
+
+	/* Detach & delete all children */
+	ntb_unregister_device(device);
+
+	if (ntb->self_reg != NULL) {
+		DB_MASK_LOCK(ntb);
+		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
+		DB_MASK_UNLOCK(ntb);
+	}
+	callout_drain(&ntb->heartbeat_timer);
+	callout_drain(&ntb->lr_timer);
+	callout_drain(&ntb->peer_msix_work);
+	pci_disable_busmaster(ntb->device);
+	if (ntb->type == NTB_XEON)
+		intel_ntb_teardown_xeon(ntb);
+	intel_ntb_teardown_interrupts(ntb);
+
+	mtx_destroy(&ntb->db_mask_lock);
+
+	intel_ntb_unmap_pci_bar(ntb);
+
+	return (0);
+}
+
+/*
+ * Driver internal routines
+ */
+static inline enum ntb_bar
+intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
+{
+
+	KASSERT(mw < ntb->mw_count,
+	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
+	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
+
+	return (ntb->reg->mw_bar[mw]);
+}
+
+static inline bool
+bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
+{
+	/* XXX This assertion could be stronger. */
+	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
+	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
+}
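+
+/*
+ * In other words: the config BAR and the first B2B window (PCI BAR2) are
+ * always 64-bit, while the BAR4 window is 64-bit only when NTB_SPLIT_BAR has
+ * not carved it into two 32-bit windows (PCI BAR4/BAR5).
+ */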
+
+static inline void
+bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
+    uint32_t *xlat, uint32_t *lmt)
+{
+	uint32_t basev, lmtv, xlatv;
+
+	switch (bar) {
+	case NTB_B2B_BAR_1:
+		basev = ntb->xlat_reg->bar2_base;
+		lmtv = ntb->xlat_reg->bar2_limit;
+		xlatv = ntb->xlat_reg->bar2_xlat;
+		break;
+	case NTB_B2B_BAR_2:
+		basev = ntb->xlat_reg->bar4_base;
+		lmtv = ntb->xlat_reg->bar4_limit;
+		xlatv = ntb->xlat_reg->bar4_xlat;
+		break;
+	case NTB_B2B_BAR_3:
+		basev = ntb->xlat_reg->bar5_base;
+		lmtv = ntb->xlat_reg->bar5_limit;
+		xlatv = ntb->xlat_reg->bar5_xlat;
+		break;
+	default:
+		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
+		    ("bad bar"));
+		basev = lmtv = xlatv = 0;
+		break;
+	}
+
+	if (base != NULL)
+		*base = basev;
+	if (xlat != NULL)
+		*xlat = xlatv;
+	if (lmt != NULL)
+		*lmt = lmtv;
+}
+
+static int
+intel_ntb_map_pci_bars(struct ntb_softc *ntb)
+{
+	int rc;
+
+	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
+	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
+	if (rc != 0)
+		goto out;
+
+	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
+	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
+	if (rc != 0)
+		goto out;
+	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
+	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
+	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
+
+	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
+	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
+	if (rc != 0)
+		goto out;
+	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
+	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
+	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
+
+	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
+		goto out;
+
+	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
+	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
+	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
+	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
+	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
+
+out:
+	if (rc != 0)
+		device_printf(ntb->device,
+		    "unable to allocate pci resource\n");
+	return (rc);
+}
+
+static void
+print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
+    const char *kind)
+{
+
+	device_printf(ntb->device,
+	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
+	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
+	    (char *)bar->vbase + bar->size - 1,
+	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
+	    (uintmax_t)bar->size, kind);
+}
+
+static int
+map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
+{
+
+	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
+	    &bar->pci_resource_id, RF_ACTIVE);
+	if (bar->pci_resource == NULL)
+		return (ENXIO);
+
+	save_bar_parameters(bar);
+	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
+	print_map_success(ntb, bar, "mmr");
+	return (0);
+}
+
+static int
+map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
+{
+	int rc;
+	vm_memattr_t mapmode;
+	uint8_t bar_size_bits = 0;
+
+	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
+	    &bar->pci_resource_id, RF_ACTIVE);
+
+	if (bar->pci_resource == NULL)
+		return (ENXIO);
+
+	save_bar_parameters(bar);
+	/*
+	 * Ivytown NTB BAR sizes are misreported due to a hardware erratum.
+	 * To work around this, query the size the BAR should be configured
+	 * to by the device and modify the resource to correspond to this
+	 * new size.  The BIOS on systems with this problem is required to
+	 * provide enough address space to allow the driver to make this change
+	 * safely.
+	 *
+	 * Ideally I could have just specified the size when I allocated the
+	 * resource like:
+	 *  bus_alloc_resource(ntb->device,
+	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
+	 *	1ul << bar_size_bits, RF_ACTIVE);
+	 * but the PCI driver does not honor the size in this call, so we have
+	 * to modify it after the fact.
+	 */
+	if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
+		if (bar->pci_resource_id == PCIR_BAR(2))
+			bar_size_bits = pci_read_config(ntb->device,
+			    XEON_PBAR23SZ_OFFSET, 1);
+		else
+			bar_size_bits = pci_read_config(ntb->device,
+			    XEON_PBAR45SZ_OFFSET, 1);
+
+		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
+		    bar->pci_resource, bar->pbase,
+		    bar->pbase + (1ul << bar_size_bits) - 1);
+		if (rc != 0) {
+			device_printf(ntb->device,
+			    "unable to resize bar\n");
+			return (rc);
+		}
+
+		save_bar_parameters(bar);
+	}
+
+	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
+	print_map_success(ntb, bar, "mw");
+
+	/*
+	 * Optionally, mark MW BARs as anything other than UC to improve
+	 * performance.
+	 */
+	mapmode = intel_ntb_pat_flags();
+	if (mapmode == bar->map_mode)
+		return (0);
+
+	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
+	if (rc == 0) {
+		bar->map_mode = mapmode;
+		device_printf(ntb->device,
+		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
+		    "%s.\n",
+		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
+		    (char *)bar->vbase + bar->size - 1,
+		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
+		    intel_ntb_vm_memattr_to_str(mapmode));
+	} else {
+		device_printf(ntb->device,
+		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
+		    "%s: %d\n",
+		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
+		    (char *)bar->vbase + bar->size - 1,
+		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
+		    intel_ntb_vm_memattr_to_str(mapmode), rc);
+		/* Proceed anyway. */
+	}
+	return (0);
+}
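+
+/*
+ * A worked example for the NTB_BAR_SIZE_4K quirk above: if PBAR23SZ reads
+ * 17, bus_adjust_resource() resizes the window to 1ul << 17 == 128 KiB from
+ * the original physical base, regardless of the size the BAR itself
+ * advertised.
+ */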
+
+static void
+intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
+{
+	struct ntb_pci_bar_info *current_bar;
+	int i;
+
+	for (i = 0; i < NTB_MAX_BARS; i++) {
+		current_bar = &ntb->bar_info[i];
+		if (current_bar->pci_resource != NULL)
+			bus_release_resource(ntb->device, SYS_RES_MEMORY,
+			    current_bar->pci_resource_id,
+			    current_bar->pci_resource);
+	}
+}
+
+static int
+intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
+{
+	uint32_t i;
+	int rc;
+
+	for (i = 0; i < num_vectors; i++) {
+		ntb->int_info[i].rid = i + 1;
+		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
+		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
+		if (ntb->int_info[i].res == NULL) {
+			device_printf(ntb->device,
+			    "bus_alloc_resource failed\n");
+			return (ENOMEM);
+		}
+		ntb->int_info[i].tag = NULL;
+		ntb->allocated_interrupts++;
+		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
+		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
+		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
+		if (rc != 0) {
+			device_printf(ntb->device, "bus_setup_intr failed\n");
+			return (ENXIO);
+		}
+	}
+	return (0);
+}
+
+/*
+ * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
+ * cannot be allocated for each MSI-X message.  JHB seems to think remapping
+ * should be okay.  This tunable should enable us to test that hypothesis
+ * when someone gets their hands on some Xeon hardware.
+ */
+static int ntb_force_remap_mode;
+TUNABLE_INT("hw.ntb.force_remap_mode", &ntb_force_remap_mode);
+SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
+    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
+    " to a smaller number of ithreads, even if the desired number are "
+    "available");
+
+/*
+ * In case it is NOT ok, give consumers an abort button.
+ */
+static int ntb_prefer_intx;
+TUNABLE_INT("hw.ntb.prefer_intx_to_remap", &ntb_prefer_intx);
+SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
+    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
+    "than remapping MSI-X messages over available slots (match Linux driver "
+    "behavior)");
+
+/*
+ * Remap the desired number of MSI-X messages to available ithreads in a simple
+ * round-robin fashion.
+ */
+static int
+intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
+{
+	u_int *vectors;
+	uint32_t i;
+	int rc;
+
+	if (ntb_prefer_intx != 0)
+		return (ENXIO);
+
+	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
+
+	for (i = 0; i < desired; i++)
+		vectors[i] = (i % avail) + 1;
+
+	rc = pci_remap_msix(dev, desired, vectors);
+	free(vectors, M_NTB);
+	return (rc);
+}
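+
+/*
+ * For example, remapping desired == 4 messages onto avail == 3 ithreads
+ * builds the table [1, 2, 3, 1]: message i is routed to vector
+ * (i % avail) + 1, so the first vector services two messages.
+ */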
+
+static int
+intel_ntb_init_isr(struct ntb_softc *ntb)
+{
+	uint32_t desired_vectors, num_vectors;
+	int rc;
+
+	ntb->allocated_interrupts = 0;
+	ntb->last_ts = ticks;
+
+	/*
+	 * Mask all doorbell interrupts.  (Except link events!)
+	 */
+	DB_MASK_LOCK(ntb);
+	ntb->db_mask = ntb->db_valid_mask;
+	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+	DB_MASK_UNLOCK(ntb);
+
+	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
+	    ntb->db_count);
+	if (desired_vectors >= 1) {
+		rc = pci_alloc_msix(ntb->device, &num_vectors);
+
+		if (ntb_force_remap_mode != 0 && rc == 0 &&
+		    num_vectors == desired_vectors)
+			num_vectors--;
+
+		if (rc == 0 && num_vectors < desired_vectors) {
+			rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
+			    num_vectors);
+			if (rc == 0)
+				num_vectors = desired_vectors;
+			else
+				pci_release_msi(ntb->device);
+		}
+		if (rc != 0)
+			num_vectors = 1;
+	} else
+		num_vectors = 1;
+
+	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
+		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+			device_printf(ntb->device,
+			    "Errata workaround does not support MSI or INTX\n");
+			return (EINVAL);
+		}
+
+		ntb->db_vec_count = 1;
+		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
+		rc = intel_ntb_setup_legacy_interrupt(ntb);
+	} else {
+		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
+		    HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+			device_printf(ntb->device,
+			    "Errata workaround expects %d doorbell bits\n",
+			    XEON_NONLINK_DB_MSIX_BITS);
+			return (EINVAL);
+		}
+
+		intel_ntb_create_msix_vec(ntb, num_vectors);
+		rc = intel_ntb_setup_msix(ntb, num_vectors);
+	}
+	if (rc != 0) {
+		device_printf(ntb->device,
+		    "Error allocating interrupts: %d\n", rc);
+		intel_ntb_free_msix_vec(ntb);
+	}
+
+	return (rc);
+}
+
+static int
+intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
+{
+	int rc;
+
+	ntb->int_info[0].rid = 0;
+	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
+	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
+	if (ntb->int_info[0].res == NULL) {
+		device_printf(ntb->device, "bus_alloc_resource failed\n");
+		return (ENOMEM);
+	}
+
+	ntb->int_info[0].tag = NULL;
+	ntb->allocated_interrupts = 1;
+
+	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
+	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
+	    ntb, &ntb->int_info[0].tag);
+	if (rc != 0) {
+		device_printf(ntb->device, "bus_setup_intr failed\n");
+		return (ENXIO);
+	}
+
+	return (0);
+}
+
+static void
+intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
+{
+	struct ntb_int_info *current_int;
+	int i;
+
+	for (i = 0; i < ntb->allocated_interrupts; i++) {
+		current_int = &ntb->int_info[i];
+		if (current_int->tag != NULL)
+			bus_teardown_intr(ntb->device, current_int->res,
+			    current_int->tag);
+
+		if (current_int->res != NULL)
+			bus_release_resource(ntb->device, SYS_RES_IRQ,
+			    rman_get_rid(current_int->res), current_int->res);
+	}
+
+	intel_ntb_free_msix_vec(ntb);
+	pci_release_msi(ntb->device);
+}
+
+/*
+ * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
+ * out to make code clearer.
+ */
+static inline uint64_t
+db_ioread(struct ntb_softc *ntb, uint64_t regoff)
+{
+
+	if (ntb->type == NTB_ATOM)
+		return (intel_ntb_reg_read(8, regoff));
+
+	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
+
+	return (intel_ntb_reg_read(2, regoff));
+}
+
+static inline void
+db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
+{
+
+	KASSERT((val & ~ntb->db_valid_mask) == 0,
+	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
+	     (uintmax_t)(val & ~ntb->db_valid_mask),
+	     (uintmax_t)ntb->db_valid_mask));
+
+	if (regoff == ntb->self_reg->db_mask)
+		DB_MASK_ASSERT(ntb, MA_OWNED);
+	db_iowrite_raw(ntb, regoff, val);
+}
+
+static inline void
+db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
+{
+
+	if (ntb->type == NTB_ATOM) {
+		intel_ntb_reg_write(8, regoff, val);
+		return;
+	}
+
+	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
+	intel_ntb_reg_write(2, regoff, (uint16_t)val);
+}
+
+static void
+intel_ntb_db_set_mask(device_t dev, uint64_t bits)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	DB_MASK_LOCK(ntb);
+	ntb->db_mask |= bits;
+	if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
+		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+	DB_MASK_UNLOCK(ntb);
+}
+
+static void
+intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+	uint64_t ibits;
+	int i;
+
+	KASSERT((bits & ~ntb->db_valid_mask) == 0,
+	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
+	     (uintmax_t)(bits & ~ntb->db_valid_mask),
+	     (uintmax_t)ntb->db_valid_mask));
+
+	DB_MASK_LOCK(ntb);
+	ibits = ntb->fake_db_bell & ntb->db_mask & bits;
+	ntb->db_mask &= ~bits;
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+		/* Simulate fake interrupts if unmasked DB bits are set. */
+		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+			if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
+				swi_sched(ntb->int_info[i].tag, 0);
+		}
+	} else {
+		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+	}
+	DB_MASK_UNLOCK(ntb);
+}
+
+static uint64_t
+intel_ntb_db_read(device_t dev)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
+		return (ntb->fake_db_bell);
+
+	return (db_ioread(ntb, ntb->self_reg->db_bell));
+}
+
+static void
+intel_ntb_db_clear(device_t dev, uint64_t bits)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	KASSERT((bits & ~ntb->db_valid_mask) == 0,
+	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
+	     (uintmax_t)(bits & ~ntb->db_valid_mask),
+	     (uintmax_t)ntb->db_valid_mask));
+
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+		DB_MASK_LOCK(ntb);
+		ntb->fake_db_bell &= ~bits;
+		DB_MASK_UNLOCK(ntb);
+		return;
+	}
+
+	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
+}
+
+static inline uint64_t
+intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
+{
+	uint64_t shift, mask;
+
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+		/*
+		 * Remap vectors in a custom way so that at least the first
+		 * three doorbells do not generate stray events.  This breaks
+		 * Linux compatibility (if any existed) when more than one DB
+		 * is used (if_ntb uses only one).
+		 */
+		if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
+			return (1 << db_vector);
+		if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
+			return (0x7ffc);
+	}
+
+	shift = ntb->db_vec_shift;
+	mask = (1ull << shift) - 1;
+	return (mask << (shift * db_vector));
+}
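+
+/*
+ * Sketch of the normal (non-errata) Xeon case: db_vec_shift is
+ * XEON_DB_MSIX_VECTOR_SHIFT; assuming that constant is 5, vector 2 yields
+ * 0x1f << 10 == 0x7c00, i.e. doorbell bits 10-14.
+ */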
+
+static void
+intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
+{
+	uint64_t vec_mask;
+
+	ntb->last_ts = ticks;
+	vec_mask = intel_ntb_vec_mask(ntb, vec);
+
+	if ((vec_mask & ntb->db_link_mask) != 0) {
+		if (intel_ntb_poll_link(ntb))
+			ntb_link_event(ntb->device);
+	}
+
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
+	    (vec_mask & ntb->db_link_mask) == 0) {
+		DB_MASK_LOCK(ntb);
+
+		/* Do not report the same DB events again if not yet cleared. */
+		vec_mask &= ~ntb->fake_db_bell;
+
+		/* Update our internal doorbell register. */
+		ntb->fake_db_bell |= vec_mask;
+
+		/* Do not report masked DB events. */
+		vec_mask &= ~ntb->db_mask;
+
+		DB_MASK_UNLOCK(ntb);
+	}
+
+	if ((vec_mask & ntb->db_valid_mask) != 0)
+		ntb_db_event(ntb->device, vec);
+}
+
+static void
+ndev_vec_isr(void *arg)
+{
+	struct ntb_vec *nvec = arg;
+
+	intel_ntb_interrupt(nvec->ntb, nvec->num);
+}
+
+static void
+ndev_irq_isr(void *arg)
+{
+	/* If we couldn't set up MSI-X, we only have the one vector. */
+	intel_ntb_interrupt(arg, 0);
+}
+
+static int
+intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
+{
+	uint32_t i;
+
+	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
+	    M_ZERO | M_WAITOK);
+	for (i = 0; i < num_vectors; i++) {
+		ntb->msix_vec[i].num = i;
+		ntb->msix_vec[i].ntb = ntb;
+	}
+
+	return (0);
+}
+
+static void
+intel_ntb_free_msix_vec(struct ntb_softc *ntb)
+{
+
+	if (ntb->msix_vec == NULL)
+		return;
+
+	free(ntb->msix_vec, M_NTB);
+	ntb->msix_vec = NULL;
+}
+
+static void
+intel_ntb_get_msix_info(struct ntb_softc *ntb)
+{
+	struct pci_devinfo *dinfo;
+	struct pcicfg_msix *msix;
+	uint32_t laddr, data, i, offset;
+
+	dinfo = device_get_ivars(ntb->device);
+	msix = &dinfo->cfg.msix;
+
+	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));
+
+	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;
+
+		laddr = bus_read_4(msix->msix_table_res, offset +
+		    PCI_MSIX_ENTRY_LOWER_ADDR);
+		intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);
+
+		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
+		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
+		     MSI_INTEL_ADDR_BASE));
+		ntb->msix_data[i].nmd_ofs = laddr;
+
+		data = bus_read_4(msix->msix_table_res, offset +
+		    PCI_MSIX_ENTRY_DATA);
+		intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
+
+		ntb->msix_data[i].nmd_data = data;
+	}
+}
+
+static struct ntb_hw_info *
+intel_ntb_get_device_info(uint32_t device_id)
+{
+	struct ntb_hw_info *ep = pci_ids;
+
+	while (ep->device_id) {
+		if (ep->device_id == device_id)
+			return (ep);
+		++ep;
+	}
+	return (NULL);
+}
+
+static void
+intel_ntb_teardown_xeon(struct ntb_softc *ntb)
+{
+
+	if (ntb->reg != NULL)
+		intel_ntb_link_disable(ntb->device);
+}
+
+static void
+intel_ntb_detect_max_mw(struct ntb_softc *ntb)
+{
+
+	if (ntb->type == NTB_ATOM) {
+		ntb->mw_count = ATOM_MW_COUNT;
+		return;
+	}
+
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
+		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
+	else
+		ntb->mw_count = XEON_SNB_MW_COUNT;
+}
+
+static int
+intel_ntb_detect_xeon(struct ntb_softc *ntb)
+{
+	uint8_t ppd, conn_type;
+
+	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
+	ntb->ppd = ppd;
+
+	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
+		ntb->dev_type = NTB_DEV_DSD;
+	else
+		ntb->dev_type = NTB_DEV_USD;
+
+	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
+		ntb->features |= NTB_SPLIT_BAR;
+
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
+	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
+		device_printf(ntb->device,
+		    "Can not apply SB01BASE_LOCKUP workaround "
+		    "with split BARs disabled!\n");
+		device_printf(ntb->device,
+		    "Expect system hangs under heavy NTB traffic!\n");
+		ntb->features &= ~NTB_SB01BASE_LOCKUP;
+	}
+
+	/*
+	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
+	 * errata workaround; only do one at a time.
+	 */
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
+		ntb->features &= ~NTB_SDOORBELL_LOCKUP;
+
+	conn_type = ppd & XEON_PPD_CONN_TYPE;
+	switch (conn_type) {
+	case NTB_CONN_B2B:
+		ntb->conn_type = conn_type;
+		break;
+	case NTB_CONN_RP:
+	case NTB_CONN_TRANSPARENT:
+	default:
+		device_printf(ntb->device, "Unsupported connection type: %u\n",
+		    (unsigned)conn_type);
+		return (ENXIO);
+	}
+	return (0);
+}
+
+static int
+intel_ntb_detect_atom(struct ntb_softc *ntb)
+{
+	uint32_t ppd, conn_type;
+
+	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
+	ntb->ppd = ppd;
+
+	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
+		ntb->dev_type = NTB_DEV_DSD;
+	else
+		ntb->dev_type = NTB_DEV_USD;
+
+	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
+	switch (conn_type) {
+	case NTB_CONN_B2B:
+		ntb->conn_type = conn_type;
+		break;
+	default:
+		device_printf(ntb->device, "Unsupported NTB configuration\n");
+		return (ENXIO);
+	}
+	return (0);
+}
+
+static int
+intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
+{
+	int rc;
+
+	ntb->spad_count		= XEON_SPAD_COUNT;
+	ntb->db_count		= XEON_DB_COUNT;
+	ntb->db_link_mask	= XEON_DB_LINK_BIT;
+	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
+	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;
+
+	if (ntb->conn_type != NTB_CONN_B2B) {
+		device_printf(ntb->device, "Connection type %d not supported\n",
+		    ntb->conn_type);
+		return (ENXIO);
+	}
+
+	ntb->reg = &xeon_reg;
+	ntb->self_reg = &xeon_pri_reg;
+	ntb->peer_reg = &xeon_b2b_reg;
+	ntb->xlat_reg = &xeon_sec_xlat;
+
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+		ntb->fake_db_bell = 0;
+		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
+		    ntb->mw_count;
+		intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
+		    g_ntb_msix_idx, ntb->msix_mw_idx);
+		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
+		    VM_MEMATTR_UNCACHEABLE);
+		KASSERT(rc == 0, ("shouldn't fail"));
+	} else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
+		/*
+		 * There is a Xeon hardware erratum related to writes to
+		 * SDOORBELL or B2BDOORBELL in conjunction with inbound access
+		 * to NTB MMIO space, which may hang the system.  To work
+		 * around this, use a memory window to access the interrupt
+		 * and scratch pad registers on the remote system.
+		 */
+		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
+		    ntb->mw_count;
+		intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
+		    g_ntb_mw_idx, ntb->b2b_mw_idx);
+		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
+		    VM_MEMATTR_UNCACHEABLE);
+		KASSERT(rc == 0, ("shouldn't fail"));
+	} else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
+		/*
+		 * HW erratum on bit 14 of the b2bdoorbell register: writes
+		 * are not mirrored to the remote system.  Shrink the number
+		 * of bits by one, since bit 14 is the last bit.
+		 *
+		 * In REGS_THRU_MW errata mode we don't use the b2bdoorbell
+		 * register anyway, nor for non-B2B connection types.
+		 */
+		ntb->db_count = XEON_DB_COUNT - 1;
+
+	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;
+
+	if (ntb->dev_type == NTB_DEV_USD)
+		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
+		    &xeon_b2b_usd_addr);
+	else
+		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
+		    &xeon_b2b_dsd_addr);
+	if (rc != 0)
+		return (rc);
+
+	/* Enable Bus Master and Memory Space on the secondary side */
+	intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
+	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
+
+	/*
+	 * Mask all doorbell interrupts.
+	 */
+	DB_MASK_LOCK(ntb);
+	ntb->db_mask = ntb->db_valid_mask;
+	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+	DB_MASK_UNLOCK(ntb);
+
+	rc = intel_ntb_init_isr(ntb);
+	return (rc);
+}
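+
+/*
+ * For illustration, assuming XEON_DB_COUNT is 15: db_valid_mask above is
+ * (1ull << 15) - 1 == 0x7fff, and under the B2BDOORBELL_BIT14 erratum the
+ * count shrinks to 14, giving 0x3fff.
+ */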
+
+static int
+intel_ntb_atom_init_dev(struct ntb_softc *ntb)
+{
+	int error;
+
+	KASSERT(ntb->conn_type == NTB_CONN_B2B,
+	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
+
+	ntb->spad_count		 = ATOM_SPAD_COUNT;
+	ntb->db_count		 = ATOM_DB_COUNT;
+	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
+	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
+	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;
+
+	ntb->reg = &atom_reg;
+	ntb->self_reg = &atom_pri_reg;
+	ntb->peer_reg = &atom_b2b_reg;
+	ntb->xlat_reg = &atom_sec_xlat;
+
+	/*
+	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
+	 * resolved.  Mask transaction layer internal parity errors.
+	 */
+	pci_write_config(ntb->device, 0xFC, 0x4, 4);
+
+	configure_atom_secondary_side_bars(ntb);
+
+	/* Enable Bus Master and Memory Space on the secondary side */
+	intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
+	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
+
+	error = intel_ntb_init_isr(ntb);
+	if (error != 0)
+		return (error);
+
+	/* Initiate PCI-E link training */
+	intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+
+	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
+
+	return (0);
+}
+
+/* XXX: Linux driver doesn't seem to do any of this for Atom. */
+static void
+configure_atom_secondary_side_bars(struct ntb_softc *ntb)
+{
+
+	/*
+	 * The USD and DSD cases currently program identical values, so
+	 * ntb->dev_type does not (yet) change what is written here.
+	 */
+	intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET, XEON_B2B_BAR2_ADDR64);
+	intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET, XEON_B2B_BAR4_ADDR64);
+	intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
+	intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
+}
+
+/*
+ * When working around the Xeon SDOORBELL erratum by remapping remote
+ * registers in a MW, limit the B2B MW to half of the MW.  Sharing the MW
+ * this way leaves the other half available to higher-layer consumers.
+ *
+ * Only used if working around the SDOORBELL erratum and the BIOS-configured
+ * MW size is sufficiently large.
+ */
+static unsigned int ntb_b2b_mw_share;
+TUNABLE_INT("hw.ntb.b2b_mw_share", &ntb_b2b_mw_share);
+SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
+    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
+    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
+    "value here.");
+
+static void
+xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
+    enum ntb_bar regbar)
+{
+	struct ntb_pci_bar_info *bar;
+	uint8_t bar_sz;
+
+	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
+		return;
+
+	bar = &ntb->bar_info[idx];
+	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
+	if (idx == regbar) {
+		if (ntb->b2b_off != 0)
+			bar_sz--;
+		else
+			bar_sz = 0;
+	}
+	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
+	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
+	(void)bar_sz;
+}
+
+static void
+xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
+    enum ntb_bar idx, enum ntb_bar regbar)
+{
+	uint64_t reg_val;
+	uint32_t base_reg, lmt_reg;
+
+	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
+	if (idx == regbar) {
+		if (ntb->b2b_off)
+			bar_addr += ntb->b2b_off;
+		else
+			bar_addr = 0;
+	}
+
+	if (!bar_is_64bit(ntb, idx)) {
+		intel_ntb_reg_write(4, base_reg, bar_addr);
+		reg_val = intel_ntb_reg_read(4, base_reg);
+		(void)reg_val;
+
+		intel_ntb_reg_write(4, lmt_reg, bar_addr);
+		reg_val = intel_ntb_reg_read(4, lmt_reg);
+		(void)reg_val;
+	} else {
+		intel_ntb_reg_write(8, base_reg, bar_addr);
+		reg_val = intel_ntb_reg_read(8, base_reg);
+		(void)reg_val;
+
+		intel_ntb_reg_write(8, lmt_reg, bar_addr);
+		reg_val = intel_ntb_reg_read(8, lmt_reg);
+		(void)reg_val;
+	}
+}
+
+static void
+xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
+{
+	struct ntb_pci_bar_info *bar;
+
+	bar = &ntb->bar_info[idx];
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
+		intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
+		base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
+	} else {
+		intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
+		base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
+	}
+	(void)base_addr;
+}
+
+static int
+xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
+    const struct ntb_b2b_addr *peer_addr)
+{
+	struct ntb_pci_bar_info *b2b_bar;
+	vm_size_t bar_size;
+	uint64_t bar_addr;
+	enum ntb_bar b2b_bar_num, i;
+
+	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
+		b2b_bar = NULL;
+		b2b_bar_num = NTB_CONFIG_BAR;
+		ntb->b2b_off = 0;
+	} else {
+		b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
+		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
+		    ("invalid b2b mw bar"));
+
+		b2b_bar = &ntb->bar_info[b2b_bar_num];
+		bar_size = b2b_bar->size;
+
+		if (ntb_b2b_mw_share != 0 &&
+		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
+			ntb->b2b_off = bar_size >> 1;
+		else if (bar_size >= XEON_B2B_MIN_SIZE) {
+			ntb->b2b_off = 0;
+		} else {
+			device_printf(ntb->device,
+			    "B2B bar size is too small!\n");
+			return (EIO);
+		}
+	}
+
+	/*
+	 * Reset the secondary bar sizes to match the primary bar sizes.
+	 * (Except, disable or halve the size of the B2B secondary bar.)
+	 */
+	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
+		xeon_reset_sbar_size(ntb, i, b2b_bar_num);
+
+	bar_addr = 0;
+	if (b2b_bar_num == NTB_CONFIG_BAR)
+		bar_addr = addr->bar0_addr;
+	else if (b2b_bar_num == NTB_B2B_BAR_1)
+		bar_addr = addr->bar2_addr64;
+	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
+		bar_addr = addr->bar4_addr64;
+	else if (b2b_bar_num == NTB_B2B_BAR_2)
+		bar_addr = addr->bar4_addr32;
+	else if (b2b_bar_num == NTB_B2B_BAR_3)
+		bar_addr = addr->bar5_addr32;
+	else
+		KASSERT(false, ("invalid bar"));
+
+	intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
+
+	/*
+	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
+	 * register BAR.  The B2B BAR is either disabled above or configured
+	 * half-size.  It starts at PBAR xlat + offset.
+	 *
+	 * Also set up incoming BAR limits == base (zero length window).
+	 */
+	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
+	    b2b_bar_num);
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
+		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
+		    NTB_B2B_BAR_2, b2b_bar_num);
+		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
+		    NTB_B2B_BAR_3, b2b_bar_num);
+	} else
+		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
+		    NTB_B2B_BAR_2, b2b_bar_num);
+
+	/* Zero incoming translation addrs */
+	intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
+	intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
+
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+		uint32_t xlat_reg, lmt_reg;
+		enum ntb_bar bar_num;
+
+		/*
+		 * Point the chosen MSI-X MW BAR xlat at the remote LAPIC as
+		 * part of the SB01BASE_LOCKUP workaround.
+		 */
+		bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
+		bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
+		if (bar_is_64bit(ntb, bar_num)) {
+			intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
+			ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
+			intel_ntb_reg_write(8, lmt_reg, 0);
+		} else {
+			intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
+			ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
+			intel_ntb_reg_write(4, lmt_reg, 0);
+		}
+
+		ntb->peer_lapic_bar =  &ntb->bar_info[bar_num];
+	}
+	(void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
+	(void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
+
+	/* Zero outgoing translation limits (whole bar size windows) */
+	intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
+	intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
+
+	/* Set outgoing translation offsets */
+	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
+		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
+		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
+	} else
+		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);
+
+	/* Set the translation offset for B2B registers */
+	bar_addr = 0;
+	if (b2b_bar_num == NTB_CONFIG_BAR)
+		bar_addr = peer_addr->bar0_addr;
+	else if (b2b_bar_num == NTB_B2B_BAR_1)
+		bar_addr = peer_addr->bar2_addr64;
+	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
+		bar_addr = peer_addr->bar4_addr64;
+	else if (b2b_bar_num == NTB_B2B_BAR_2)
+		bar_addr = peer_addr->bar4_addr32;
+	else if (b2b_bar_num == NTB_B2B_BAR_3)
+		bar_addr = peer_addr->bar5_addr32;
+	else
+		KASSERT(false, ("invalid bar"));
+
+	/*
+	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
+	 * at a time.
+	 */
+	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
+	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
+	return (0);
+}
+
+static inline bool
+_xeon_link_is_up(struct ntb_softc *ntb)
+{
+
+	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
+		return (true);
+	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
+}
+
+static inline bool
+link_is_up(struct ntb_softc *ntb)
+{
+
+	if (ntb->type == NTB_XEON)
+		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
+		    !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));
+
+	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
+	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
+}
+
+static inline bool
+atom_link_is_err(struct ntb_softc *ntb)
+{
+	uint32_t status;
+
+	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
+
+	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
+	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
+		return (true);
+
+	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
+	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
+}
+
+/* Atom has no link status interrupt; poll the link on that platform. */
+static void
+atom_link_hb(void *arg)
+{
+	struct ntb_softc *ntb = arg;
+	sbintime_t timo, poll_ts;
+
+	timo = NTB_HB_TIMEOUT * hz;
+	poll_ts = ntb->last_ts + timo;
+
+	/*
+	 * Delay polling the link status if an interrupt was received, unless
+	 * the cached link status says the link is down.
+	 */
+	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
+		timo = poll_ts - ticks;
+		goto out;
+	}
+
+	if (intel_ntb_poll_link(ntb))
+		ntb_link_event(ntb->device);
+
+	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
+		/* Link is down with error, proceed with recovery */
+		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
+		return;
+	}
+
+out:
+	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
+}
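+
+/*
+ * Timing sketch: the heartbeat normally refires every NTB_HB_TIMEOUT * hz
+ * ticks (i.e., NTB_HB_TIMEOUT seconds), and any interrupt refreshes
+ * last_ts, so a healthy link that is seeing traffic is polled at most once
+ * per interval.
+ */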
+
+static void
+atom_perform_link_restart(struct ntb_softc *ntb)
+{
+	uint32_t status;
+
+	/* Driver resets the NTB ModPhy lanes - magic! */
+	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
+	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
+	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
+	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
+
+	/* Driver waits 100ms to allow the NTB ModPhy to settle */
+	pause("ModPhy", hz / 10);
+
+	/* Clear AER Errors, write to clear */
+	status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
+	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
+	intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
+
+	/* Clear unexpected electrical idle event in LTSSM, write to clear */
+	status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
+	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
+	intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
+
+	/* Clear DeSkew Buffer error, write to clear */
+	status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
+	status |= ATOM_DESKEWSTS_DBERR;
+	intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
+
+	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
+	status &= ATOM_IBIST_ERR_OFLOW;
+	intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
+
+	/* Releases the NTB state machine to allow the link to retrain */
+	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
+	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
+	intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
+}
+
+static int
+intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
+    enum ntb_width width __unused)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+	uint32_t cntl;
+
+	intel_ntb_printf(2, "%s\n", __func__);
+
+	if (ntb->type == NTB_ATOM) {
+		pci_write_config(ntb->device, NTB_PPD_OFFSET,
+		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
+		return (0);
+	}
+
+	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
+		ntb_link_event(dev);
+		return (0);
+	}
+
+	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
+	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
+	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
+	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
+		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
+	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
+	return (0);
+}
+
+static int
+intel_ntb_link_disable(device_t dev)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+	uint32_t cntl;
+
+	intel_ntb_printf(2, "%s\n", __func__);
+
+	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
+		ntb_link_event(dev);
+		return (0);
+	}
+
+	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
+	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
+	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
+		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
+	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
+	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
+	return (0);
+}
+
+static bool
+intel_ntb_link_enabled(device_t dev)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+	uint32_t cntl;
+
+	if (ntb->type == NTB_ATOM) {
+		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
+		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
+	}
+
+	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
+		return (true);
+
+	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
+	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
+}
+
+static void
+recover_atom_link(void *arg)
+{
+	struct ntb_softc *ntb = arg;
+	unsigned speed, width, oldspeed, oldwidth;
+	uint32_t status32;
+
+	atom_perform_link_restart(ntb);
+
+	/*
+	 * There is a potential race if the two NTB devices attempt recovery
+	 * at the same time.  If their recovery times coincide, the link will
+	 * not recover and the driver will be stuck in this loop forever.
+	 * Add a random interval to the recovery time to prevent this race.
+	 */
+	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
+	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);
+
+	if (atom_link_is_err(ntb))
+		goto retry;
+
+	status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
+	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
+		goto out;
+
+	status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
+	width = NTB_LNK_STA_WIDTH(status32);
+	speed = status32 & NTB_LINK_SPEED_MASK;
+
+	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
+	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
+	if (oldwidth != width || oldspeed != speed)
+		goto retry;
+
+out:
+	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
+	    ntb);
+	return;
+
+retry:
+	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
+	    ntb);
+}
+
+/*
+ * Polls the HW link status register(s); returns true if something has changed.
+ */
+static bool
+intel_ntb_poll_link(struct ntb_softc *ntb)
+{
+	uint32_t ntb_cntl;
+	uint16_t reg_val;
+
+	if (ntb->type == NTB_ATOM) {
+		ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
+		if (ntb_cntl == ntb->ntb_ctl)
+			return (false);
+
+		ntb->ntb_ctl = ntb_cntl;
+		ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
+	} else {
+		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
+
+		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
+		if (reg_val == ntb->lnk_sta)
+			return (false);
+
+		ntb->lnk_sta = reg_val;
+
+		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+			if (_xeon_link_is_up(ntb)) {
+				if (!ntb->peer_msix_good) {
+					callout_reset(&ntb->peer_msix_work, 0,
+					    intel_ntb_exchange_msix, ntb);
+					return (false);
+				}
+			} else {
+				ntb->peer_msix_good = false;
+				ntb->peer_msix_done = false;
+			}
+		}
+	}
+	return (true);
+}
+
+static inline enum ntb_speed
+intel_ntb_link_sta_speed(struct ntb_softc *ntb)
+{
+
+	if (!link_is_up(ntb))
+		return (NTB_SPEED_NONE);
+	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
+}
+
+static inline enum ntb_width
+intel_ntb_link_sta_width(struct ntb_softc *ntb)
+{
+
+	if (!link_is_up(ntb))
+		return (NTB_WIDTH_NONE);
+	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
+}
+
+SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
+    "Driver state, statistics, and HW registers");
+
+#define NTB_REGSZ_MASK	(3ul << 30)
+#define NTB_REG_64	(1ul << 30)
+#define NTB_REG_32	(2ul << 30)
+#define NTB_REG_16	(3ul << 30)
+#define NTB_REG_8	(0ul << 30)
+
+#define NTB_DB_READ	(1ul << 29)
+#define NTB_PCI_REG	(1ul << 28)
+#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
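+
+/*
+ * These flags pack a register descriptor into the sysctl arg2 word: the
+ * access size in bits 31:30, the NTB_DB_READ/NTB_PCI_REG flags in bits
+ * 29:28, and the register offset in the low 28 bits.  E.g.,
+ * NTB_REG_32 | ntb->reg->ntb_ctl describes a 32-bit MMIO read of the NTB
+ * control register.
+ */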
+
+static void
+intel_ntb_sysctl_init(struct ntb_softc *ntb)
+{
+	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
+	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid *tree, *tmptree;
+
+	ctx = device_get_sysctl_ctx(ntb->device);
+	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));
+
+	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
+	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0,
+	    sysctl_handle_link_status_human, "A",
+	    "Link status (human readable)");
+	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
+	    CTLFLAG_RD | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_status,
+	    "IU", "Link status (1=active, 0=inactive)");
+	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
+	    CTLFLAG_RW | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_admin,
+	    "IU", "Set/get interface status (1=UP, 0=DOWN)");
+
+	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
+	    CTLFLAG_RD, NULL, "Driver state, statistics, and HW registers");
+	tree_par = SYSCTL_CHILDREN(tree);
+
+	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
+	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
+	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
+	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
+	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
+	    &ntb->ppd, 0, "Raw PPD register (cached)");
+
+	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
+#ifdef notyet
+		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
+		    &ntb->b2b_mw_idx, 0,
+		    "Index of the MW used for B2B remote register access");
+#endif
+		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
+		    CTLFLAG_RD, &ntb->b2b_off,
+		    "If non-zero, offset of B2B register region in shared MW");
+	}
+
+	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
+	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A",
+	    "Features/errata of this NTB device");
+
+	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
+	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
+	    "NTB CTL register (cached)");
+	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
+	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
+	    "LNK STA register (cached)");
+
+#ifdef notyet
+	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
+	    &ntb->mw_count, 0, "MW count");
+	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
+	    &ntb->spad_count, 0, "Scratchpad count");
+	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
+	    &ntb->db_count, 0, "Doorbell count");
+	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
+	    &ntb->db_vec_count, 0, "Doorbell vector count");
+	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
+	    &ntb->db_vec_shift, 0, "Doorbell vector shift");
+#endif
+
+	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
+	    &ntb->db_valid_mask, "Doorbell valid mask");
+	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
+	    &ntb->db_link_mask, "Doorbell link mask");
+	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
+	    &ntb->db_mask, "Doorbell mask (cached)");
+
+	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
+	    CTLFLAG_RD, NULL, "Raw HW registers (big-endian)");
+	regpar = SYSCTL_CHILDREN(tmptree);
+
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
+	    ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
+	    "NTB Control register");
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
+	    0x19c, sysctl_handle_register, "IU",
+	    "NTB Link Capabilities");
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
+	    0x1a0, sysctl_handle_register, "IU",
+	    "NTB Link Control register");
+
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
+	    sysctl_handle_register, "QU", "Doorbell mask register");
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
+	    sysctl_handle_register, "QU", "Doorbell register");
+
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
+	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
+		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
+		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
+	} else {
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
+		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
+	}
+
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
+	    sysctl_handle_register, "QU", "Incoming LMT23 register");
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
+		    sysctl_handle_register, "IU", "Incoming LMT4 register");
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
+		    sysctl_handle_register, "IU", "Incoming LMT5 register");
+	} else {
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
+		    sysctl_handle_register, "QU", "Incoming LMT45 register");
+	}
+
+	if (ntb->type == NTB_ATOM)
+		return;
+
+	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
+	    CTLFLAG_RD, NULL, "Xeon HW statistics");
+	statpar = SYSCTL_CHILDREN(tmptree);
+	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
+	    sysctl_handle_register, "SU", "Upstream Memory Miss");
+
+	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
+	    CTLFLAG_RD, NULL, "Xeon HW errors");
+	errpar = SYSCTL_CHILDREN(tmptree);
+
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
+	    sysctl_handle_register, "CU", "PPD");
+
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
+	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
+	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
+	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");
+
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
+	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
+	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
+	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");
+
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
+	    sysctl_handle_register, "SU", "DEVSTS");
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
+	    sysctl_handle_register, "SU", "LNKSTS");
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
+	    sysctl_handle_register, "SU", "SLNKSTS");
+
+	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
+	    sysctl_handle_register, "IU", "UNCERRSTS");
+	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
+	    sysctl_handle_register, "IU", "CORERRSTS");
+
+	if (ntb->conn_type != NTB_CONN_B2B)
+		return;
+
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
+	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
+		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
+		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
+	} else {
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
+		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
+	}
+
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
+	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
+		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
+		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
+	} else {
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
+		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
+	}
+
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
+	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
+	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
+	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
+	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
+		    sysctl_handle_register, "IU",
+		    "Secondary BAR4 base register");
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
+		    sysctl_handle_register, "IU",
+		    "Secondary BAR5 base register");
+	} else {
+		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
+		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
+		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
+		    sysctl_handle_register, "QU",
+		    "Secondary BAR45 base register");
+	}
+}
+
+static int
+sysctl_handle_features(SYSCTL_HANDLER_ARGS)
+{
+	struct ntb_softc *ntb = arg1;
+	struct sbuf sb;
+	int error;
+
+	sbuf_new_for_sysctl(&sb, NULL, 256, req);
+
+	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
+	error = sbuf_finish(&sb);
+	sbuf_delete(&sb);
+
+	if (error || !req->newptr)
+		return (error);
+	return (EINVAL);
+}
+
+static int
+sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
+{
+	struct ntb_softc *ntb = arg1;
+	unsigned old, new;
+	int error;
+
+	old = intel_ntb_link_enabled(ntb->device);
+
+	error = SYSCTL_OUT(req, &old, sizeof(old));
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	error = SYSCTL_IN(req, &new, sizeof(new));
+	if (error != 0)
+		return (error);
+
+	intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
+	    (new != 0) ? "en" : "dis");
+
+	if (new != 0)
+		error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	else
+		error = intel_ntb_link_disable(ntb->device);
+	return (error);
+}
+
+static int
+sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
+{
+	struct ntb_softc *ntb = arg1;
+	struct sbuf sb;
+	enum ntb_speed speed;
+	enum ntb_width width;
+	int error;
+
+	sbuf_new_for_sysctl(&sb, NULL, 32, req);
+
+	if (intel_ntb_link_is_up(ntb->device, &speed, &width))
+		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
+		    (unsigned)speed, (unsigned)width);
+	else
+		sbuf_printf(&sb, "down");
+
+	error = sbuf_finish(&sb);
+	sbuf_delete(&sb);
+
+	if (error || !req->newptr)
+		return (error);
+	return (EINVAL);
+}
+
+static int
+sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
+{
+	struct ntb_softc *ntb = arg1;
+	unsigned res;
+	int error;
+
+	res = intel_ntb_link_is_up(ntb->device, NULL, NULL);
+
+	error = SYSCTL_OUT(req, &res, sizeof(res));
+	if (error || !req->newptr)
+		return (error);
+	return (EINVAL);
+}
+
+static int
+sysctl_handle_register(SYSCTL_HANDLER_ARGS)
+{
+	struct ntb_softc *ntb;
+	const void *outp;
+	uintptr_t sz;
+	uint64_t umv;
+	char be[sizeof(umv)];
+	size_t outsz;
+	uint32_t reg;
+	bool db, pci;
+	int error;
+
+	ntb = arg1;
+	reg = arg2 & ~NTB_REGFLAGS_MASK;
+	sz = arg2 & NTB_REGSZ_MASK;
+	db = (arg2 & NTB_DB_READ) != 0;
+	pci = (arg2 & NTB_PCI_REG) != 0;
+
+	KASSERT(!(db && pci), ("bogus"));
+
+	if (db) {
+		KASSERT(sz == NTB_REG_64, ("bogus"));
+		umv = db_ioread(ntb, reg);
+		outsz = sizeof(uint64_t);
+	} else {
+		switch (sz) {
+		case NTB_REG_64:
+			if (pci)
+				umv = pci_read_config(ntb->device, reg, 8);
+			else
+				umv = intel_ntb_reg_read(8, reg);
+			outsz = sizeof(uint64_t);
+			break;
+		case NTB_REG_32:
+			if (pci)
+				umv = pci_read_config(ntb->device, reg, 4);
+			else
+				umv = intel_ntb_reg_read(4, reg);
+			outsz = sizeof(uint32_t);
+			break;
+		case NTB_REG_16:
+			if (pci)
+				umv = pci_read_config(ntb->device, reg, 2);
+			else
+				umv = intel_ntb_reg_read(2, reg);
+			outsz = sizeof(uint16_t);
+			break;
+		case NTB_REG_8:
+			if (pci)
+				umv = pci_read_config(ntb->device, reg, 1);
+			else
+				umv = intel_ntb_reg_read(1, reg);
+			outsz = sizeof(uint8_t);
+			break;
+		default:
+			panic("bogus");
+			break;
+		}
+	}
+
+	/* Encode big-endian so that sysctl -x output is legible. */
+	be64enc(be, umv);
+	outp = ((char *)be) + sizeof(umv) - outsz;
+
+	error = SYSCTL_OUT(req, outp, outsz);
+	if (error || !req->newptr)
+		return (error);
+	return (EINVAL);
+}
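
The handler above emits each value big-endian so that "sysctl -x" prints
legible hex; a userland reader has to decode accordingly. A minimal sketch,
assuming a hypothetical OID path under dev.ntb_hw.0 (the real path depends
on the attached unit and sysctl tree):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/endian.h>
#include <stdio.h>

int
main(void)
{
	uint8_t buf[8];
	size_t len = sizeof(buf);

	/* Hypothetical OID; adjust for the actual driver unit. */
	if (sysctlbyname("dev.ntb_hw.0.debug_info.registers.ntbcntl",
	    buf, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	/* The handler wrote big-endian bytes; NTBCNTL is 32 bits wide. */
	printf("NTBCNTL = 0x%08x\n", be32dec(buf));
	return (0);
}
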
+
+static unsigned
+intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
+{
+
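+	/*
+	 * Skip memory windows claimed internally (an unshared B2B window
+	 * and/or the MSI-X workaround window) so consumers see a dense
+	 * index space.  For example, with b2b_mw_idx = 1 (unshared) and
+	 * MSI-X disabled, user index 1 maps to internal index 2.
+	 */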
+	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
+	    uidx >= ntb->b2b_mw_idx) ||
+	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
+		uidx++;
+	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
+	    uidx >= ntb->b2b_mw_idx) &&
+	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
+		uidx++;
+	return (uidx);
+}
+
+#ifndef EARLY_AP_STARTUP
+static int msix_ready;
+
+static void
+intel_ntb_msix_ready(void *arg __unused)
+{
+
+	msix_ready = 1;
+}
+SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
+    intel_ntb_msix_ready, NULL);
+#endif
+
+static void
+intel_ntb_exchange_msix(void *ctx)
+{
+	struct ntb_softc *ntb;
+	uint32_t val;
+	unsigned i;
+
+	ntb = ctx;
+
+	if (ntb->peer_msix_good)
+		goto msix_good;
+	if (ntb->peer_msix_done)
+		goto msix_done;
+
+#ifndef EARLY_AP_STARTUP
+	/* Block MSIX negotiation until SMP has started and IRQs are reshuffled. */
+	if (!msix_ready)
+		goto reschedule;
+#endif
+
+	intel_ntb_get_msix_info(ntb);
+	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
+		    ntb->msix_data[i].nmd_data);
+		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
+		    ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
+	}
+	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
+
+	intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
+	if (val != NTB_MSIX_VER_GUARD)
+		goto reschedule;
+
+	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+		intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
+		intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
+		ntb->peer_msix_data[i].nmd_data = val;
+		intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
+		intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
+		ntb->peer_msix_data[i].nmd_ofs = val;
+	}
+
+	ntb->peer_msix_done = true;
+
+msix_done:
+	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
+	intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
+	if (val != NTB_MSIX_RECEIVED)
+		goto reschedule;
+
+	intel_ntb_spad_clear(ntb->device);
+	ntb->peer_msix_good = true;
+	/* Give peer time to see our NTB_MSIX_RECEIVED. */
+	goto reschedule;
+
+msix_good:
+	intel_ntb_poll_link(ntb);
+	ntb_link_event(ntb->device);
+	return;
+
+reschedule:
+	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
+	if (_xeon_link_is_up(ntb)) {
+		callout_reset(&ntb->peer_msix_work,
+		    hz * (ntb->peer_msix_good ? 2 : 1) / 100,
+		    intel_ntb_exchange_msix, ntb);
+	} else
+		intel_ntb_spad_clear(ntb->device);
+}
+
+/*
+ * Public API to the rest of the OS
+ */
+
+static uint8_t
+intel_ntb_spad_count(device_t dev)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	return (ntb->spad_count);
+}
+
+static uint8_t
+intel_ntb_mw_count(device_t dev)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+	uint8_t res;
+
+	res = ntb->mw_count;
+	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
+		res--;
+	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
+		res--;
+	return (res);
+}
+
+static int
+intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	if (idx >= ntb->spad_count)
+		return (EINVAL);
+
+	intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
+
+	return (0);
+}
+
+/*
+ * Zeros the local scratchpad.
+ */
+static void
+intel_ntb_spad_clear(device_t dev)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+	unsigned i;
+
+	for (i = 0; i < ntb->spad_count; i++)
+		intel_ntb_spad_write(dev, i, 0);
+}
+
+static int
+intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	if (idx >= ntb->spad_count)
+		return (EINVAL);
+
+	*val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
+
+	return (0);
+}
+
+static int
+intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	if (idx >= ntb->spad_count)
+		return (EINVAL);
+
+	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
+		intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
+	else
+		intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
+
+	return (0);
+}
+
+static int
+intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	if (idx >= ntb->spad_count)
+		return (EINVAL);
+
+	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
+		*val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
+	else
+		*val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
+
+	return (0);
+}
+
+static int
+intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
+    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
+    bus_addr_t *plimit)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+	struct ntb_pci_bar_info *bar;
+	bus_addr_t limit;
+	size_t bar_b2b_off;
+	enum ntb_bar bar_num;
+
+	if (mw_idx >= intel_ntb_mw_count(dev))
+		return (EINVAL);
+	mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);
+
+	bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
+	bar = &ntb->bar_info[bar_num];
+	bar_b2b_off = 0;
+	if (mw_idx == ntb->b2b_mw_idx) {
+		KASSERT(ntb->b2b_off != 0,
+		    ("user shouldn't get non-shared b2b mw"));
+		bar_b2b_off = ntb->b2b_off;
+	}
+
+	if (bar_is_64bit(ntb, bar_num))
+		limit = BUS_SPACE_MAXADDR;
+	else
+		limit = BUS_SPACE_MAXADDR_32BIT;
+
+	if (base != NULL)
+		*base = bar->pbase + bar_b2b_off;
+	if (vbase != NULL)
+		*vbase = bar->vbase + bar_b2b_off;
+	if (size != NULL)
+		*size = bar->size - bar_b2b_off;
+	if (align != NULL)
+		*align = bar->size;
+	if (align_size != NULL)
+		*align_size = 1;
+	if (plimit != NULL)
+		*plimit = limit;
+	return (0);
+}
+
+static int
+intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+	struct ntb_pci_bar_info *bar;
+	uint64_t base, limit, reg_val;
+	size_t bar_size, mw_size;
+	uint32_t base_reg, xlat_reg, limit_reg;
+	enum ntb_bar bar_num;
+
+	if (idx >= intel_ntb_mw_count(dev))
+		return (EINVAL);
+	idx = intel_ntb_user_mw_to_idx(ntb, idx);
+
+	bar_num = intel_ntb_mw_to_bar(ntb, idx);
+	bar = &ntb->bar_info[bar_num];
+
+	bar_size = bar->size;
+	if (idx == ntb->b2b_mw_idx)
+		mw_size = bar_size - ntb->b2b_off;
+	else
+		mw_size = bar_size;
+
+	/* Hardware requires that addr be aligned to the BAR size. */
+	if ((addr & (bar_size - 1)) != 0)
+		return (EINVAL);
+
+	if (size > mw_size)
+		return (EINVAL);
+
+	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);
+
+	limit = 0;
+	if (bar_is_64bit(ntb, bar_num)) {
+		base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
+
+		if (limit_reg != 0 && size != mw_size)
+			limit = base + size;
+
+		/* Set and verify translation address */
+		intel_ntb_reg_write(8, xlat_reg, addr);
+		reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
+		if (reg_val != addr) {
+			intel_ntb_reg_write(8, xlat_reg, 0);
+			return (EIO);
+		}
+
+		/* Set and verify the limit */
+		intel_ntb_reg_write(8, limit_reg, limit);
+		reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
+		if (reg_val != limit) {
+			intel_ntb_reg_write(8, limit_reg, base);
+			intel_ntb_reg_write(8, xlat_reg, 0);
+			return (EIO);
+		}
+	} else {
+		/* Configure 32-bit (split) BAR MW */
+
+		if ((addr & UINT32_MAX) != addr)
+			return (ERANGE);
+		if (((addr + size) & UINT32_MAX) != (addr + size))
+			return (ERANGE);
+
+		base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
+
+		if (limit_reg != 0 && size != mw_size)
+			limit = base + size;
+
+		/* Set and verify translation address */
+		intel_ntb_reg_write(4, xlat_reg, addr);
+		reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
+		if (reg_val != addr) {
+			intel_ntb_reg_write(4, xlat_reg, 0);
+			return (EIO);
+		}
+
+		/* Set and verify the limit */
+		intel_ntb_reg_write(4, limit_reg, limit);
+		reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
+		if (reg_val != limit) {
+			intel_ntb_reg_write(4, limit_reg, base);
+			intel_ntb_reg_write(4, xlat_reg, 0);
+			return (EIO);
+		}
+	}
+	return (0);
+}
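
A consumer honoring the alignment contract enforced above might look like
the following kernel-side sketch (headers elided; ntb_mw_get_range() and
ntb_mw_set_trans() are the wrapper names used elsewhere in this commit,
and the M_DEVBUF/contigmalloc choices are illustrative only):

static int
example_map_window(device_t dev, unsigned mw_idx)
{
	vm_paddr_t base;
	caddr_t vbase;
	size_t size, align, align_size;
	bus_addr_t limit;
	void *buf;
	int rc;

	rc = ntb_mw_get_range(dev, mw_idx, &base, &vbase, &size, &align,
	    &align_size, &limit);
	if (rc != 0)
		return (rc);

	/* The translation address must be aligned to the BAR size. */
	buf = contigmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO, 0, limit,
	    align, 0);
	if (buf == NULL)
		return (ENOMEM);

	return (ntb_mw_set_trans(dev, mw_idx, vtophys(buf), size));
}
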
+
+static int
+intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
+{
+
+	return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
+}
+
+static int
+intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+	struct ntb_pci_bar_info *bar;
+
+	if (idx >= intel_ntb_mw_count(dev))
+		return (EINVAL);
+	idx = intel_ntb_user_mw_to_idx(ntb, idx);
+
+	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
+	*mode = bar->map_mode;
+	return (0);
+}
+
+static int
+intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	if (idx >= intel_ntb_mw_count(dev))
+		return (EINVAL);
+
+	idx = intel_ntb_user_mw_to_idx(ntb, idx);
+	return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
+}
+
+static int
+intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
+{
+	struct ntb_pci_bar_info *bar;
+	int rc;
+
+	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
+	if (bar->map_mode == mode)
+		return (0);
+
+	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
+	if (rc == 0)
+		bar->map_mode = mode;
+
+	return (rc);
+}
+
+static void
+intel_ntb_peer_db_set(device_t dev, uint64_t bit)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+		struct ntb_pci_bar_info *lapic;
+		unsigned i;
+
+		lapic = ntb->peer_lapic_bar;
+
+		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+			if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0)
+				bus_space_write_4(lapic->pci_bus_tag,
+				    lapic->pci_bus_handle,
+				    ntb->peer_msix_data[i].nmd_ofs,
+				    ntb->peer_msix_data[i].nmd_data);
+		}
+		return;
+	}
+
+	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
+		intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
+		return;
+	}
+
+	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
+}
+
+static int
+intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+	struct ntb_pci_bar_info *bar;
+	uint64_t regoff;
+
+	KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));
+
+	if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
+		bar = &ntb->bar_info[NTB_CONFIG_BAR];
+		regoff = ntb->peer_reg->db_bell;
+	} else {
+		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
+		    ("invalid b2b idx"));
+
+		bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
+		regoff = XEON_PDOORBELL_OFFSET;
+	}
+	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
+
+	/* HACK: Specific to current x86 bus implementation. */
+	*db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
+	*db_size = ntb->reg->db_size;
+	return (0);
+}
+
+static uint64_t
+intel_ntb_db_valid_mask(device_t dev)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	return (ntb->db_valid_mask);
+}
+
+static int
+intel_ntb_db_vector_count(device_t dev)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	return (ntb->db_vec_count);
+}
+
+static uint64_t
+intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	if (vector > ntb->db_vec_count)
+		return (0);
+	return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
+}
+
+static bool
+intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	if (speed != NULL)
+		*speed = intel_ntb_link_sta_speed(ntb);
+	if (width != NULL)
+		*width = intel_ntb_link_sta_width(ntb);
+	return (link_is_up(ntb));
+}
+
+static void
+save_bar_parameters(struct ntb_pci_bar_info *bar)
+{
+
+	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
+	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
+	bar->pbase = rman_get_start(bar->pci_resource);
+	bar->size = rman_get_size(bar->pci_resource);
+	bar->vbase = rman_get_virtual(bar->pci_resource);
+}
+
+static device_method_t ntb_intel_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		intel_ntb_probe),
+	DEVMETHOD(device_attach,	intel_ntb_attach),
+	DEVMETHOD(device_detach,	intel_ntb_detach),
+	/* NTB interface */
+	DEVMETHOD(ntb_link_is_up,	intel_ntb_link_is_up),
+	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
+	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
+	DEVMETHOD(ntb_link_enabled,	intel_ntb_link_enabled),
+	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
+	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
+	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
+	DEVMETHOD(ntb_mw_clear_trans,	intel_ntb_mw_clear_trans),
+	DEVMETHOD(ntb_mw_get_wc,	intel_ntb_mw_get_wc),
+	DEVMETHOD(ntb_mw_set_wc,	intel_ntb_mw_set_wc),
+	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
+	DEVMETHOD(ntb_spad_clear,	intel_ntb_spad_clear),
+	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
+	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
+	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
+	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
+	DEVMETHOD(ntb_db_valid_mask,	intel_ntb_db_valid_mask),
+	DEVMETHOD(ntb_db_vector_count,	intel_ntb_db_vector_count),
+	DEVMETHOD(ntb_db_vector_mask,	intel_ntb_db_vector_mask),
+	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
+	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
+	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
+	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
+	DEVMETHOD(ntb_peer_db_addr,	intel_ntb_peer_db_addr),
+	DEVMETHOD(ntb_peer_db_set,	intel_ntb_peer_db_set),
+	DEVMETHOD_END
+};
+
+static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
+    sizeof(struct ntb_softc));
+DRIVER_MODULE(ntb_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
+MODULE_DEPEND(ntb_intel, ntb, 1, 1, 1);
+MODULE_VERSION(ntb_intel, 1);


Property changes on: trunk/sys/dev/ntb/ntb_hw/ntb_hw.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/ntb/ntb_hw/ntb_regs.h
===================================================================
--- trunk/sys/dev/ntb/ntb_hw/ntb_regs.h	                        (rev 0)
+++ trunk/sys/dev/ntb/ntb_hw/ntb_regs.h	2018-05-27 23:30:17 UTC (rev 10090)
@@ -0,0 +1,169 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav at FreeBSD.org>
+ * Copyright (C) 2013 Intel Corporation
+ * Copyright (C) 2015 EMC Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/ntb/ntb_hw/ntb_regs.h 304380 2016-08-18 10:39:00Z mav $
+ */
+
+#ifndef _NTB_REGS_H_
+#define _NTB_REGS_H_
+
+#define NTB_LINK_STATUS_ACTIVE	0x2000
+#define NTB_LINK_SPEED_MASK	0x000f
+#define NTB_LINK_WIDTH_MASK	0x03f0
+#define NTB_LNK_STA_WIDTH(sta)	(((sta) & NTB_LINK_WIDTH_MASK) >> 4)
+
+#define XEON_SNB_MW_COUNT	2
+#define XEON_HSX_SPLIT_MW_COUNT	3
+/* Reserve the uppermost bit for link interrupt */
+#define XEON_DB_COUNT		15
+#define XEON_DB_TOTAL_SHIFT	16
+#define XEON_DB_LINK		15
+#define XEON_DB_MSIX_VECTOR_COUNT	4
+#define XEON_DB_MSIX_VECTOR_SHIFT	5
+#define XEON_DB_LINK_BIT	(1 << XEON_DB_LINK)
+#define XEON_NONLINK_DB_MSIX_BITS	3
+
+#define XEON_SPCICMD_OFFSET	0x0504
+#define XEON_DEVCTRL_OFFSET	0x0598
+#define XEON_DEVSTS_OFFSET	0x059a
+#define XEON_LINK_STATUS_OFFSET	0x01a2
+#define XEON_SLINK_STATUS_OFFSET	0x05a2
+
+#define XEON_PBAR2LMT_OFFSET	0x0000
+#define XEON_PBAR4LMT_OFFSET	0x0008
+#define XEON_PBAR5LMT_OFFSET	0x000c
+#define XEON_PBAR2XLAT_OFFSET	0x0010
+#define XEON_PBAR4XLAT_OFFSET	0x0018
+#define XEON_PBAR5XLAT_OFFSET	0x001c
+#define XEON_SBAR2LMT_OFFSET	0x0020
+#define XEON_SBAR4LMT_OFFSET	0x0028
+#define XEON_SBAR5LMT_OFFSET	0x002c
+#define XEON_SBAR2XLAT_OFFSET	0x0030
+#define XEON_SBAR4XLAT_OFFSET	0x0038
+#define XEON_SBAR5XLAT_OFFSET	0x003c
+#define XEON_SBAR0BASE_OFFSET	0x0040
+#define XEON_SBAR2BASE_OFFSET	0x0048
+#define XEON_SBAR4BASE_OFFSET	0x0050
+#define XEON_SBAR5BASE_OFFSET	0x0054
+#define XEON_NTBCNTL_OFFSET	0x0058
+#define XEON_SBDF_OFFSET	0x005c
+#define XEON_PDOORBELL_OFFSET	0x0060
+#define XEON_PDBMSK_OFFSET	0x0062
+#define XEON_SDOORBELL_OFFSET	0x0064
+#define XEON_SDBMSK_OFFSET	0x0066
+#define XEON_USMEMMISS_OFFSET	0x0070
+#define XEON_SPAD_OFFSET	0x0080
+#define XEON_SPAD_COUNT		16
+#define XEON_SPADSEMA4_OFFSET	0x00c0
+#define XEON_WCCNTRL_OFFSET	0x00e0
+#define XEON_UNCERRSTS_OFFSET	0x014c
+#define XEON_CORERRSTS_OFFSET	0x0158
+#define XEON_B2B_SPAD_OFFSET	0x0100
+#define XEON_B2B_DOORBELL_OFFSET	0x0140
+#define XEON_B2B_XLAT_OFFSETL	0x0144
+#define XEON_B2B_XLAT_OFFSETU	0x0148
+
+#define ATOM_MW_COUNT		2
+#define ATOM_DB_COUNT		34
+#define ATOM_DB_MSIX_VECTOR_COUNT	34
+#define ATOM_DB_MSIX_VECTOR_SHIFT	1
+
+#define ATOM_SPCICMD_OFFSET	0xb004
+#define ATOM_MBAR23_OFFSET	0xb018
+#define ATOM_MBAR45_OFFSET	0xb020
+#define ATOM_DEVCTRL_OFFSET	0xb048
+#define ATOM_LINK_STATUS_OFFSET	0xb052
+#define ATOM_ERRCORSTS_OFFSET	0xb110
+
+#define ATOM_SBAR2XLAT_OFFSET	0x0008
+#define ATOM_SBAR4XLAT_OFFSET	0x0010
+#define ATOM_PDOORBELL_OFFSET	0x0020
+#define ATOM_PDBMSK_OFFSET	0x0028
+#define ATOM_NTBCNTL_OFFSET	0x0060
+#define ATOM_EBDF_OFFSET		0x0064
+#define ATOM_SPAD_OFFSET		0x0080
+#define ATOM_SPAD_COUNT		16
+#define ATOM_SPADSEMA_OFFSET	0x00c0
+#define ATOM_STKYSPAD_OFFSET	0x00c4
+#define ATOM_PBAR2XLAT_OFFSET	0x8008
+#define ATOM_PBAR4XLAT_OFFSET	0x8010
+#define ATOM_B2B_DOORBELL_OFFSET	0x8020
+#define ATOM_B2B_SPAD_OFFSET	0x8080
+#define ATOM_B2B_SPADSEMA_OFFSET	0x80c0
+#define ATOM_B2B_STKYSPAD_OFFSET	0x80c4
+
+#define ATOM_MODPHY_PCSREG4	0x1c004
+#define ATOM_MODPHY_PCSREG6	0x1c006
+
+#define ATOM_IP_BASE		0xc000
+#define ATOM_DESKEWSTS_OFFSET	(ATOM_IP_BASE + 0x3024)
+#define	ATOM_LTSSMERRSTS0_OFFSET (ATOM_IP_BASE + 0x3180)
+#define ATOM_LTSSMSTATEJMP_OFFSET	(ATOM_IP_BASE + 0x3040)
+#define ATOM_IBSTERRRCRVSTS0_OFFSET	(ATOM_IP_BASE + 0x3324)
+
+#define ATOM_DESKEWSTS_DBERR		(1 << 15)
+#define ATOM_LTSSMERRSTS0_UNEXPECTEDEI	(1 << 20)
+#define ATOM_LTSSMSTATEJMP_FORCEDETECT	(1 << 2)
+#define ATOM_IBIST_ERR_OFLOW		0x7fff7fff
+
+#define NTB_CNTL_CFG_LOCK		(1 << 0)
+#define NTB_CNTL_LINK_DISABLE		(1 << 1)
+#define NTB_CNTL_S2P_BAR23_SNOOP	(1 << 2)
+#define NTB_CNTL_P2S_BAR23_SNOOP	(1 << 4)
+#define NTB_CNTL_S2P_BAR4_SNOOP		(1 << 6)
+#define NTB_CNTL_P2S_BAR4_SNOOP		(1 << 8)
+#define NTB_CNTL_S2P_BAR5_SNOOP		(1 << 12)
+#define NTB_CNTL_P2S_BAR5_SNOOP		(1 << 14)
+#define ATOM_CNTL_LINK_DOWN		(1 << 16)
+
+#define XEON_PBAR23SZ_OFFSET	0x00d0
+#define XEON_PBAR45SZ_OFFSET	0x00d1
+#define XEON_PBAR4SZ_OFFSET	0x00d1
+#define XEON_PBAR5SZ_OFFSET	0x00d5
+#define XEON_SBAR23SZ_OFFSET	0x00d2
+#define XEON_SBAR4SZ_OFFSET	0x00d3
+#define XEON_SBAR5SZ_OFFSET	0x00d6
+#define NTB_PPD_OFFSET		0x00d4
+#define XEON_PPD_CONN_TYPE	0x0003
+#define XEON_PPD_DEV_TYPE	0x0010
+#define XEON_PPD_SPLIT_BAR	0x0040
+#define ATOM_PPD_INIT_LINK	0x0008
+#define ATOM_PPD_CONN_TYPE	0x0300
+#define ATOM_PPD_DEV_TYPE	0x1000
+
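
For illustration, a cached Xeon PPD byte can be picked apart with the masks
above; a hedged sketch only (the USD/DSD polarity of the dev-type bit is
left to the driver and not asserted here):

static void
example_decode_xeon_ppd(uint8_t ppd)
{

	printf("conn type field 0x%x, dev type bit %d, split BAR %d\n",
	    ppd & XEON_PPD_CONN_TYPE,
	    (ppd & XEON_PPD_DEV_TYPE) != 0,
	    (ppd & XEON_PPD_SPLIT_BAR) != 0);
}
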
+/* The *_ADDR32 values are in low 32-bit space so 32-bit BARs can function */
+#define XEON_B2B_BAR0_ADDR	0x1000000000000000ull
+#define XEON_B2B_BAR2_ADDR64	0x2000000000000000ull
+#define XEON_B2B_BAR4_ADDR64	0x4000000000000000ull
+#define XEON_B2B_BAR4_ADDR32	0x20000000ull
+#define XEON_B2B_BAR5_ADDR32	0x40000000ull
+
+/* The peer ntb secondary config space is 32KB fixed size */
+#define XEON_B2B_MIN_SIZE		0x8000
+
+#endif /* _NTB_REGS_H_ */


Property changes on: trunk/sys/dev/ntb/ntb_hw/ntb_regs.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/ntb/ntb_if.m
===================================================================
--- trunk/sys/dev/ntb/ntb_if.m	                        (rev 0)
+++ trunk/sys/dev/ntb/ntb_if.m	2018-05-27 23:30:17 UTC (rev 10090)
@@ -0,0 +1,211 @@
+/* $MidnightBSD$ */
+#-
+# Copyright (c) 2016 Alexander Motin <mav at FreeBSD.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD: stable/10/sys/dev/ntb/ntb_if.m 304404 2016-08-18 10:59:12Z mav $
+#
+
+#include <sys/bus.h>
+#include <machine/bus.h>
+
+INTERFACE ntb;
+
+HEADER {
+	enum ntb_speed {
+		NTB_SPEED_AUTO = -1,
+		NTB_SPEED_NONE = 0,
+		NTB_SPEED_GEN1 = 1,
+		NTB_SPEED_GEN2 = 2,
+		NTB_SPEED_GEN3 = 3,
+	};
+
+	enum ntb_width {
+		NTB_WIDTH_AUTO = -1,
+		NTB_WIDTH_NONE = 0,
+		NTB_WIDTH_1 = 1,
+		NTB_WIDTH_2 = 2,
+		NTB_WIDTH_4 = 4,
+		NTB_WIDTH_8 = 8,
+		NTB_WIDTH_12 = 12,
+		NTB_WIDTH_16 = 16,
+		NTB_WIDTH_32 = 32,
+	};
+
+	typedef void (*ntb_db_callback)(void *data, uint32_t vector);
+	typedef void (*ntb_event_callback)(void *data);
+	struct ntb_ctx_ops {
+		ntb_event_callback	link_event;
+		ntb_db_callback		db_event;
+	};
+};
+
+METHOD bool link_is_up {
+	device_t	 ntb;
+	enum ntb_speed	*speed;
+	enum ntb_width	*width;
+};
+
+METHOD int link_enable {
+	device_t	 ntb;
+	enum ntb_speed	 speed;
+	enum ntb_width	 width;
+};
+
+METHOD int link_disable {
+	device_t	 ntb;
+};
+
+METHOD bool link_enabled {
+	device_t	 ntb;
+};
+
+METHOD int set_ctx {
+	device_t	 ntb;
+	void		*ctx;
+	const struct ntb_ctx_ops *ctx_ops;
+};
+
+METHOD void * get_ctx {
+	device_t	 ntb;
+	const struct ntb_ctx_ops **ctx_ops;
+};
+
+METHOD void clear_ctx {
+	device_t	 ntb;
+};
+
+METHOD uint8_t mw_count {
+	device_t	 ntb;
+};
+
+METHOD int mw_get_range {
+	device_t	 ntb;
+	unsigned	 mw_idx;
+	vm_paddr_t	*base;
+	caddr_t		*vbase;
+	size_t		*size;
+	size_t		*align;
+	size_t		*align_size;
+	bus_addr_t	*plimit;
+};
+
+METHOD int mw_set_trans {
+	device_t	 ntb;
+	unsigned	 mw_idx;
+	bus_addr_t	 addr;
+	size_t		 size;
+};
+
+METHOD int mw_clear_trans {
+	device_t	 ntb;
+	unsigned	 mw_idx;
+};
+
+METHOD int mw_get_wc {
+	device_t	 ntb;
+	unsigned	 mw_idx;
+	vm_memattr_t	*mode;
+};
+
+METHOD int mw_set_wc {
+	device_t	 ntb;
+	unsigned	 mw_idx;
+	vm_memattr_t	 mode;
+};
+
+METHOD uint8_t spad_count {
+	device_t	 ntb;
+};
+
+METHOD void spad_clear {
+	device_t	 ntb;
+};
+
+METHOD int spad_write {
+	device_t	 ntb;
+	unsigned int	 idx;
+	uint32_t	 val;
+};
+
+METHOD int spad_read {
+	device_t	 ntb;
+	unsigned int	 idx;
+	uint32_t	 *val;
+};
+
+METHOD int peer_spad_write {
+	device_t	 ntb;
+	unsigned int	 idx;
+	uint32_t	 val;
+};
+
+METHOD int peer_spad_read {
+	device_t	 ntb;
+	unsigned int	 idx;
+	uint32_t	*val;
+};
+
+METHOD uint64_t db_valid_mask {
+	device_t	 ntb;
+};
+
+METHOD int db_vector_count {
+	device_t	 ntb;
+};
+
+METHOD uint64_t db_vector_mask {
+	device_t	 ntb;
+	uint32_t	 vector;
+};
+
+METHOD int peer_db_addr {
+	device_t	 ntb;
+	bus_addr_t	*db_addr;
+	vm_size_t	*db_size;
+};
+
+METHOD void db_clear {
+	device_t	 ntb;
+	uint64_t	 bits;
+};
+
+METHOD void db_clear_mask {
+	device_t	 ntb;
+	uint64_t	 bits;
+};
+
+METHOD uint64_t db_read {
+	device_t	 ntb;
+};
+
+METHOD void db_set_mask {
+	device_t	 ntb;
+	uint64_t	 bits;
+};
+
+METHOD void peer_db_set {
+	device_t	 ntb;
+	uint64_t	 bits;
+};
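
Each METHOD above becomes an NTB_* kobj call; consumers normally go through
thin wrappers such as ntb_spad_write() and ntb_peer_db_set(), the names used
by ntb_transport.c below. A minimal hedged sketch:

/*
 * Sketch only: publish a value in local scratchpad 0 and ring peer
 * doorbell bit 0.
 */
static void
example_notify_peer(device_t ntb)
{

	ntb_spad_write(ntb, 0, 1);
	ntb_peer_db_set(ntb, 1ull << 0);
}
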


Property changes on: trunk/sys/dev/ntb/ntb_if.m
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: trunk/sys/dev/ntb/ntb_transport.c
===================================================================
--- trunk/sys/dev/ntb/ntb_transport.c	                        (rev 0)
+++ trunk/sys/dev/ntb/ntb_transport.c	2018-05-27 23:30:17 UTC (rev 10090)
@@ -0,0 +1,1522 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav at FreeBSD.org>
+ * Copyright (C) 2013 Intel Corporation
+ * Copyright (C) 2015 EMC Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The Non-Transparent Bridge (NTB) is a device that allows you to connect
+ * two or more systems using PCI-e links, providing remote memory access.
+ *
+ * This module contains a transport for sending and receiving messages by
+ * writing to remote memory window(s) provided by the underlying NTB device.
+ *
+ * NOTE: Much of the code in this module is shared with Linux. Any patches may
+ * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/ntb/ntb_transport.c 304407 2016-08-18 11:02:01Z mav $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+
+#include "ntb.h"
+#include "ntb_transport.h"
+
+#define KTR_NTB KTR_SPARE3
+
+#define NTB_TRANSPORT_VERSION	4
+
+static SYSCTL_NODE(_hw, OID_AUTO, ntb_transport, CTLFLAG_RW, 0, "ntb_transport");
+
+static unsigned g_ntb_transport_debug_level;
+TUNABLE_INT("hw.ntb_transport.debug_level", &g_ntb_transport_debug_level);
+SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, debug_level, CTLFLAG_RWTUN,
+    &g_ntb_transport_debug_level, 0,
+    "ntb_transport log level -- higher is more verbose");
+#define ntb_printf(lvl, ...) do {			\
+	if ((lvl) <= g_ntb_transport_debug_level) {	\
+		printf(__VA_ARGS__);			\
+	}						\
+} while (0)
+
+static unsigned transport_mtu = 0x10000;
+
+static uint64_t max_mw_size;
+TUNABLE_QUAD("hw.ntb_transport.max_mw_size", &max_mw_size);
+SYSCTL_UQUAD(_hw_ntb_transport, OID_AUTO, max_mw_size, CTLFLAG_RDTUN, &max_mw_size, 0,
+    "If enabled (non-zero), limit the size of large memory windows. "
+    "Both sides of the NTB MUST set the same value here.");
+
+static unsigned enable_xeon_watchdog;
+TUNABLE_INT("hw.ntb_transport.enable_xeon_watchdog", &enable_xeon_watchdog);
+SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN,
+    &enable_xeon_watchdog, 0, "If non-zero, write a register every second to "
+    "keep a watchdog from tearing down the NTB link");
+
+STAILQ_HEAD(ntb_queue_list, ntb_queue_entry);
+
+typedef uint32_t ntb_q_idx_t;
+
+struct ntb_queue_entry {
+	/* ntb_queue list reference */
+	STAILQ_ENTRY(ntb_queue_entry) entry;
+
+	/* info on data to be transferred */
+	void		*cb_data;
+	void		*buf;
+	uint32_t	len;
+	uint32_t	flags;
+
+	struct ntb_transport_qp		*qp;
+	struct ntb_payload_header	*x_hdr;
+	ntb_q_idx_t	index;
+};
+
+struct ntb_rx_info {
+	ntb_q_idx_t	entry;
+};
+
+struct ntb_transport_qp {
+	struct ntb_transport_ctx	*transport;
+	device_t		 dev;
+
+	void			*cb_data;
+
+	bool			client_ready;
+	volatile bool		link_is_up;
+	uint8_t			qp_num;	/* Only 64 QPs are allowed.  0-63 */
+
+	struct ntb_rx_info	*rx_info;
+	struct ntb_rx_info	*remote_rx_info;
+
+	void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+	    void *data, int len);
+	struct ntb_queue_list	tx_free_q;
+	struct mtx		ntb_tx_free_q_lock;
+	caddr_t			tx_mw;
+	bus_addr_t		tx_mw_phys;
+	ntb_q_idx_t		tx_index;
+	ntb_q_idx_t		tx_max_entry;
+	uint64_t		tx_max_frame;
+
+	void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+	    void *data, int len);
+	struct ntb_queue_list	rx_post_q;
+	struct ntb_queue_list	rx_pend_q;
+	/* ntb_rx_q_lock: synchronize access to rx_XXXX_q */
+	struct mtx		ntb_rx_q_lock;
+	struct task		rxc_db_work;
+	struct taskqueue	*rxc_tq;
+	caddr_t			rx_buff;
+	ntb_q_idx_t		rx_index;
+	ntb_q_idx_t		rx_max_entry;
+	uint64_t		rx_max_frame;
+
+	void (*event_handler)(void *data, enum ntb_link_event status);
+	struct callout		link_work;
+	struct callout		rx_full;
+
+	uint64_t		last_rx_no_buf;
+
+	/* Stats */
+	uint64_t		rx_bytes;
+	uint64_t		rx_pkts;
+	uint64_t		rx_ring_empty;
+	uint64_t		rx_err_no_buf;
+	uint64_t		rx_err_oflow;
+	uint64_t		rx_err_ver;
+	uint64_t		tx_bytes;
+	uint64_t		tx_pkts;
+	uint64_t		tx_ring_full;
+	uint64_t		tx_err_no_buf;
+
+	struct mtx		tx_lock;
+};
+
+struct ntb_transport_mw {
+	vm_paddr_t	phys_addr;
+	size_t		phys_size;
+	size_t		xlat_align;
+	size_t		xlat_align_size;
+	bus_addr_t	addr_limit;
+	/* Tx buff is off vbase / phys_addr */
+	caddr_t		vbase;
+	size_t		xlat_size;
+	size_t		buff_size;
+	/* Rx buff is off virt_addr / dma_addr */
+	caddr_t		virt_addr;
+	bus_addr_t	dma_addr;
+};
+
+struct ntb_transport_child {
+	device_t	dev;
+	int		qpoff;
+	int		qpcnt;
+	struct ntb_transport_child *next;
+};
+
+struct ntb_transport_ctx {
+	device_t		 dev;
+	struct ntb_transport_child *child;
+	struct ntb_transport_mw	*mw_vec;
+	struct ntb_transport_qp	*qp_vec;
+	unsigned		mw_count;
+	unsigned		qp_count;
+	uint64_t		qp_bitmap;
+	volatile bool		link_is_up;
+	struct callout		link_work;
+	struct callout		link_watchdog;
+	struct task		link_cleanup;
+};
+
+enum {
+	NTBT_DESC_DONE_FLAG = 1 << 0,
+	NTBT_LINK_DOWN_FLAG = 1 << 1,
+};
+
+struct ntb_payload_header {
+	ntb_q_idx_t ver;
+	uint32_t len;
+	uint32_t flags;
+};
+
+enum {
+	/*
+	 * The order of this enum is part of the remote protocol.  Do not
+	 * reorder without bumping the protocol version (and it's probably
+	 * best to keep the protocol in lock-step with the Linux NTB
+	 * driver).
+	 */
+	NTBT_VERSION = 0,
+	NTBT_QP_LINKS,
+	NTBT_NUM_QPS,
+	NTBT_NUM_MWS,
+	/*
+	 * N.B.: transport_link_work assumes MW1 enums = MW0 + 2.
+	 */
+	NTBT_MW0_SZ_HIGH,
+	NTBT_MW0_SZ_LOW,
+	NTBT_MW1_SZ_HIGH,
+	NTBT_MW1_SZ_LOW,
+
+	/*
+	 * Some NTB-using hardware have a watchdog to work around NTB hangs; if
+	 * a register or doorbell isn't written every few seconds, the link is
+	 * torn down.  Write an otherwise unused register every few seconds to
+	 * work around this watchdog.
+	 */
+	NTBT_WATCHDOG_SPAD = 15
+};
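
The real handshake lives in ntb_transport_link_work() (declared below); for
illustration, the version step implied by this scratchpad layout might look
like the following hedged sketch:

/*
 * Sketch: each side writes its version into the peer's scratchpad,
 * then reads back what the peer wrote into its own.
 */
static int
example_version_check(device_t dev)
{
	uint32_t val;

	ntb_peer_spad_write(dev, NTBT_VERSION, NTB_TRANSPORT_VERSION);
	ntb_spad_read(dev, NTBT_VERSION, &val);
	return (val == NTB_TRANSPORT_VERSION ? 0 : EINVAL);
}
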
+
+#define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
+#define NTB_QP_DEF_NUM_ENTRIES	100
+#define NTB_LINK_DOWN_TIMEOUT	10
+
+static int ntb_transport_probe(device_t dev);
+static int ntb_transport_attach(device_t dev);
+static int ntb_transport_detach(device_t dev);
+static void ntb_transport_init_queue(struct ntb_transport_ctx *nt,
+    unsigned int qp_num);
+static int ntb_process_tx(struct ntb_transport_qp *qp,
+    struct ntb_queue_entry *entry);
+static void ntb_transport_rxc_db(void *arg, int pending);
+static int ntb_process_rxc(struct ntb_transport_qp *qp);
+static void ntb_memcpy_rx(struct ntb_transport_qp *qp,
+    struct ntb_queue_entry *entry, void *offset);
+static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp,
+    void *data);
+static void ntb_complete_rxc(struct ntb_transport_qp *qp);
+static void ntb_transport_doorbell_callback(void *data, uint32_t vector);
+static void ntb_transport_event_callback(void *data);
+static void ntb_transport_link_work(void *arg);
+static int ntb_set_mw(struct ntb_transport_ctx *, int num_mw, size_t size);
+static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw);
+static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
+    unsigned int qp_num);
+static void ntb_qp_link_work(void *arg);
+static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt);
+static void ntb_transport_link_cleanup_work(void *, int);
+static void ntb_qp_link_down(struct ntb_transport_qp *qp);
+static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp);
+static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp);
+static void ntb_send_link_down(struct ntb_transport_qp *qp);
+static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
+    struct ntb_queue_list *list);
+static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock,
+    struct ntb_queue_list *list);
+static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock,
+    struct ntb_queue_list *from, struct ntb_queue_list *to);
+static void xeon_link_watchdog_hb(void *);
+
+static const struct ntb_ctx_ops ntb_transport_ops = {
+	.link_event = ntb_transport_event_callback,
+	.db_event = ntb_transport_doorbell_callback,
+};
+
+MALLOC_DEFINE(M_NTB_T, "ntb_transport", "ntb transport driver");
+
+static inline void
+iowrite32(uint32_t val, void *addr)
+{
+
+	bus_space_write_4(X86_BUS_SPACE_MEM, 0/* HACK */, (uintptr_t)addr,
+	    val);
+}
+
+/* Transport Init and teardown */
+
+static void
+xeon_link_watchdog_hb(void *arg)
+{
+	struct ntb_transport_ctx *nt;
+
+	nt = arg;
+	ntb_spad_write(nt->dev, NTBT_WATCHDOG_SPAD, 0);
+	callout_reset(&nt->link_watchdog, 1 * hz, xeon_link_watchdog_hb, nt);
+}
+
+static int
+ntb_transport_probe(device_t dev)
+{
+
+	device_set_desc(dev, "NTB Transport");
+	return (0);
+}
+
+static int
+ntb_transport_attach(device_t dev)
+{
+	struct ntb_transport_ctx *nt = device_get_softc(dev);
+	struct ntb_transport_child **cpp = &nt->child;
+	struct ntb_transport_child *nc;
+	struct ntb_transport_mw *mw;
+	uint64_t db_bitmap;
+	int rc, i, db_count, spad_count, qp, qpu, qpo, qpt;
+	char cfg[128] = "";
+	char buf[32];
+	char *n, *np, *c, *name;
+
+	nt->dev = dev;
+	nt->mw_count = ntb_mw_count(dev);
+	spad_count = ntb_spad_count(dev);
+	db_bitmap = ntb_db_valid_mask(dev);
+	db_count = flsll(db_bitmap);
+	KASSERT(db_bitmap == ((uint64_t)1 << db_count) - 1,
+	    ("Doorbells are not sequential (%jx).\n", (uintmax_t)db_bitmap));
+
+	device_printf(dev, "%d memory windows, %d scratchpads, "
+	    "%d doorbells\n", nt->mw_count, spad_count, db_count);
+
+	if (nt->mw_count == 0) {
+		device_printf(dev, "At least 1 memory window required.\n");
+		return (ENXIO);
+	}
+	if (spad_count < 6) {
+		device_printf(dev, "At least 6 scratchpads required.\n");
+		return (ENXIO);
+	}
+	if (spad_count < 4 + 2 * nt->mw_count) {
+		nt->mw_count = (spad_count - 4) / 2;
+		device_printf(dev, "Scratchpads are sufficient for only %d "
+		    "memory windows.\n", nt->mw_count);
+	}
+	if (db_bitmap == 0) {
+		device_printf(dev, "At least one doorbell required.\n");
+		return (ENXIO);
+	}
+
+	nt->mw_vec = malloc(nt->mw_count * sizeof(*nt->mw_vec), M_NTB_T,
+	    M_WAITOK | M_ZERO);
+	for (i = 0; i < nt->mw_count; i++) {
+		mw = &nt->mw_vec[i];
+
+		rc = ntb_mw_get_range(dev, i, &mw->phys_addr, &mw->vbase,
+		    &mw->phys_size, &mw->xlat_align, &mw->xlat_align_size,
+		    &mw->addr_limit);
+		if (rc != 0)
+			goto err;
+
+		mw->buff_size = 0;
+		mw->xlat_size = 0;
+		mw->virt_addr = NULL;
+		mw->dma_addr = 0;
+
+		rc = ntb_mw_set_wc(dev, i, VM_MEMATTR_WRITE_COMBINING);
+		if (rc)
+			ntb_printf(0, "Unable to set mw%d caching\n", i);
+	}
+
+	qpu = 0;
+	qpo = imin(db_count, nt->mw_count);
+	qpt = db_count;
+
+	snprintf(buf, sizeof(buf), "hint.%s.%d.config", device_get_name(dev),
+	    device_get_unit(dev));
+	TUNABLE_STR_FETCH(buf, cfg, sizeof(cfg));
+	n = cfg;
+	i = 0;
+	while ((c = strsep(&n, ",")) != NULL) {
+		np = c;
+		name = strsep(&np, ":");
+		if (name != NULL && name[0] == 0)
+			name = NULL;
+		qp = (np && np[0] != 0) ? strtol(np, NULL, 10) : qpo - qpu;
+		if (qp <= 0)
+			qp = 1;
+
+		if (qp > qpt - qpu) {
+			device_printf(dev, "Not enough resources for config\n");
+			break;
+		}
+
+		nc = malloc(sizeof(*nc), M_DEVBUF, M_WAITOK | M_ZERO);
+		nc->qpoff = qpu;
+		nc->qpcnt = qp;
+		nc->dev = device_add_child(dev, name, -1);
+		if (nc->dev == NULL) {
+			device_printf(dev, "Can not add child.\n");
+			break;
+		}
+		device_set_ivars(nc->dev, nc);
+		*cpp = nc;
+		cpp = &nc->next;
+
+		if (bootverbose) {
+			device_printf(dev, "%d \"%s\": queues %d",
+			    i, name, qpu);
+			if (qp > 1)
+				printf("-%d", qpu + qp - 1);
+			printf("\n");
+		}
+
+		qpu += qp;
+		i++;
+	}
+	nt->qp_count = qpu;
+
+	nt->qp_vec = malloc(nt->qp_count * sizeof(*nt->qp_vec), M_NTB_T,
+	    M_WAITOK | M_ZERO);
+
+	for (i = 0; i < nt->qp_count; i++)
+		ntb_transport_init_queue(nt, i);
+
+	callout_init(&nt->link_work, 0);
+	callout_init(&nt->link_watchdog, 0);
+	TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt);
+
+	rc = ntb_set_ctx(dev, nt, &ntb_transport_ops);
+	if (rc != 0)
+		goto err;
+
+	nt->link_is_up = false;
+	ntb_link_enable(dev, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+
+	if (enable_xeon_watchdog != 0)
+		callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt);
+
+	bus_generic_attach(dev);
+	return (0);
+
+err:
+	free(nt->qp_vec, M_NTB_T);
+	free(nt->mw_vec, M_NTB_T);
+	return (rc);
+}
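
The config hint parsed above carves the queue space into named children; a
hypothetical /boot/loader.conf line (child name, unit, and queue count are
all illustrative):

# Hypothetical: give one queue to an "if_ntb" child.
hint.ntb_transport.0.config="if_ntb:1"
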
+
+static int
+ntb_transport_detach(device_t dev)
+{
+	struct ntb_transport_ctx *nt = device_get_softc(dev);
+	struct ntb_transport_child **cpp = &nt->child;
+	struct ntb_transport_child *nc;
+	int error = 0, i;
+
+	while ((nc = *cpp) != NULL) {
+		*cpp = (*cpp)->next;
+		error = device_delete_child(dev, nc->dev);
+		if (error)
+			break;
+		free(nc, M_DEVBUF);
+	}
+	KASSERT(nt->qp_bitmap == 0,
+	    ("Some queues not freed on detach (%jx)", (uintmax_t)nt->qp_bitmap));
+
+	ntb_transport_link_cleanup(nt);
+	taskqueue_drain(taskqueue_swi, &nt->link_cleanup);
+	callout_drain(&nt->link_work);
+	callout_drain(&nt->link_watchdog);
+
+	ntb_link_disable(dev);
+	ntb_clear_ctx(dev);
+
+	for (i = 0; i < nt->mw_count; i++)
+		ntb_free_mw(nt, i);
+
+	free(nt->qp_vec, M_NTB_T);
+	free(nt->mw_vec, M_NTB_T);
+	return (0);
+}
+
+int
+ntb_transport_queue_count(device_t dev)
+{
+	struct ntb_transport_child *nc = device_get_ivars(dev);
+
+	return (nc->qpcnt);
+}
+
+static void
+ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num)
+{
+	struct ntb_transport_mw *mw;
+	struct ntb_transport_qp *qp;
+	vm_paddr_t mw_base;
+	uint64_t mw_size, qp_offset;
+	size_t tx_size;
+	unsigned num_qps_mw, mw_num, mw_count;
+
+	mw_count = nt->mw_count;
+	mw_num = QP_TO_MW(nt, qp_num);
+	mw = &nt->mw_vec[mw_num];
+
+	qp = &nt->qp_vec[qp_num];
+	qp->qp_num = qp_num;
+	qp->transport = nt;
+	qp->dev = nt->dev;
+	qp->client_ready = false;
+	qp->event_handler = NULL;
+	ntb_qp_link_down_reset(qp);
+
+	if (mw_num < nt->qp_count % mw_count)
+		num_qps_mw = nt->qp_count / mw_count + 1;
+	else
+		num_qps_mw = nt->qp_count / mw_count;
+
+	mw_base = mw->phys_addr;
+	mw_size = mw->phys_size;
+
+	tx_size = mw_size / num_qps_mw;
+	qp_offset = tx_size * (qp_num / mw_count);
+
+	qp->tx_mw = mw->vbase + qp_offset;
+	KASSERT(qp->tx_mw != NULL, ("uh oh?"));
+
+	/* XXX Assumes that a vm_paddr_t is equivalent to bus_addr_t */
+	qp->tx_mw_phys = mw_base + qp_offset;
+	KASSERT(qp->tx_mw_phys != 0, ("uh oh?"));
+
+	tx_size -= sizeof(struct ntb_rx_info);
+	qp->rx_info = (void *)(qp->tx_mw + tx_size);
+
+	/* Due to house-keeping, there must be at least 2 buffers */
+	qp->tx_max_frame = qmin(transport_mtu, tx_size / 2);
+	qp->tx_max_entry = tx_size / qp->tx_max_frame;
+
+	callout_init(&qp->link_work, 0);
+	callout_init(&qp->rx_full, 1);
+
+	mtx_init(&qp->ntb_rx_q_lock, "ntb rx q", NULL, MTX_SPIN);
+	mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN);
+	mtx_init(&qp->tx_lock, "ntb transport tx", NULL, MTX_DEF);
+	TASK_INIT(&qp->rxc_db_work, 0, ntb_transport_rxc_db, qp);
+	qp->rxc_tq = taskqueue_create("ntbt_rx", M_WAITOK,
+	    taskqueue_thread_enqueue, &qp->rxc_tq);
+	taskqueue_start_threads(&qp->rxc_tq, 1, PI_NET, "%s rx%d",
+	    device_get_nameunit(nt->dev), qp_num);
+
+	STAILQ_INIT(&qp->rx_post_q);
+	STAILQ_INIT(&qp->rx_pend_q);
+	STAILQ_INIT(&qp->tx_free_q);
+}
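
A worked instance of the partitioning arithmetic above, with hypothetical
numbers:

/*
 * Hypothetical: qp_count = 3, mw_count = 2, 64 KB windows.  QP_TO_MW
 * sends qp0 and qp2 to MW0 (num_qps_mw = 2) and qp1 to MW1
 * (num_qps_mw = 1).  MW0 is halved: qp0 gets offset 0 and qp2 gets
 * offset 32 KB, each half then divided into tx_max_frame-sized slots.
 */
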
+
+void
+ntb_transport_free_queue(struct ntb_transport_qp *qp)
+{
+	struct ntb_transport_ctx *nt = qp->transport;
+	struct ntb_queue_entry *entry;
+
+	callout_drain(&qp->link_work);
+
+	ntb_db_set_mask(qp->dev, 1ull << qp->qp_num);
+	taskqueue_drain_all(qp->rxc_tq);
+	taskqueue_free(qp->rxc_tq);
+
+	qp->cb_data = NULL;
+	qp->rx_handler = NULL;
+	qp->tx_handler = NULL;
+	qp->event_handler = NULL;
+
+	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q)))
+		free(entry, M_NTB_T);
+
+	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q)))
+		free(entry, M_NTB_T);
+
+	while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
+		free(entry, M_NTB_T);
+
+	nt->qp_bitmap &= ~(1ull << qp->qp_num);
+}
+
+/**
+ * ntb_transport_create_queue - Create a new NTB transport layer queue
+ * @rx_handler: receive callback function
+ * @tx_handler: transmit callback function
+ * @event_handler: event callback function
+ *
+ * Create a new NTB transport layer queue and provide the queue with a callback
+ * routine for both transmit and receive.  The receive callback routine will be
+ * used to pass up data when the transport has received it on the queue.   The
+ * transmit callback routine will be called when the transport has completed the
+ * transmission of the data on the queue and the data is ready to be freed.
+ *
+ * RETURNS: pointer to newly created ntb_queue, NULL on error.
+ */
+struct ntb_transport_qp *
+ntb_transport_create_queue(device_t dev, int q,
+    const struct ntb_queue_handlers *handlers, void *data)
+{
+	struct ntb_transport_child *nc = device_get_ivars(dev);
+	struct ntb_transport_ctx *nt = device_get_softc(device_get_parent(dev));
+	struct ntb_queue_entry *entry;
+	struct ntb_transport_qp *qp;
+	int i;
+
+	if (q < 0 || q >= nc->qpcnt)
+		return (NULL);
+
+	qp = &nt->qp_vec[nc->qpoff + q];
+	nt->qp_bitmap |= (1ull << qp->qp_num);
+	qp->cb_data = data;
+	qp->rx_handler = handlers->rx_handler;
+	qp->tx_handler = handlers->tx_handler;
+	qp->event_handler = handlers->event_handler;
+
+	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
+		entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO);
+		entry->cb_data = data;
+		entry->buf = NULL;
+		entry->len = transport_mtu;
+		entry->qp = qp;
+		ntb_list_add(&qp->ntb_rx_q_lock, entry, &qp->rx_pend_q);
+	}
+
+	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
+		entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO);
+		entry->qp = qp;
+		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+	}
+
+	ntb_db_clear(dev, 1ull << qp->qp_num);
+	return (qp);
+}
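
A hedged usage sketch for a child device: create queue 0 with its callbacks
and signal readiness via ntb_transport_link_up() (defined just below).  The
three callbacks are placeholders:

static void example_rx(struct ntb_transport_qp *, void *, void *, int);
static void example_tx(struct ntb_transport_qp *, void *, void *, int);
static void example_event(void *, enum ntb_link_event);

static const struct ntb_queue_handlers example_handlers = {
	.rx_handler = example_rx,
	.tx_handler = example_tx,
	.event_handler = example_event,
};

static int
example_open(device_t dev, void *softc)
{
	struct ntb_transport_qp *qp;

	qp = ntb_transport_create_queue(dev, 0, &example_handlers, softc);
	if (qp == NULL)
		return (ENXIO);
	ntb_transport_link_up(qp);
	return (0);
}
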
+
+/**
+ * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
+ * @qp: NTB transport layer queue to be enabled
+ *
+ * Notify NTB transport layer of client readiness to use queue
+ */
+void
+ntb_transport_link_up(struct ntb_transport_qp *qp)
+{
+	struct ntb_transport_ctx *nt = qp->transport;
+
+	qp->client_ready = true;
+
+	ntb_printf(2, "qp %d client ready\n", qp->qp_num);
+
+	if (nt->link_is_up)
+		callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
+}
+
+/* Transport Tx */
+
+/**
+ * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry
+ * @qp: NTB transport layer queue the entry is to be enqueued on
+ * @cb: per buffer pointer for callback function to use
+ * @data: pointer to data buffer that will be sent
+ * @len: length of the data buffer
+ *
+ * Enqueue a new transmit buffer onto the transport queue from which an NTB
+ * payload will be transmitted.  This assumes that a lock is being held to
+ * serialize access to the qp.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int
+ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+    unsigned int len)
+{
+	struct ntb_queue_entry *entry;
+	int rc;
+
+	if (!qp->link_is_up || len == 0) {
+		CTR0(KTR_NTB, "TX: link not up");
+		return (EINVAL);
+	}
+
+	entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+	if (entry == NULL) {
+		CTR0(KTR_NTB, "TX: could not get entry from tx_free_q");
+		qp->tx_err_no_buf++;
+		return (EBUSY);
+	}
+	CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry);
+
+	entry->cb_data = cb;
+	entry->buf = data;
+	entry->len = len;
+	entry->flags = 0;
+
+	mtx_lock(&qp->tx_lock);
+	rc = ntb_process_tx(qp, entry);
+	mtx_unlock(&qp->tx_lock);
+	if (rc != 0) {
+		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+		CTR1(KTR_NTB,
+		    "TX: process_tx failed. Returning entry %p to tx_free_q",
+		    entry);
+	}
+	return (rc);
+}
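+
+/*
+ * Illustrative call (a sketch): from a client's transmit path an mbuf "m"
+ * can be handed to the transport, passed both as the per-buffer callback
+ * pointer and as the data buffer.  On EBUSY or EAGAIN the caller still
+ * owns the mbuf and may requeue it for a later retry:
+ *
+ *	rc = ntb_transport_tx_enqueue(qp, m, m, m_length(m, NULL));
+ *	if (rc != 0)
+ *		m_freem(m);
+ */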
+
+static void
+ntb_tx_copy_callback(void *data)
+{
+	struct ntb_queue_entry *entry = data;
+	struct ntb_transport_qp *qp = entry->qp;
+	struct ntb_payload_header *hdr = entry->x_hdr;
+
+	iowrite32(entry->flags | NTBT_DESC_DONE_FLAG, &hdr->flags);
+	CTR1(KTR_NTB, "TX: hdr %p set DESC_DONE", hdr);
+
+	ntb_peer_db_set(qp->dev, 1ull << qp->qp_num);
+
+	/*
+	 * The entry length can only be zero if the packet is intended to be a
+	 * "link down" or similar.  Since no payload is being sent in these
+	 * cases, there is nothing to add to the completion queue.
+	 */
+	if (entry->len > 0) {
+		qp->tx_bytes += entry->len;
+
+		if (qp->tx_handler)
+			qp->tx_handler(qp, qp->cb_data, entry->buf,
+			    entry->len);
+		else
+			m_freem(entry->buf);
+		entry->buf = NULL;
+	}
+
+	CTR3(KTR_NTB,
+	    "TX: entry %p sent. hdr->ver = %u, hdr->flags = 0x%x, Returning "
+	    "to tx_free_q", entry, hdr->ver, hdr->flags);
+	ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+}
+
+static void
+ntb_memcpy_tx(struct ntb_queue_entry *entry, void *offset)
+{
+
+	CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset);
+	if (entry->buf != NULL) {
+		m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset);
+
+		/*
+		 * Ensure that the data is fully copied before setting the
+		 * flags
+		 */
+		wmb();
+	}
+
+	ntb_tx_copy_callback(entry);
+}
+
+static void
+ntb_async_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
+{
+	struct ntb_payload_header *hdr;
+	void *offset;
+
+	offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
+	hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame -
+	    sizeof(struct ntb_payload_header));
+	entry->x_hdr = hdr;
+
+	iowrite32(entry->len, &hdr->len);
+	iowrite32(qp->tx_pkts, &hdr->ver);
+
+	ntb_memcpy_tx(entry, offset);
+}
+
+static int
+ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
+{
+
+	CTR3(KTR_NTB,
+	    "TX: process_tx: tx_pkts=%lu, tx_index=%u, remote entry=%u",
+	    qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry);
+	if (qp->tx_index == qp->remote_rx_info->entry) {
+		CTR0(KTR_NTB, "TX: ring full");
+		qp->tx_ring_full++;
+		return (EAGAIN);
+	}
+
+	if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
+		if (qp->tx_handler != NULL)
+			qp->tx_handler(qp, qp->cb_data, entry->buf, EIO);
+		else
+			m_freem(entry->buf);
+
+		entry->buf = NULL;
+		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+		CTR1(KTR_NTB,
+		    "TX: frame too big. returning entry %p to tx_free_q",
+		    entry);
+		return (0);
+	}
+	CTR2(KTR_NTB, "TX: copying entry %p to index %u", entry, qp->tx_index);
+	ntb_async_tx(qp, entry);
+
+	qp->tx_index++;
+	qp->tx_index %= qp->tx_max_entry;
+
+	qp->tx_pkts++;
+
+	return (0);
+}
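+
+/*
+ * Worked example of the ring-full test above (illustrative numbers): with
+ * tx_max_entry = 4, the producer may advance tx_index only until it would
+ * equal remote_rx_info->entry, the peer's last-completed slot; e.g. with
+ * tx_index = 1 and remote entry = 1, ntb_process_tx() returns EAGAIN until
+ * the peer advances its rx_info.  One slot always stays unused, which is
+ * why each ring needs at least 2 buffers.
+ */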
+
+/* Transport Rx */
+static void
+ntb_transport_rxc_db(void *arg, int pending __unused)
+{
+	struct ntb_transport_qp *qp = arg;
+	int rc;
+
+	CTR0(KTR_NTB, "RX: transport_rx");
+again:
+	while ((rc = ntb_process_rxc(qp)) == 0)
+		;
+	CTR1(KTR_NTB, "RX: process_rxc returned %d", rc);
+
+	if ((ntb_db_read(qp->dev) & (1ull << qp->qp_num)) != 0) {
+		/* If db is set, clear it and check queue once more. */
+		ntb_db_clear(qp->dev, 1ull << qp->qp_num);
+		goto again;
+	}
+}
+
+static int
+ntb_process_rxc(struct ntb_transport_qp *qp)
+{
+	struct ntb_payload_header *hdr;
+	struct ntb_queue_entry *entry;
+	caddr_t offset;
+
+	offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index;
+	hdr = (void *)(offset + qp->rx_max_frame -
+	    sizeof(struct ntb_payload_header));
+
+	CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index);
+	if ((hdr->flags & NTBT_DESC_DONE_FLAG) == 0) {
+		CTR0(KTR_NTB, "RX: hdr not done");
+		qp->rx_ring_empty++;
+		return (EAGAIN);
+	}
+
+	if ((hdr->flags & NTBT_LINK_DOWN_FLAG) != 0) {
+		CTR0(KTR_NTB, "RX: link down");
+		ntb_qp_link_down(qp);
+		hdr->flags = 0;
+		return (EAGAIN);
+	}
+
+	if (hdr->ver != (uint32_t)qp->rx_pkts) {
+		CTR2(KTR_NTB,"RX: ver != rx_pkts (%x != %lx). "
+		    "Returning entry to rx_pend_q", hdr->ver, qp->rx_pkts);
+		qp->rx_err_ver++;
+		return (EIO);
+	}
+
+	entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q);
+	if (entry == NULL) {
+		qp->rx_err_no_buf++;
+		CTR0(KTR_NTB, "RX: No entries in rx_pend_q");
+		return (EAGAIN);
+	}
+	callout_stop(&qp->rx_full);
+	CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry);
+
+	entry->x_hdr = hdr;
+	entry->index = qp->rx_index;
+
+	if (hdr->len > entry->len) {
+		CTR2(KTR_NTB, "RX: len too long. Wanted %ju got %ju",
+		    (uintmax_t)hdr->len, (uintmax_t)entry->len);
+		qp->rx_err_oflow++;
+
+		entry->len = -EIO;
+		entry->flags |= NTBT_DESC_DONE_FLAG;
+
+		ntb_complete_rxc(qp);
+	} else {
+		qp->rx_bytes += hdr->len;
+		qp->rx_pkts++;
+
+		CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts);
+
+		entry->len = hdr->len;
+
+		ntb_memcpy_rx(qp, entry, offset);
+	}
+
+	qp->rx_index++;
+	qp->rx_index %= qp->rx_max_entry;
+	return (0);
+}
+
+static void
+ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
+    void *offset)
+{
+	struct ifnet *ifp = entry->cb_data;
+	unsigned int len = entry->len;
+
+	CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset);
+
+	entry->buf = (void *)m_devget(offset, len, 0, ifp, NULL);
+	if (entry->buf == NULL)
+		entry->len = -ENOMEM;
+
+	/* Ensure that the data is globally visible before clearing the flag */
+	wmb();
+
+	CTR2(KTR_NTB, "RX: copied entry %p to mbuf %p.", entry, entry->buf);
+	ntb_rx_copy_callback(qp, entry);
+}
+
+static inline void
+ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data)
+{
+	struct ntb_queue_entry *entry;
+
+	entry = data;
+	entry->flags |= NTBT_DESC_DONE_FLAG;
+	ntb_complete_rxc(qp);
+}
+
+static void
+ntb_complete_rxc(struct ntb_transport_qp *qp)
+{
+	struct ntb_queue_entry *entry;
+	struct mbuf *m;
+	unsigned len;
+
+	CTR0(KTR_NTB, "RX: rx_completion_task");
+
+	mtx_lock_spin(&qp->ntb_rx_q_lock);
+
+	while (!STAILQ_EMPTY(&qp->rx_post_q)) {
+		entry = STAILQ_FIRST(&qp->rx_post_q);
+		if ((entry->flags & NTBT_DESC_DONE_FLAG) == 0)
+			break;
+
+		entry->x_hdr->flags = 0;
+		iowrite32(entry->index, &qp->rx_info->entry);
+
+		STAILQ_REMOVE_HEAD(&qp->rx_post_q, entry);
+
+		len = entry->len;
+		m = entry->buf;
+
+		/*
+		 * Re-initialize queue_entry for reuse; rx_handler takes
+		 * ownership of the mbuf.
+		 */
+		entry->buf = NULL;
+		entry->len = transport_mtu;
+		entry->cb_data = qp->cb_data;
+
+		STAILQ_INSERT_TAIL(&qp->rx_pend_q, entry, entry);
+
+		mtx_unlock_spin(&qp->ntb_rx_q_lock);
+
+		CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m);
+		if (qp->rx_handler != NULL && qp->client_ready)
+			qp->rx_handler(qp, qp->cb_data, m, len);
+		else
+			m_freem(m);
+
+		mtx_lock_spin(&qp->ntb_rx_q_lock);
+	}
+
+	mtx_unlock_spin(&qp->ntb_rx_q_lock);
+}
+
+static void
+ntb_transport_doorbell_callback(void *data, uint32_t vector)
+{
+	struct ntb_transport_ctx *nt = data;
+	struct ntb_transport_qp *qp;
+	uint64_t vec_mask;
+	unsigned qp_num;
+
+	vec_mask = ntb_db_vector_mask(nt->dev, vector);
+	vec_mask &= nt->qp_bitmap;
+	if ((vec_mask & (vec_mask - 1)) != 0)
+		vec_mask &= ntb_db_read(nt->dev);
+	while (vec_mask != 0) {
+		qp_num = ffsll(vec_mask) - 1;
+
+		qp = &nt->qp_vec[qp_num];
+		if (qp->link_is_up)
+			taskqueue_enqueue(qp->rxc_tq, &qp->rxc_db_work);
+
+		vec_mask &= ~(1ull << qp_num);
+	}
+}
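+
+/*
+ * Dispatch example (illustrative): if vec_mask ends up as 0b0101, the loop
+ * above first services qp 0 (ffsll() returns 1), clears bit 0, then
+ * services qp 2, so a single interrupt vector can drain several queues in
+ * ascending order.
+ */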
+
+/* Link Event handler */
+static void
+ntb_transport_event_callback(void *data)
+{
+	struct ntb_transport_ctx *nt = data;
+
+	if (ntb_link_is_up(nt->dev, NULL, NULL)) {
+		ntb_printf(1, "HW link up\n");
+		callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
+	} else {
+		ntb_printf(1, "HW link down\n");
+		taskqueue_enqueue(taskqueue_swi, &nt->link_cleanup);
+	}
+}
+
+/* Link bring up */
+static void
+ntb_transport_link_work(void *arg)
+{
+	struct ntb_transport_ctx *nt = arg;
+	device_t dev = nt->dev;
+	struct ntb_transport_qp *qp;
+	uint64_t val64, size;
+	uint32_t val;
+	unsigned i;
+	int rc;
+
+	/* Send the local info, in the reverse of the order we read it back */
+	for (i = 0; i < nt->mw_count; i++) {
+		size = nt->mw_vec[i].phys_size;
+
+		if (max_mw_size != 0 && size > max_mw_size)
+			size = max_mw_size;
+
+		ntb_peer_spad_write(dev, NTBT_MW0_SZ_HIGH + (i * 2),
+		    size >> 32);
+		ntb_peer_spad_write(dev, NTBT_MW0_SZ_LOW + (i * 2), size);
+	}
+	ntb_peer_spad_write(dev, NTBT_NUM_MWS, nt->mw_count);
+	ntb_peer_spad_write(dev, NTBT_NUM_QPS, nt->qp_count);
+	ntb_peer_spad_write(dev, NTBT_QP_LINKS, 0);
+	ntb_peer_spad_write(dev, NTBT_VERSION, NTB_TRANSPORT_VERSION);
+
+	/* Query the remote side for its info */
+	val = 0;
+	ntb_spad_read(dev, NTBT_VERSION, &val);
+	if (val != NTB_TRANSPORT_VERSION)
+		goto out;
+
+	ntb_spad_read(dev, NTBT_NUM_QPS, &val);
+	if (val != nt->qp_count)
+		goto out;
+
+	ntb_spad_read(dev, NTBT_NUM_MWS, &val);
+	if (val != nt->mw_count)
+		goto out;
+
+	for (i = 0; i < nt->mw_count; i++) {
+		ntb_spad_read(dev, NTBT_MW0_SZ_HIGH + (i * 2), &val);
+		val64 = (uint64_t)val << 32;
+
+		ntb_spad_read(dev, NTBT_MW0_SZ_LOW + (i * 2), &val);
+		val64 |= val;
+
+		rc = ntb_set_mw(nt, i, val64);
+		if (rc != 0)
+			goto free_mws;
+	}
+
+	nt->link_is_up = true;
+	ntb_printf(1, "transport link up\n");
+
+	for (i = 0; i < nt->qp_count; i++) {
+		qp = &nt->qp_vec[i];
+
+		ntb_transport_setup_qp_mw(nt, i);
+
+		if (qp->client_ready)
+			callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
+	}
+
+	return;
+
+free_mws:
+	for (i = 0; i < nt->mw_count; i++)
+		ntb_free_mw(nt, i);
+out:
+	if (ntb_link_is_up(dev, NULL, NULL))
+		callout_reset(&nt->link_work,
+		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt);
+}
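+
+/*
+ * Handshake summary (descriptive): each side publishes its parameters in
+ * the peer's scratchpads and then polls its own until both sides agree:
+ *
+ *	NTBT_MW0_SZ_HIGH/LOW + i*2	size of memory window i, in two
+ *					32-bit halves
+ *	NTBT_NUM_MWS			number of memory windows
+ *	NTBT_NUM_QPS			number of queue pairs
+ *	NTBT_QP_LINKS			bitmask of client-ready queues
+ *	NTBT_VERSION			must equal NTB_TRANSPORT_VERSION
+ *
+ * On any mismatch the callout fires again after NTB_LINK_DOWN_TIMEOUT ms
+ * for as long as the hardware link stays up.
+ */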
+
+static int
+ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, size_t size)
+{
+	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
+	size_t xlat_size, buff_size;
+	int rc;
+
+	if (size == 0)
+		return (EINVAL);
+
+	xlat_size = roundup(size, mw->xlat_align_size);
+	buff_size = xlat_size;
+
+	/* No need to re-setup */
+	if (mw->xlat_size == xlat_size)
+		return (0);
+
+	if (mw->buff_size != 0)
+		ntb_free_mw(nt, num_mw);
+
+	/* Alloc memory for receiving data.  Must be aligned */
+	mw->xlat_size = xlat_size;
+	mw->buff_size = buff_size;
+
+	mw->virt_addr = contigmalloc(mw->buff_size, M_NTB_T, M_ZERO, 0,
+	    mw->addr_limit, mw->xlat_align, 0);
+	if (mw->virt_addr == NULL) {
+		ntb_printf(0, "Unable to allocate MW buffer of size %zu/%zu\n",
+		    mw->buff_size, mw->xlat_size);
+		mw->xlat_size = 0;
+		mw->buff_size = 0;
+		return (ENOMEM);
+	}
+	/* TODO: replace with bus_space_* functions */
+	mw->dma_addr = vtophys(mw->virt_addr);
+
+	/*
+	 * Ensure that the allocation from contigmalloc is aligned as
+	 * requested.  XXX: This may not be needed -- brought in for parity
+	 * with the Linux driver.
+	 */
+	if (mw->dma_addr % mw->xlat_align != 0) {
+		ntb_printf(0,
+		    "DMA memory 0x%jx not aligned to BAR size 0x%zx\n",
+		    (uintmax_t)mw->dma_addr, size);
+		ntb_free_mw(nt, num_mw);
+		return (ENOMEM);
+	}
+
+	/* Notify HW the memory location of the receive buffer */
+	rc = ntb_mw_set_trans(nt->dev, num_mw, mw->dma_addr, mw->xlat_size);
+	if (rc) {
+		ntb_printf(0, "Unable to set mw%d translation\n", num_mw);
+		ntb_free_mw(nt, num_mw);
+		return (rc);
+	}
+
+	return (0);
+}
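+
+/*
+ * Note on the allocation above: contigmalloc() is asked for physically
+ * contiguous memory below mw->addr_limit, aligned to mw->xlat_align,
+ * because the peer's translated window maps this buffer directly; the
+ * vtophys() address is then programmed into the hardware through
+ * ntb_mw_set_trans().
+ */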
+
+static void
+ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
+{
+	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
+
+	if (mw->virt_addr == NULL)
+		return;
+
+	ntb_mw_clear_trans(nt->dev, num_mw);
+	contigfree(mw->virt_addr, mw->xlat_size, M_NTB_T);
+	mw->xlat_size = 0;
+	mw->buff_size = 0;
+	mw->virt_addr = NULL;
+}
+
+static int
+ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+	struct ntb_transport_mw *mw;
+	void *offset;
+	ntb_q_idx_t i;
+	size_t rx_size;
+	unsigned num_qps_mw, mw_num, mw_count;
+
+	mw_count = nt->mw_count;
+	mw_num = QP_TO_MW(nt, qp_num);
+	mw = &nt->mw_vec[mw_num];
+
+	if (mw->virt_addr == NULL)
+		return (ENOMEM);
+
+	if (mw_num < nt->qp_count % mw_count)
+		num_qps_mw = nt->qp_count / mw_count + 1;
+	else
+		num_qps_mw = nt->qp_count / mw_count;
+
+	rx_size = mw->xlat_size / num_qps_mw;
+	qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count);
+	rx_size -= sizeof(struct ntb_rx_info);
+
+	qp->remote_rx_info = (void *)(qp->rx_buff + rx_size);
+
+	/* Due to house-keeping, there must be at least 2 buffers */
+	qp->rx_max_frame = qmin(transport_mtu, rx_size / 2);
+	qp->rx_max_entry = rx_size / qp->rx_max_frame;
+	qp->rx_index = 0;
+
+	qp->remote_rx_info->entry = qp->rx_max_entry - 1;
+
+	/* Set up the hdr offsets with 0s */
+	for (i = 0; i < qp->rx_max_entry; i++) {
+		offset = (void *)(qp->rx_buff + qp->rx_max_frame * (i + 1) -
+		    sizeof(struct ntb_payload_header));
+		memset(offset, 0, sizeof(struct ntb_payload_header));
+	}
+
+	qp->rx_pkts = 0;
+	qp->tx_pkts = 0;
+	qp->tx_index = 0;
+
+	return (0);
+}
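+
+/*
+ * Partitioning example (illustrative numbers): with qp_count = 3 and
+ * mw_count = 2, QPs 0 and 2 share MW 0 while QP 1 has MW 1 to itself, so
+ * num_qps_mw is 2 for the first window and 1 for the second.  Each QP's
+ * slice is mw->xlat_size / num_qps_mw, with one struct ntb_rx_info carved
+ * off the end for the peer's flow-control index.
+ */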
+
+static void
+ntb_qp_link_work(void *arg)
+{
+	struct ntb_transport_qp *qp = arg;
+	device_t dev = qp->dev;
+	struct ntb_transport_ctx *nt = qp->transport;
+	int i;
+	uint32_t val;
+
+	/* Report queues that are up on our side */
+	for (i = 0, val = 0; i < nt->qp_count; i++) {
+		if (nt->qp_vec[i].client_ready)
+			val |= (1 << i);
+	}
+	ntb_peer_spad_write(dev, NTBT_QP_LINKS, val);
+
+	/* See if the remote side is up */
+	ntb_spad_read(dev, NTBT_QP_LINKS, &val);
+	if ((val & (1ull << qp->qp_num)) != 0) {
+		ntb_printf(2, "qp %d link up\n", qp->qp_num);
+		qp->link_is_up = true;
+
+		if (qp->event_handler != NULL)
+			qp->event_handler(qp->cb_data, NTB_LINK_UP);
+
+		ntb_db_clear_mask(dev, 1ull << qp->qp_num);
+	} else if (nt->link_is_up)
+		callout_reset(&qp->link_work,
+		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
+}
+
+/* Link down event */
+static void
+ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
+{
+	struct ntb_transport_qp *qp;
+	int i;
+
+	/* Pass along the info to any clients */
+	for (i = 0; i < nt->qp_count; i++) {
+		if ((nt->qp_bitmap & (1 << i)) != 0) {
+			qp = &nt->qp_vec[i];
+			ntb_qp_link_cleanup(qp);
+			callout_drain(&qp->link_work);
+		}
+	}
+
+	if (!nt->link_is_up)
+		callout_drain(&nt->link_work);
+
+	/*
+	 * The scratchpad registers keep their values if the remote side
+	 * goes down; clear them now so they hold sane values the next
+	 * time they are accessed.
+	 */
+	ntb_spad_clear(nt->dev);
+}
+
+static void
+ntb_transport_link_cleanup_work(void *arg, int pending __unused)
+{
+
+	ntb_transport_link_cleanup(arg);
+}
+
+static void
+ntb_qp_link_down(struct ntb_transport_qp *qp)
+{
+
+	ntb_qp_link_cleanup(qp);
+}
+
+static void
+ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
+{
+
+	qp->link_is_up = false;
+	ntb_db_set_mask(qp->dev, 1ull << qp->qp_num);
+
+	qp->tx_index = qp->rx_index = 0;
+	qp->tx_bytes = qp->rx_bytes = 0;
+	qp->tx_pkts = qp->rx_pkts = 0;
+
+	qp->rx_ring_empty = 0;
+	qp->tx_ring_full = 0;
+
+	qp->rx_err_no_buf = qp->tx_err_no_buf = 0;
+	qp->rx_err_oflow = qp->rx_err_ver = 0;
+}
+
+static void
+ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
+{
+
+	callout_drain(&qp->link_work);
+	ntb_qp_link_down_reset(qp);
+
+	if (qp->event_handler != NULL)
+		qp->event_handler(qp->cb_data, NTB_LINK_DOWN);
+}
+
+/* Link commanded down */
+/**
+ * ntb_transport_link_down - Notify NTB transport to no longer enqueue data
+ * @qp: NTB transport layer queue to be disabled
+ *
+ * Notify the NTB transport layer of the client's desire to no longer receive
+ * data on the specified transport queue.  It is the client's responsibility to
+ * ensure all entries on the queue are purged or otherwise handled
+ * appropriately.
+ */
+void
+ntb_transport_link_down(struct ntb_transport_qp *qp)
+{
+	struct ntb_transport_ctx *nt = qp->transport;
+	int i;
+	uint32_t val;
+
+	qp->client_ready = false;
+	for (i = 0, val = 0; i < nt->qp_count; i++) {
+		if (nt->qp_vec[i].client_ready)
+			val |= (1 << i);
+	}
+	ntb_peer_spad_write(qp->dev, NTBT_QP_LINKS, val);
+
+	if (qp->link_is_up)
+		ntb_send_link_down(qp);
+	else
+		callout_drain(&qp->link_work);
+}
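+
+/*
+ * Illustrative teardown (a sketch): a detaching client mirrors the
+ * create/link_up sequence in reverse:
+ *
+ *	ntb_transport_link_down(sc->qp);
+ *	ntb_transport_free_queue(sc->qp);
+ */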
+
+/**
+ * ntb_transport_link_query - Query transport link state
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query connectivity to the remote system of the NTB transport queue
+ *
+ * RETURNS: true for link up or false for link down
+ */
+bool
+ntb_transport_link_query(struct ntb_transport_qp *qp)
+{
+
+	return (qp->link_is_up);
+}
+
+static void
+ntb_send_link_down(struct ntb_transport_qp *qp)
+{
+	struct ntb_queue_entry *entry;
+	int i, rc;
+
+	if (!qp->link_is_up)
+		return;
+
+	for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) {
+		entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+		if (entry != NULL)
+			break;
+		pause("NTB Wait for link down", hz / 10);
+	}
+
+	if (entry == NULL)
+		return;
+
+	entry->cb_data = NULL;
+	entry->buf = NULL;
+	entry->len = 0;
+	entry->flags = NTBT_LINK_DOWN_FLAG;
+
+	mtx_lock(&qp->tx_lock);
+	rc = ntb_process_tx(qp, entry);
+	mtx_unlock(&qp->tx_lock);
+	if (rc != 0)
+		printf("ntb: Failed to send link down\n");
+
+	ntb_qp_link_down_reset(qp);
+}
+
+/* List Management */
+
+static void
+ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
+    struct ntb_queue_list *list)
+{
+
+	mtx_lock_spin(lock);
+	STAILQ_INSERT_TAIL(list, entry, entry);
+	mtx_unlock_spin(lock);
+}
+
+static struct ntb_queue_entry *
+ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list)
+{
+	struct ntb_queue_entry *entry;
+
+	mtx_lock_spin(lock);
+	if (STAILQ_EMPTY(list)) {
+		entry = NULL;
+		goto out;
+	}
+	entry = STAILQ_FIRST(list);
+	STAILQ_REMOVE_HEAD(list, entry);
+out:
+	mtx_unlock_spin(lock);
+
+	return (entry);
+}
+
+static struct ntb_queue_entry *
+ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from,
+    struct ntb_queue_list *to)
+{
+	struct ntb_queue_entry *entry;
+
+	mtx_lock_spin(lock);
+	if (STAILQ_EMPTY(from)) {
+		entry = NULL;
+		goto out;
+	}
+	entry = STAILQ_FIRST(from);
+	STAILQ_REMOVE_HEAD(from, entry);
+	STAILQ_INSERT_TAIL(to, entry, entry);
+
+out:
+	mtx_unlock_spin(lock);
+	return (entry);
+}
+
+/**
+ * ntb_transport_qp_num - Query the qp number
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query qp number of the NTB transport queue
+ *
+ * RETURNS: a zero-based number specifying the qp number
+ */
+unsigned char
+ntb_transport_qp_num(struct ntb_transport_qp *qp)
+{
+
+	return (qp->qp_num);
+}
+
+/**
+ * ntb_transport_max_size - Query the max payload size of a qp
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query the maximum payload size permissible on the given qp
+ *
+ * RETURNS: the max payload size of a qp
+ */
+unsigned int
+ntb_transport_max_size(struct ntb_transport_qp *qp)
+{
+
+	return (qp->tx_max_frame - sizeof(struct ntb_payload_header));
+}
+
+unsigned int
+ntb_transport_tx_free_entry(struct ntb_transport_qp *qp)
+{
+	unsigned int head = qp->tx_index;
+	unsigned int tail = qp->remote_rx_info->entry;
+
+	return (tail >= head ? tail - head : qp->tx_max_entry + tail - head);
+}
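+
+/*
+ * Occupancy example (illustrative numbers): with tx_max_entry = 8,
+ * head (tx_index) = 6 and tail (remote_rx_info->entry) = 2, the ring has
+ * wrapped, so the free count is 8 + 2 - 6 = 4 entries.
+ */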
+
+static device_method_t ntb_transport_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,     ntb_transport_probe),
+	DEVMETHOD(device_attach,    ntb_transport_attach),
+	DEVMETHOD(device_detach,    ntb_transport_detach),
+	DEVMETHOD_END
+};
+
+devclass_t ntb_transport_devclass;
+static DEFINE_CLASS_0(ntb_transport, ntb_transport_driver,
+    ntb_transport_methods, sizeof(struct ntb_transport_ctx));
+DRIVER_MODULE(ntb_transport, ntb_hw, ntb_transport_driver,
+    ntb_transport_devclass, NULL, NULL);
+MODULE_DEPEND(ntb_transport, ntb, 1, 1, 1);
+MODULE_VERSION(ntb_transport, 1);


Property changes on: trunk/sys/dev/ntb/ntb_transport.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/ntb/ntb_transport.h
===================================================================
--- trunk/sys/dev/ntb/ntb_transport.h	                        (rev 0)
+++ trunk/sys/dev/ntb/ntb_transport.h	2018-05-27 23:30:17 UTC (rev 10090)
@@ -0,0 +1,62 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/ntb/ntb_transport.h 304405 2016-08-18 11:00:48Z mav $
+ */
+
+struct ntb_transport_qp;
+
+extern devclass_t ntb_transport_devclass;
+
+enum ntb_link_event {
+	NTB_LINK_DOWN = 0,
+	NTB_LINK_UP,
+};
+
+struct ntb_queue_handlers {
+	void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+	    void *data, int len);
+	void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+	    void *data, int len);
+	void (*event_handler)(void *data, enum ntb_link_event status);
+};
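+
+/*
+ * A minimal, hypothetical receive handler matching the rx_handler
+ * signature above.  The transport hands ownership of the received buffer
+ * (an mbuf for the if_ntb client) to the handler; a negative len reports
+ * an rx error such as -EIO on overflow:
+ *
+ *	static void
+ *	my_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
+ *	    void *data, int len)
+ *	{
+ *		struct my_softc *sc = qp_data;
+ *		struct mbuf *m = data;
+ *
+ *		if (len < 0) {
+ *			m_freem(m);
+ *			return;
+ *		}
+ *		(*sc->ifp->if_input)(sc->ifp, m);
+ *	}
+ */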
+
+int ntb_transport_queue_count(device_t dev);
+struct ntb_transport_qp *
+ntb_transport_create_queue(device_t dev, int q,
+    const struct ntb_queue_handlers *handlers, void *data);
+void ntb_transport_free_queue(struct ntb_transport_qp *qp);
+unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp);
+unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp);
+int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+			     unsigned int len);
+int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+			     unsigned int len);
+void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len);
+void ntb_transport_link_up(struct ntb_transport_qp *qp);
+void ntb_transport_link_down(struct ntb_transport_qp *qp);
+bool ntb_transport_link_query(struct ntb_transport_qp *qp);
+unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp);


Property changes on: trunk/sys/dev/ntb/ntb_transport.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property

