[Midnightbsd-cvs] src [10092] trunk/sys/dev: sync with freebsd

laffer1 at midnightbsd.org
Sun May 27 19:32:52 EDT 2018


Revision: 10092
          http://svnweb.midnightbsd.org/src/?rev=10092
Author:   laffer1
Date:     2018-05-27 19:32:51 -0400 (Sun, 27 May 2018)
Log Message:
-----------
sync sys/dev with FreeBSD stable/10: update the mpt, mse, mxge, my, netmap, null, and nvram2env code, and add the mrsas and nand drivers along with new netmap sources

Modified Paths:
--------------
    trunk/sys/dev/mpt/mpilib/mpi.h
    trunk/sys/dev/mpt/mpilib/mpi_cnfg.h
    trunk/sys/dev/mpt/mpilib/mpi_fc.h
    trunk/sys/dev/mpt/mpilib/mpi_init.h
    trunk/sys/dev/mpt/mpilib/mpi_ioc.h
    trunk/sys/dev/mpt/mpilib/mpi_lan.h
    trunk/sys/dev/mpt/mpilib/mpi_log_fc.h
    trunk/sys/dev/mpt/mpilib/mpi_log_sas.h
    trunk/sys/dev/mpt/mpilib/mpi_raid.h
    trunk/sys/dev/mpt/mpilib/mpi_sas.h
    trunk/sys/dev/mpt/mpilib/mpi_targ.h
    trunk/sys/dev/mpt/mpilib/mpi_tool.h
    trunk/sys/dev/mpt/mpilib/mpi_type.h
    trunk/sys/dev/mpt/mpt.c
    trunk/sys/dev/mpt/mpt.h
    trunk/sys/dev/mpt/mpt_cam.c
    trunk/sys/dev/mpt/mpt_cam.h
    trunk/sys/dev/mpt/mpt_debug.c
    trunk/sys/dev/mpt/mpt_pci.c
    trunk/sys/dev/mpt/mpt_raid.c
    trunk/sys/dev/mpt/mpt_raid.h
    trunk/sys/dev/mpt/mpt_reg.h
    trunk/sys/dev/mpt/mpt_user.c
    trunk/sys/dev/mse/mse.c
    trunk/sys/dev/mse/mse_cbus.c
    trunk/sys/dev/mse/mse_isa.c
    trunk/sys/dev/mse/msevar.h
    trunk/sys/dev/mxge/eth_z8e.h
    trunk/sys/dev/mxge/ethp_z8e.h
    trunk/sys/dev/mxge/if_mxge.c
    trunk/sys/dev/mxge/if_mxge_var.h
    trunk/sys/dev/mxge/mcp_gen_header.h
    trunk/sys/dev/mxge/mxge_eth_z8e.c
    trunk/sys/dev/mxge/mxge_ethp_z8e.c
    trunk/sys/dev/mxge/mxge_mcp.h
    trunk/sys/dev/mxge/mxge_rss_eth_z8e.c
    trunk/sys/dev/mxge/mxge_rss_ethp_z8e.c
    trunk/sys/dev/mxge/rss_eth_z8e.h
    trunk/sys/dev/mxge/rss_ethp_z8e.h
    trunk/sys/dev/my/if_my.c
    trunk/sys/dev/my/if_myreg.h
    trunk/sys/dev/netmap/if_em_netmap.h
    trunk/sys/dev/netmap/if_igb_netmap.h
    trunk/sys/dev/netmap/if_lem_netmap.h
    trunk/sys/dev/netmap/if_re_netmap.h
    trunk/sys/dev/netmap/ixgbe_netmap.h
    trunk/sys/dev/netmap/netmap.c
    trunk/sys/dev/netmap/netmap_kern.h
    trunk/sys/dev/netmap/netmap_mem2.c
    trunk/sys/dev/null/null.c
    trunk/sys/dev/nvram2env/nvram2env.c

Added Paths:
-----------
    trunk/sys/dev/mrsas/
    trunk/sys/dev/mrsas/mrsas.c
    trunk/sys/dev/mrsas/mrsas.h
    trunk/sys/dev/mrsas/mrsas_cam.c
    trunk/sys/dev/mrsas/mrsas_fp.c
    trunk/sys/dev/mrsas/mrsas_ioctl.c
    trunk/sys/dev/mrsas/mrsas_ioctl.h
    trunk/sys/dev/mrsas/mrsas_linux.c
    trunk/sys/dev/nand/
    trunk/sys/dev/nand/nand.c
    trunk/sys/dev/nand/nand.h
    trunk/sys/dev/nand/nand_bbt.c
    trunk/sys/dev/nand/nand_cdev.c
    trunk/sys/dev/nand/nand_dev.h
    trunk/sys/dev/nand/nand_ecc_pos.h
    trunk/sys/dev/nand/nand_generic.c
    trunk/sys/dev/nand/nand_geom.c
    trunk/sys/dev/nand/nand_id.c
    trunk/sys/dev/nand/nand_if.m
    trunk/sys/dev/nand/nandbus.c
    trunk/sys/dev/nand/nandbus.h
    trunk/sys/dev/nand/nandbus_if.m
    trunk/sys/dev/nand/nandsim.c
    trunk/sys/dev/nand/nandsim.h
    trunk/sys/dev/nand/nandsim_chip.c
    trunk/sys/dev/nand/nandsim_chip.h
    trunk/sys/dev/nand/nandsim_ctrl.c
    trunk/sys/dev/nand/nandsim_log.c
    trunk/sys/dev/nand/nandsim_log.h
    trunk/sys/dev/nand/nandsim_swap.c
    trunk/sys/dev/nand/nandsim_swap.h
    trunk/sys/dev/nand/nfc_at91.c
    trunk/sys/dev/nand/nfc_at91.h
    trunk/sys/dev/nand/nfc_fsl.c
    trunk/sys/dev/nand/nfc_fsl.h
    trunk/sys/dev/nand/nfc_if.m
    trunk/sys/dev/nand/nfc_mv.c
    trunk/sys/dev/netmap/if_ixl_netmap.h
    trunk/sys/dev/netmap/if_vtnet_netmap.h
    trunk/sys/dev/netmap/netmap_freebsd.c
    trunk/sys/dev/netmap/netmap_generic.c
    trunk/sys/dev/netmap/netmap_mbq.c
    trunk/sys/dev/netmap/netmap_mbq.h
    trunk/sys/dev/netmap/netmap_mem2.h
    trunk/sys/dev/netmap/netmap_monitor.c
    trunk/sys/dev/netmap/netmap_offloadings.c
    trunk/sys/dev/netmap/netmap_pipe.c
    trunk/sys/dev/netmap/netmap_vale.c

Modified: trunk/sys/dev/mpt/mpilib/mpi.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi.h 233425 2012-03-24 16:23:21Z marius $ */
 /*-
  * Copyright (c) 2000-2010, LSI Logic Corporation and its contributors.
  * All rights reserved.

Modified: trunk/sys/dev/mpt/mpilib/mpi_cnfg.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi_cnfg.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi_cnfg.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi_cnfg.h 233425 2012-03-24 16:23:21Z marius $ */
 /*-
  * Copyright (c) 2000-2010, LSI Logic Corporation and its contributors.
  * All rights reserved.

Modified: trunk/sys/dev/mpt/mpilib/mpi_fc.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi_fc.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi_fc.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi_fc.h 233425 2012-03-24 16:23:21Z marius $ */
 /*-
  * Copyright (c) 2000-2010, LSI Logic Corporation and its contributors.
  * All rights reserved.

Modified: trunk/sys/dev/mpt/mpilib/mpi_init.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi_init.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi_init.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi_init.h 233425 2012-03-24 16:23:21Z marius $ */
 /*-
  * Copyright (c) 2000-2010, LSI Logic Corporation and its contributors.
  * All rights reserved.

Modified: trunk/sys/dev/mpt/mpilib/mpi_ioc.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi_ioc.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi_ioc.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi_ioc.h 233425 2012-03-24 16:23:21Z marius $ */
 /*-
  * Copyright (c) 2000-2010, LSI Logic Corporation and its contributors.
  * All rights reserved.

Modified: trunk/sys/dev/mpt/mpilib/mpi_lan.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi_lan.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi_lan.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi_lan.h 233425 2012-03-24 16:23:21Z marius $ */
 /*-
  * Copyright (c) 2000-2010, LSI Logic Corporation and its contributors.
  * All rights reserved.

Modified: trunk/sys/dev/mpt/mpilib/mpi_log_fc.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi_log_fc.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi_log_fc.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi_log_fc.h 233425 2012-03-24 16:23:21Z marius $ */
 /*-
  * Copyright (c) 2000-2010, LSI Logic Corporation and its contributors.
  * All rights reserved.
@@ -35,7 +36,7 @@
  *                  in the IOCLogInfo field of a MPI Default Reply Message.
  *
  *  CREATION DATE:  6/02/2000
- *  ID:             $Id: mpi_log_fc.h,v 1.5 2013-01-05 20:21:17 laffer1 Exp $
+ *  ID:             $Id: fc_log.h,v 4.6 2001/07/26 14:41:33 sschremm Exp $
  */
 
 

Modified: trunk/sys/dev/mpt/mpilib/mpi_log_sas.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi_log_sas.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi_log_sas.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi_log_sas.h 233425 2012-03-24 16:23:21Z marius $ */
 /*-
  * Copyright (c) 2000-2010, LSI Logic Corporation and its contributors.
  * All rights reserved.

Modified: trunk/sys/dev/mpt/mpilib/mpi_raid.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi_raid.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi_raid.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi_raid.h 233425 2012-03-24 16:23:21Z marius $ */
 /*-
  * Copyright (c) 2000-2005, LSI Logic Corporation and its contributors.
  * All rights reserved.

Modified: trunk/sys/dev/mpt/mpilib/mpi_sas.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi_sas.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi_sas.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi_sas.h 233425 2012-03-24 16:23:21Z marius $ */
 /*-
  * Copyright (c) 2000-2010, LSI Logic Corporation and its contributors.
  * All rights reserved.

Modified: trunk/sys/dev/mpt/mpilib/mpi_targ.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi_targ.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi_targ.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi_targ.h 233425 2012-03-24 16:23:21Z marius $ */
 /*-
  * Copyright (c) 2000-2010, LSI Logic Corporation and its contributors.
  * All rights reserved.

Modified: trunk/sys/dev/mpt/mpilib/mpi_tool.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi_tool.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi_tool.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi_tool.h 233425 2012-03-24 16:23:21Z marius $ */
 /*-
  * Copyright (c) 2000-2010, LSI Logic Corporation and its contributors.
  * All rights reserved.

Modified: trunk/sys/dev/mpt/mpilib/mpi_type.h
===================================================================
--- trunk/sys/dev/mpt/mpilib/mpi_type.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpilib/mpi_type.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpilib/mpi_type.h 233425 2012-03-24 16:23:21Z marius $ */
 /*
  * Copyright (c) 2000-2010, LSI Logic Corporation and its contributors.
  * All rights reserved.
@@ -84,7 +85,7 @@
 
 #else
 
-#if defined(unix) || defined(__arm) || defined(ALPHA) || defined(__PPC__) || defined(__ppc)
+#if defined(__unix__) || defined(__arm) || defined(ALPHA) || defined(__PPC__) || defined(__ppc)
 
     typedef signed   int   S32;
     typedef unsigned int   U32;
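
For reference on the mpi_type.h hunk above: the traditional `unix' macro is
not predefined when the compiler runs in a strict ISO mode (e.g. -std=c99),
while `__unix__' still is, so the guard keeps selecting the int-sized 32-bit
types there.  A minimal sketch of the same idea, not taken from the commit:

/* Illustrative only -- pick 32-bit integer typedefs portably. */
#if defined(__unix__) || defined(__arm) || defined(__PPC__)
typedef signed   int S32;
typedef unsigned int U32;
#else
#error "define 32-bit S32/U32 typedefs for this platform"
#endif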

Modified: trunk/sys/dev/mpt/mpt.c
===================================================================
--- trunk/sys/dev/mpt/mpt.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpt.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Generic routines for LSI Fusion adapters.
  * FreeBSD Version.
@@ -96,7 +97,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/mpt/mpt.c 315811 2017-03-23 06:37:23Z mav $");
 
 #include <dev/mpt/mpt.h>
 #include <dev/mpt/mpt_cam.h> /* XXX For static handler registration */
@@ -1320,18 +1321,21 @@
 	     mpt_req_state_t state, mpt_req_state_t mask,
 	     int sleep_ok, int time_ms)
 {
-	int   error;
 	int   timeout;
 	u_int saved_cnt;
+	sbintime_t sbt;
 
 	/*
-	 * timeout is in ms.  0 indicates infinite wait.
-	 * Convert to ticks or 500us units depending on
+	 * time_ms is in ms, 0 indicates infinite wait.
+	 * Convert to sbintime_t or 500us units depending on
 	 * our sleep mode.
 	 */
 	if (sleep_ok != 0) {
-		timeout = (time_ms * hz) / 1000;
+		sbt = SBT_1MS * time_ms;
+		/* Set timeout as well so final timeout check works. */
+		timeout = time_ms;
 	} else {
+		sbt = 0; /* Squelch bogus gcc warning. */
 		timeout = time_ms * 2;
 	}
 	req->state |= REQ_STATE_NEED_WAKEUP;
@@ -1339,8 +1343,8 @@
 	saved_cnt = mpt->reset_cnt;
 	while ((req->state & mask) != state && mpt->reset_cnt == saved_cnt) {
 		if (sleep_ok != 0) {
-			error = mpt_sleep(mpt, req, PUSER, "mptreq", timeout);
-			if (error == EWOULDBLOCK) {
+			if (mpt_sleep(mpt, req, PUSER, "mptreq", sbt) ==
+			    EWOULDBLOCK) {
 				timeout = 0;
 				break;
 			}
@@ -1420,7 +1424,7 @@
 
 	/* Send the command */
 	for (i = 0; i < len; i++) {
-		mpt_write(mpt, MPT_OFFSET_DOORBELL, htole32(*data32++));
+		mpt_write_stream(mpt, MPT_OFFSET_DOORBELL, *data32++);
 		if (mpt_wait_db_ack(mpt) != MPT_OK) {
 			mpt_prt(mpt,
 			    "mpt_send_handshake_cmd: timeout @ index %d\n", i);
@@ -1454,7 +1458,7 @@
 	*data16++ = le16toh(data & MPT_DB_DATA_MASK);
 	mpt_write(mpt, MPT_OFFSET_INTR_STATUS, 0);
 
-	/* Get Second Word */
+	/* Get second word */
 	if (mpt_wait_db_int(mpt) != MPT_OK) {
 		mpt_prt(mpt, "mpt_recv_handshake_cmd timeout2\n");
 		return ETIMEDOUT;
@@ -1478,18 +1482,13 @@
 	left = (hdr->MsgLength << 1) - 2;
 	reply_left =  reply_len - 2;
 	while (left--) {
-		u_int16_t datum;
-
 		if (mpt_wait_db_int(mpt) != MPT_OK) {
 			mpt_prt(mpt, "mpt_recv_handshake_cmd timeout3\n");
 			return ETIMEDOUT;
 		}
 		data = mpt_read(mpt, MPT_OFFSET_DOORBELL);
-		datum = le16toh(data & MPT_DB_DATA_MASK);
-
 		if (reply_left-- > 0)
-			*data16++ = datum;
-
+			*data16++ = le16toh(data & MPT_DB_DATA_MASK);
 		mpt_write(mpt, MPT_OFFSET_INTR_STATUS, 0);
 	}
 
@@ -2697,7 +2696,11 @@
 	 */
 	mpt->max_cam_seg_cnt = min(mpt->max_seg_cnt, (MAXPHYS / PAGE_SIZE) + 1);
 
+	/* XXX Lame Locking! */
+	MPT_UNLOCK(mpt);
 	error = mpt_dma_buf_alloc(mpt);
+	MPT_LOCK(mpt);
+
 	if (error != 0) {
 		mpt_prt(mpt, "mpt_dma_buf_alloc() failed!\n");
 		return (EIO);
@@ -2747,6 +2750,7 @@
 		 * retrieved, we are responsible for re-downloading
 		 * the firmware after any hard-reset.
 		 */
+		MPT_UNLOCK(mpt);
 		mpt->fw_image_size = mpt->ioc_facts.FWImageSize;
 		error = mpt_dma_tag_create(mpt, mpt->parent_dmat, 1, 0,
 		    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
@@ -2754,6 +2758,7 @@
 		    &mpt->fw_dmat);
 		if (error != 0) {
 			mpt_prt(mpt, "cannot create firmware dma tag\n");
+			MPT_LOCK(mpt);
 			return (ENOMEM);
 		}
 		error = bus_dmamem_alloc(mpt->fw_dmat,
@@ -2762,6 +2767,7 @@
 		if (error != 0) {
 			mpt_prt(mpt, "cannot allocate firmware memory\n");
 			bus_dma_tag_destroy(mpt->fw_dmat);
+			MPT_LOCK(mpt);
 			return (ENOMEM);
 		}
 		mi.mpt = mpt;
@@ -2770,6 +2776,7 @@
 		    mpt->fw_image, mpt->fw_image_size, mpt_map_rquest, &mi, 0);
 		mpt->fw_phys = mi.phys;
 
+		MPT_LOCK(mpt);
 		error = mpt_upload_fw(mpt);
 		if (error != 0) {
 			mpt_prt(mpt, "firmware upload failed.\n");

Modified: trunk/sys/dev/mpt/mpt.h
===================================================================
--- trunk/sys/dev/mpt/mpt.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpt.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpt.h 315828 2017-03-23 06:55:32Z mav $ */
 /*-
  * Generic defines for LSI '909 FC  adapters.
  * FreeBSD Version.
@@ -220,9 +221,6 @@
 #define bus_dmamap_sync_range(dma_tag, dmamap, offset, len, op)	\
 	bus_dmamap_sync(dma_tag, dmamap, op)
 
-#if __FreeBSD_version < 600000
-#define	bus_get_dma_tag(x)	NULL
-#endif
 #define mpt_dma_tag_create(mpt, parent_tag, alignment, boundary,	\
 			   lowaddr, highaddr, filter, filterarg,	\
 			   maxsize, nsegments, maxsegsz, flags,		\
@@ -239,35 +237,7 @@
 };
 
 void mpt_map_rquest(void *, bus_dma_segment_t *, int, int);
-/* **************************** NewBUS interrupt Crock ************************/
-#if __FreeBSD_version < 700031
-#define	mpt_setup_intr(d, i, f, U, if, ifa, hp)	\
-	bus_setup_intr(d, i, f, if, ifa, hp)
-#else
-#define	mpt_setup_intr	bus_setup_intr
-#endif
 
-/* **************************** NewBUS CAM Support ****************************/
-#if __FreeBSD_version < 700049
-#define mpt_xpt_bus_register(sim, parent, bus)	\
-	xpt_bus_register(sim, bus)
-#else
-#define mpt_xpt_bus_register	xpt_bus_register
-#endif
-
-/**************************** Kernel Thread Support ***************************/
-#if __FreeBSD_version > 800001
-#define mpt_kthread_create(func, farg, proc_ptr, flags, stackpgs, fmtstr, arg) \
-	kproc_create(func, farg, proc_ptr, flags, stackpgs, fmtstr, arg)
-#define	mpt_kthread_exit(status)	\
-	kproc_exit(status)
-#else
-#define mpt_kthread_create(func, farg, proc_ptr, flags, stackpgs, fmtstr, arg) \
-	kthread_create(func, farg, proc_ptr, flags, stackpgs, fmtstr, arg)
-#define	mpt_kthread_exit(status)	\
-	kthread_exit(status)
-#endif
-
 /********************************** Endianess *********************************/
 #define	MPT_2_HOST64(ptr, tag)	ptr->tag = le64toh(ptr->tag)
 #define	MPT_2_HOST32(ptr, tag)	ptr->tag = le32toh(ptr->tag)
@@ -360,17 +330,17 @@
 } cfgparms_t;
 
 /**************************** MPI Target State Info ***************************/
-
 typedef struct {
 	uint32_t reply_desc;	/* current reply descriptor */
-	uint32_t resid;		/* current data residual */
 	uint32_t bytes_xfered;	/* current relative offset */
+	int resid;		/* current data residual */
 	union ccb *ccb;		/* pointer to currently active ccb */
 	request_t *req;		/* pointer to currently active assist request */
 	uint32_t
 		is_local : 1,
 		nxfers	 : 31;
-	uint32_t tag_id;
+	uint32_t tag_id;	/* Our local tag. */
+	uint16_t itag;		/* Initiator tag. */
 	enum {
 		TGT_STATE_NIL,
 		TGT_STATE_LOADING,
@@ -644,8 +614,9 @@
 			unsigned int initiator_id;
 		} spi;
 		struct {
-			char wwnn[19];
-			char wwpn[19];
+			uint64_t wwnn;
+			uint64_t wwpn;
+			uint32_t portid;
 		} fc;
 	} scinfo;
 
@@ -671,7 +642,6 @@
 	/*
 	 * PCI Hardware info
 	 */
-	int			pci_msi_count;
 	struct resource *	pci_irq;	/* Interrupt map for chip */
 	void *			ih;		/* Interrupt handle */
 #if 0
@@ -754,9 +724,10 @@
 	uint16_t		sequence;	/* Sequence Number */
 	uint16_t		pad3;
 
-
+#if 0
 	/* Paired port in some dual adapters configurations */
 	struct mpt_softc *	mpt2;
+#endif
 
 	/* FW Image management */
 	uint32_t		fw_image_size;
@@ -802,10 +773,10 @@
 #define	MPT_UNLOCK(mpt)		mtx_unlock(&(mpt)->mpt_lock)
 #define	MPT_OWNED(mpt)		mtx_owned(&(mpt)->mpt_lock)
 #define	MPT_LOCK_ASSERT(mpt)	mtx_assert(&(mpt)->mpt_lock, MA_OWNED)
-#define mpt_sleep(mpt, ident, priority, wmesg, timo) \
-	msleep(ident, &(mpt)->mpt_lock, priority, wmesg, timo)
-#define mpt_req_timeout(req, ticks, func, arg) \
-	callout_reset(&(req)->callout, (ticks), (func), (arg))
+#define mpt_sleep(mpt, ident, priority, wmesg, sbt) \
+    msleep_sbt(ident, &(mpt)->mpt_lock, priority, wmesg, sbt, 0, 0)
+#define mpt_req_timeout(req, sbt, func, arg) \
+    callout_reset_sbt(&(req)->callout, (sbt), 0, (func), (arg), 0)
 #define mpt_req_untimeout(req, func, arg) \
 	callout_stop(&(req)->callout)
 #define mpt_callout_init(mpt, c) \
@@ -815,6 +786,7 @@
 
 /******************************* Register Access ******************************/
 static __inline void mpt_write(struct mpt_softc *, size_t, uint32_t);
+static __inline void mpt_write_stream(struct mpt_softc *, size_t, uint32_t);
 static __inline uint32_t mpt_read(struct mpt_softc *, int);
 static __inline void mpt_pio_write(struct mpt_softc *, size_t, uint32_t);
 static __inline uint32_t mpt_pio_read(struct mpt_softc *, int);
@@ -825,6 +797,12 @@
 	bus_space_write_4(mpt->pci_st, mpt->pci_sh, offset, val);
 }
 
+static __inline void
+mpt_write_stream(struct mpt_softc *mpt, size_t offset, uint32_t val)
+{
+	bus_space_write_stream_4(mpt->pci_st, mpt->pci_sh, offset, val);
+}
+
 static __inline uint32_t
 mpt_read(struct mpt_softc *mpt, int offset)
 {
@@ -849,6 +827,7 @@
 	KASSERT(mpt->pci_pio_reg != NULL, ("no PIO resource"));
 	return (bus_space_read_4(mpt->pci_pio_st, mpt->pci_pio_sh, offset));
 }
+
 /*********************** Reply Frame/Request Management ***********************/
 /* Max MPT Reply we are willing to accept (must be power of 2) */
 #define MPT_REPLY_SIZE   	256
@@ -972,23 +951,6 @@
 	__printflike(2, 3);
 
 /**************************** Target Mode Related ***************************/
-static __inline int mpt_cdblen(uint8_t, int);
-static __inline int
-mpt_cdblen(uint8_t cdb0, int maxlen)
-{
-	int group = cdb0 >> 5;
-	switch (group) {
-	case 0:
-		return (6);
-	case 1:
-		return (10);
-	case 4:
-	case 5:
-		return (12);
-	default:
-		return (16);
-	}
-}
 #ifdef	INVARIANTS
 static __inline request_t * mpt_tag_2_req(struct mpt_softc *, uint32_t);
 static __inline request_t *
@@ -1093,11 +1055,13 @@
  * Task Management Types, purely for internal consumption
  */
 typedef enum {
-	MPT_ABORT_TASK_SET=1234,
+	MPT_QUERY_TASK_SET=1234,
+	MPT_ABORT_TASK_SET,
 	MPT_CLEAR_TASK_SET,
+	MPT_QUERY_ASYNC_EVENT,
+	MPT_LOGICAL_UNIT_RESET,
 	MPT_TARGET_RESET,
 	MPT_CLEAR_ACA,
-	MPT_TERMINATE_TASK,
 	MPT_NIL_TMT_VALUE=5678
 } mpt_task_mgmt_t;
 
@@ -1167,6 +1131,7 @@
 				   PageAddress, hdr, len, sleep_ok,
 				   timeout_ms));
 }
+
 /* mpt_debug.c functions */
 void mpt_print_reply(void *vmsg);
 void mpt_print_db(uint32_t mb);
@@ -1176,4 +1141,5 @@
 void mpt_print_config_request(void *vmsg);
 void mpt_print_request(void *vmsg);
 void mpt_dump_sgl(SGE_IO_UNION *se, int offset);
+
 #endif /* _MPT_H_ */
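
The mpt_sleep() and mpt_req_timeout() macros above now take an sbintime_t
rather than a tick count, so callers pass SBT_1MS * ms (or SBT_1S * s), as
the mpt_cam.c hunks below do.  A hedged sketch of the callout side, with the
callout variable and handler invented for the example (the real driver arms
the per-request callout set up by mpt_callout_init()):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>

static struct callout example_callout;	/* callout_init()'d once at attach */

static void
example_expired(void *arg)
{
	/* Timeout handler, analogous to mpt_timeout() in the driver. */
}

static void
example_arm_timeout(uint32_t timeout_ms)
{
	/* Same shape as the new mpt_req_timeout(): sbt, precision 0, no flags. */
	callout_reset_sbt(&example_callout, SBT_1MS * timeout_ms, 0,
	    example_expired, NULL, 0);
}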

Modified: trunk/sys/dev/mpt/mpt_cam.c
===================================================================
--- trunk/sys/dev/mpt/mpt_cam.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpt_cam.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * FreeBSD/CAM specific routines for LSI '909 FC  adapters.
  * FreeBSD Version.
@@ -94,7 +95,7 @@
  * OWNER OR CONTRIBUTOR IS ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/mpt/mpt_cam.c 315828 2017-03-23 06:55:32Z mav $");
 
 #include <dev/mpt/mpt.h>
 #include <dev/mpt/mpt_cam.h>
@@ -110,12 +111,6 @@
 #include <sys/kthread.h>
 #include <sys/sysctl.h>
 
-#if __FreeBSD_version >= 700025
-#ifndef	CAM_NEW_TRAN_CODE
-#define	CAM_NEW_TRAN_CODE	1
-#endif
-#endif
-
 static void mpt_poll(struct cam_sim *);
 static timeout_t mpt_timeout;
 static void mpt_action(struct cam_sim *, union ccb *);
@@ -139,7 +134,7 @@
 static void mpt_recover_commands(struct mpt_softc *mpt);
 
 static int mpt_scsi_send_tmf(struct mpt_softc *, u_int, u_int, u_int,
-    u_int, u_int, u_int, int);
+    target_id_t, lun_id_t, u_int, int);
 
 static void mpt_fc_post_els(struct mpt_softc *mpt, request_t *, int);
 static void mpt_post_target_command(struct mpt_softc *, request_t *, int);
@@ -151,7 +146,7 @@
 static cam_status mpt_abort_target_ccb(struct mpt_softc *, union ccb *);
 static int mpt_abort_target_cmd(struct mpt_softc *, request_t *);
 static void mpt_scsi_tgt_status(struct mpt_softc *, union ccb *, request_t *,
-    uint8_t, uint8_t const *);
+    uint8_t, uint8_t const *, u_int);
 static void
 mpt_scsi_tgt_tsk_mgmt(struct mpt_softc *, request_t *, mpt_task_mgmt_t,
     tgt_resource_t *, int);
@@ -344,7 +339,7 @@
 	 * Register exactly this bus.
 	 */
 	MPT_LOCK(mpt);
-	if (mpt_xpt_bus_register(mpt->sim, mpt->dev, 0) != CAM_SUCCESS) {
+	if (xpt_bus_register(mpt->sim, mpt->dev, 0) != CAM_SUCCESS) {
 		mpt_prt(mpt, "Bus registration Failed!\n");
 		error = ENOMEM;
 		MPT_UNLOCK(mpt);
@@ -383,7 +378,7 @@
 	 * Register this bus.
 	 */
 	MPT_LOCK(mpt);
-	if (mpt_xpt_bus_register(mpt->phydisk_sim, mpt->dev, 1) !=
+	if (xpt_bus_register(mpt->phydisk_sim, mpt->dev, 1) !=
 	    CAM_SUCCESS) {
 		mpt_prt(mpt, "Physical Disk Bus registration Failed!\n");
 		error = ENOMEM;
@@ -439,7 +434,23 @@
 	}
 	mpt2host_config_page_fc_port_0(&mpt->mpt_fcport_page0);
 
-	mpt->mpt_fcport_speed = mpt->mpt_fcport_page0.CurrentSpeed;
+	switch (mpt->mpt_fcport_page0.CurrentSpeed) {
+	case MPI_FCPORTPAGE0_CURRENT_SPEED_1GBIT:
+		mpt->mpt_fcport_speed = 1;
+		break;
+	case MPI_FCPORTPAGE0_CURRENT_SPEED_2GBIT:
+		mpt->mpt_fcport_speed = 2;
+		break;
+	case MPI_FCPORTPAGE0_CURRENT_SPEED_10GBIT:
+		mpt->mpt_fcport_speed = 10;
+		break;
+	case MPI_FCPORTPAGE0_CURRENT_SPEED_4GBIT:
+		mpt->mpt_fcport_speed = 4;
+		break;
+	default:
+		mpt->mpt_fcport_speed = 0;
+		break;
+	}
 
 	switch (mpt->mpt_fcport_page0.Flags &
 	    MPI_FCPORTPAGE0_FLAGS_ATTACH_TYPE_MASK) {
@@ -465,32 +476,27 @@
 		break;
 	}
 
+	mpt->scinfo.fc.wwnn = ((uint64_t)mpt->mpt_fcport_page0.WWNN.High << 32)
+	    | mpt->mpt_fcport_page0.WWNN.Low;
+	mpt->scinfo.fc.wwpn = ((uint64_t)mpt->mpt_fcport_page0.WWPN.High << 32)
+	    | mpt->mpt_fcport_page0.WWPN.Low;
+	mpt->scinfo.fc.portid = mpt->mpt_fcport_page0.PortIdentifier;
+
 	mpt_lprt(mpt, MPT_PRT_INFO,
-	    "FC Port Page 0: Topology <%s> WWNN 0x%08x%08x WWPN 0x%08x%08x "
+	    "FC Port Page 0: Topology <%s> WWNN 0x%16jx WWPN 0x%16jx "
 	    "Speed %u-Gbit\n", topology,
-	    mpt->mpt_fcport_page0.WWNN.High,
-	    mpt->mpt_fcport_page0.WWNN.Low,
-	    mpt->mpt_fcport_page0.WWPN.High,
-	    mpt->mpt_fcport_page0.WWPN.Low,
+	    (uintmax_t)mpt->scinfo.fc.wwnn, (uintmax_t)mpt->scinfo.fc.wwpn,
 	    mpt->mpt_fcport_speed);
 	MPT_UNLOCK(mpt);
 	ctx = device_get_sysctl_ctx(mpt->dev);
 	tree = device_get_sysctl_tree(mpt->dev);
 
-	snprintf(mpt->scinfo.fc.wwnn, sizeof (mpt->scinfo.fc.wwnn),
-	    "0x%08x%08x", mpt->mpt_fcport_page0.WWNN.High,
-	    mpt->mpt_fcport_page0.WWNN.Low);
-
-	snprintf(mpt->scinfo.fc.wwpn, sizeof (mpt->scinfo.fc.wwpn),
-	    "0x%08x%08x", mpt->mpt_fcport_page0.WWPN.High,
-	    mpt->mpt_fcport_page0.WWPN.Low);
-
-	SYSCTL_ADD_STRING(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-	    "wwnn", CTLFLAG_RD, mpt->scinfo.fc.wwnn, 0,
+	SYSCTL_ADD_QUAD(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+	    "wwnn", CTLFLAG_RD, &mpt->scinfo.fc.wwnn,
 	    "World Wide Node Name");
 
-	SYSCTL_ADD_STRING(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-	     "wwpn", CTLFLAG_RD, mpt->scinfo.fc.wwpn, 0,
+	SYSCTL_ADD_QUAD(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+	     "wwpn", CTLFLAG_RD, &mpt->scinfo.fc.wwpn,
 	     "World Wide Port Name");
 
 	MPT_LOCK(mpt);
@@ -1254,7 +1260,8 @@
 }
 
 /*
- * Callback routine from "bus_dmamap_load" or, in simple cases, called directly.
+ * Callback routine from bus_dmamap_load_ccb(9) or, in simple cases, called
+ * directly.
  *
  * Takes a list of physical segments and builds the SGL for SCSI IO command
  * and forwards the commard to the IOC after one last check that CAM has not
@@ -1382,7 +1389,7 @@
 		}
 	}
 
-	if (!(ccb->ccb_h.flags & (CAM_SG_LIST_PHYS|CAM_DATA_PHYS))) {
+	if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
 		bus_dmasync_op_t op;
 		if (istgt == 0) {
 			if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
@@ -1430,7 +1437,7 @@
 			/* SAS1078 36GB limitation WAR */
 			if (mpt->is_1078 && (((uint64_t)dm_segs->ds_addr +
 			    MPI_SGE_LENGTH(se->FlagsLength)) >> 32) == 9) {
-				addr |= (1 << 31);
+				addr |= (1U << 31);
 				tf |= MPI_SGE_FLAGS_LOCAL_ADDRESS;
 			}
 			se->Address.High = htole32(addr);
@@ -1553,7 +1560,7 @@
 				    (((uint64_t)dm_segs->ds_addr +
 				    MPI_SGE_LENGTH(se->FlagsLength)) >>
 				    32) == 9) {
-					addr |= (1 << 31);
+					addr |= (1U << 31);
 					tf |= MPI_SGE_FLAGS_LOCAL_ADDRESS;
 				}
 				se->Address.High = htole32(addr);
@@ -1623,7 +1630,7 @@
 		mpt_prt(mpt,
 		    "mpt_execute_req_a64: I/O cancelled (status 0x%x)\n",
 		    ccb->ccb_h.status & CAM_STATUS_MASK);
-		if (nseg && (ccb->ccb_h.flags & CAM_SG_LIST_PHYS) == 0) {
+		if (nseg) {
 			bus_dmamap_unload(mpt->buffer_dmat, req->dmap);
 		}
 		ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
@@ -1635,7 +1642,7 @@
 
 	ccb->ccb_h.status |= CAM_SIM_QUEUED;
 	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
-		mpt_req_timeout(req, (ccb->ccb_h.timeout * hz) / 1000,
+		mpt_req_timeout(req, SBT_1MS * ccb->ccb_h.timeout,
 		    mpt_timeout, ccb);
 	}
 	if (mpt->verbose > MPT_PRT_DEBUG) {
@@ -1688,7 +1695,6 @@
 	hdrp = req->req_vbuf;
 	mpt_off = req->req_vbuf;
 
-
 	if (error == 0 && ((uint32_t)nseg) >= mpt->max_seg_cnt) {
 		error = EFBIG;
 	}
@@ -1785,7 +1791,7 @@
 		}
 	}
 
-	if (!(ccb->ccb_h.flags & (CAM_SG_LIST_PHYS|CAM_DATA_PHYS))) {
+	if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
 		bus_dmasync_op_t op;
 		if (istgt) {
 			if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
@@ -2010,7 +2016,7 @@
 		mpt_prt(mpt,
 		    "mpt_execute_req: I/O cancelled (status 0x%x)\n",
 		    ccb->ccb_h.status & CAM_STATUS_MASK);
-		if (nseg && (ccb->ccb_h.flags & CAM_SG_LIST_PHYS) == 0) {
+		if (nseg) {
 			bus_dmamap_unload(mpt->buffer_dmat, req->dmap);
 		}
 		ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
@@ -2022,7 +2028,7 @@
 
 	ccb->ccb_h.status |= CAM_SIM_QUEUED;
 	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
-		mpt_req_timeout(req, (ccb->ccb_h.timeout * hz) / 1000,
+		mpt_req_timeout(req, SBT_1MS * ccb->ccb_h.timeout,
 		    mpt_timeout, ccb);
 	}
 	if (mpt->verbose > MPT_PRT_DEBUG) {
@@ -2062,6 +2068,7 @@
 	bus_dmamap_callback_t *cb;
 	target_id_t tgt;
 	int raid_passthru;
+	int error;
 
 	/* Get the pointer for the physical addapter */
 	mpt = ccb->ccb_h.ccb_mpt_ptr;
@@ -2127,13 +2134,7 @@
 	/* Which physical device to do the I/O on */
 	mpt_req->TargetID = tgt;
 
-	/* We assume a single level LUN type */
-	if (ccb->ccb_h.target_lun >= MPT_MAX_LUNS) {
-		mpt_req->LUN[0] = 0x40 | ((ccb->ccb_h.target_lun >> 8) & 0x3f);
-		mpt_req->LUN[1] = ccb->ccb_h.target_lun & 0xff;
-	} else {
-		mpt_req->LUN[1] = ccb->ccb_h.target_lun;
-	}
+	be64enc(mpt_req->LUN, CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun));
 
 	/* Set the direction of the transfer */
 	if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
@@ -2206,64 +2207,15 @@
 		    ccb->ccb_h.target_lun, req, req->serno);
 	}
 
-	/*
-	 * If we have any data to send with this command map it into bus space.
-	 */
-	if ((ccbh->flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
-		if ((ccbh->flags & CAM_SCATTER_VALID) == 0) {
-			/*
-			 * We've been given a pointer to a single buffer.
-			 */
-			if ((ccbh->flags & CAM_DATA_PHYS) == 0) {
-				/*
-				 * Virtual address that needs to translated into
-				 * one or more physical address ranges.
-				 */
-				int error;
-				int s = splsoftvm();
-				error = bus_dmamap_load(mpt->buffer_dmat,
-				    req->dmap, csio->data_ptr, csio->dxfer_len,
-				    cb, req, 0);
-				splx(s);
-				if (error == EINPROGRESS) {
-					/*
-					 * So as to maintain ordering,
-					 * freeze the controller queue
-					 * until our mapping is
-					 * returned.
-					 */
-					xpt_freeze_simq(mpt->sim, 1);
-					ccbh->status |= CAM_RELEASE_SIMQ;
-				}
-			} else {
-				/*
-				 * We have been given a pointer to single
-				 * physical buffer.
-				 */
-				struct bus_dma_segment seg;
-				seg.ds_addr = 
-				    (bus_addr_t)(vm_offset_t)csio->data_ptr;
-				seg.ds_len = csio->dxfer_len;
-				(*cb)(req, &seg, 1, 0);
-			}
-		} else {
-			/*
-			 * We have been given a list of addresses.
-			 * This case could be easily supported but they are not
-			 * currently generated by the CAM subsystem so there
-			 * is no point in wasting the time right now.
-			 */
-			struct bus_dma_segment *segs;
-			if ((ccbh->flags & CAM_SG_LIST_PHYS) == 0) {
-				(*cb)(req, NULL, 0, EFAULT);
-			} else {
-				/* Just use the segments provided */
-				segs = (struct bus_dma_segment *)csio->data_ptr;
-				(*cb)(req, segs, csio->sglist_cnt, 0);
-			}
-		}
-	} else {
-		(*cb)(req, NULL, 0, 0);
+	error = bus_dmamap_load_ccb(mpt->buffer_dmat, req->dmap, ccb, cb,
+	    req, 0);
+	if (error == EINPROGRESS) {
+		/*
+		 * So as to maintain ordering, freeze the controller queue
+		 * until our mapping is returned.
+		 */
+		xpt_freeze_simq(mpt->sim, 1);
+		ccbh->status |= CAM_RELEASE_SIMQ;
 	}
 }
 
@@ -2386,7 +2338,6 @@
 		break;
 
 	case MPI_EVENT_RESCAN:
-#if __FreeBSD_version >= 600000
 	{
 		union ccb *ccb;
 		uint32_t pathid;
@@ -2412,7 +2363,7 @@
 			break;
 		}
 
-		if (xpt_create_path(&ccb->ccb_h.path, xpt_periph, pathid,
+		if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid,
 		    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
 			mpt_prt(mpt, "unable to create path for rescan\n");
 			xpt_free_ccb(ccb);
@@ -2421,10 +2372,7 @@
 		xpt_rescan(ccb);
 		break;
 	}
-#else
-		mpt_prt(mpt, "Rescan Port: %d\n", (data0 >> 8) & 0xff);
-		break;
-#endif
+
 	case MPI_EVENT_LINK_STATUS_CHANGE:
 		mpt_prt(mpt, "Port %d: LinkState: %s\n",
 		    (data1 >> 8) & 0xff,
@@ -2507,8 +2455,11 @@
 
 		pqf = (PTR_EVENT_DATA_QUEUE_FULL)msg->Data;
 		pqf->CurrentDepth = le16toh(pqf->CurrentDepth);
-		mpt_prt(mpt, "QUEUE FULL EVENT: Bus 0x%02x Target 0x%02x Depth "
-		    "%d\n", pqf->Bus, pqf->TargetID, pqf->CurrentDepth);
+		if (bootverbose) {
+		    mpt_prt(mpt, "QUEUE FULL EVENT: Bus 0x%02x Target 0x%02x "
+			"Depth %d\n",
+			pqf->Bus, pqf->TargetID, pqf->CurrentDepth);
+		}
 		if (mpt->phydisk_sim && mpt_is_raid_member(mpt,
 		    pqf->TargetID) != 0) {
 			sim = mpt->phydisk_sim;
@@ -2560,7 +2511,7 @@
 				    "unable to alloc CCB for rescan\n");
 				break;
 			}
-			if (xpt_create_path(&ccb->ccb_h.path, xpt_periph,
+			if (xpt_create_path(&ccb->ccb_h.path, NULL,
 			    cam_sim_path(sim), psdsc->TargetID,
 			    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
 				mpt_prt(mpt,
@@ -2987,7 +2938,10 @@
 	} else if (rctl == ABTS && type == 0) {
 		uint16_t rx_id = le16toh(rp->Rxid);
 		uint16_t ox_id = le16toh(rp->Oxid);
+		mpt_tgt_state_t *tgt;
 		request_t *tgt_req = NULL;
+		union ccb *ccb;
+		uint32_t ct_id;
 
 		mpt_prt(mpt,
 		    "ELS: ABTS OX_ID 0x%x RX_ID 0x%x from 0x%08x%08x\n",
@@ -3000,47 +2954,37 @@
 		} else {
 			tgt_req = mpt->tgt_cmd_ptrs[rx_id];
 		}
-		if (tgt_req) {
-			mpt_tgt_state_t *tgt = MPT_TGT_STATE(mpt, tgt_req);
-			union ccb *ccb;
-			uint32_t ct_id;
+		if (tgt_req == NULL) {
+			mpt_prt(mpt, "no back pointer for RX_ID 0x%x\n", rx_id);
+			goto skip;
+		}
+		tgt = MPT_TGT_STATE(mpt, tgt_req);
 
-			/*
-			 * Check to make sure we have the correct command
-			 * The reply descriptor in the target state should
-			 * should contain an IoIndex that should match the
-			 * RX_ID.
-			 *
-			 * It'd be nice to have OX_ID to crosscheck with
-			 * as well.
-			 */
-			ct_id = GET_IO_INDEX(tgt->reply_desc);
+		/* Check to make sure we have the correct command. */
+		ct_id = GET_IO_INDEX(tgt->reply_desc);
+		if (ct_id != rx_id) {
+			mpt_lprt(mpt, MPT_PRT_ERROR, "ABORT Mismatch: "
+			    "RX_ID received=0x%x, in cmd=0x%x\n", rx_id, ct_id);
+			goto skip;
+		}
+		if (tgt->itag != ox_id) {
+			mpt_lprt(mpt, MPT_PRT_ERROR, "ABORT Mismatch: "
+			    "OX_ID received=0x%x, in cmd=0x%x\n", ox_id, tgt->itag);
+			goto skip;
+		}
 
-			if (ct_id != rx_id) {
-				mpt_lprt(mpt, MPT_PRT_ERROR, "ABORT Mismatch: "
-				    "RX_ID received=0x%x; RX_ID in cmd=0x%x\n",
-				    rx_id, ct_id);
-				goto skip;
-			}
+		if ((ccb = tgt->ccb) != NULL) {
+			mpt_prt(mpt, "CCB (%p): lun %jx flags %x status %x\n",
+			    ccb, (uintmax_t)ccb->ccb_h.target_lun,
+			    ccb->ccb_h.flags, ccb->ccb_h.status);
+		}
+		mpt_prt(mpt, "target state 0x%x resid %u xfrd %u rpwrd "
+		    "%x nxfers %x\n", tgt->state, tgt->resid,
+		    tgt->bytes_xfered, tgt->reply_desc, tgt->nxfers);
+		if (mpt_abort_target_cmd(mpt, tgt_req))
+			mpt_prt(mpt, "unable to start TargetAbort\n");
 
-			ccb = tgt->ccb;
-			if (ccb) {
-				mpt_prt(mpt,
-				    "CCB (%p): lun %u flags %x status %x\n",
-				    ccb, ccb->ccb_h.target_lun,
-				    ccb->ccb_h.flags, ccb->ccb_h.status);
-			}
-			mpt_prt(mpt, "target state 0x%x resid %u xfrd %u rpwrd "
-			    "%x nxfers %x\n", tgt->state,
-			    tgt->resid, tgt->bytes_xfered, tgt->reply_desc,
-			    tgt->nxfers);
-  skip:
-			if (mpt_abort_target_cmd(mpt, tgt_req)) {
-				mpt_prt(mpt, "unable to start TargetAbort\n");
-			}
-		} else {
-			mpt_prt(mpt, "no back pointer for RX_ID 0x%x\n", rx_id);
-		}
+skip:
 		memset(elsbuf, 0, 5 * (sizeof (U32)));
 		elsbuf[0] = htobe32(0);
 		elsbuf[1] = htobe32((ox_id << 16) | rx_id);
@@ -3372,11 +3316,8 @@
 		break;
 	}
 
-#ifdef	CAM_NEW_TRAN_CODE
 #define	IS_CURRENT_SETTINGS(c)	((c)->type == CTS_TYPE_CURRENT_SETTINGS)
-#else
-#define	IS_CURRENT_SETTINGS(c)	((c)->flags & CCB_TRANS_CURRENT_SETTINGS)
-#endif
+
 #define	DP_DISC_ENABLE	0x1
 #define	DP_DISC_DISABL	0x2
 #define	DP_DISC		(DP_DISC_ENABLE|DP_DISC_DISABL)
@@ -3393,10 +3334,8 @@
 
 	case XPT_SET_TRAN_SETTINGS:	/* Nexus Settings */
 	{
-#ifdef	CAM_NEW_TRAN_CODE
 		struct ccb_trans_settings_scsi *scsi;
 		struct ccb_trans_settings_spi *spi;
-#endif
 		uint8_t dval;
 		u_int period;
 		u_int offset;
@@ -3409,7 +3348,6 @@
 			break;
 		}
 
-#ifdef	CAM_NEW_TRAN_CODE
 		scsi = &cts->proto_specific.scsi;
 		spi = &cts->xport_specific.spi;
 
@@ -3420,7 +3358,6 @@
 			mpt_set_ccb_status(ccb, CAM_REQ_CMP);
 			break;
 		}
-#endif
 
 		/*
 		 * Skip attempting settings on RAID volume disks.
@@ -3450,28 +3387,6 @@
 		period = 0;
 		offset = 0;
 
-#ifndef	CAM_NEW_TRAN_CODE
-		if ((cts->valid & CCB_TRANS_DISC_VALID) != 0) {
-			dval |= (cts->flags & CCB_TRANS_DISC_ENB) ?
-			    DP_DISC_ENABLE : DP_DISC_DISABL;
-		}
-
-		if ((cts->valid & CCB_TRANS_TQ_VALID) != 0) {
-			dval |= (cts->flags & CCB_TRANS_TAG_ENB) ?
-			    DP_TQING_ENABLE : DP_TQING_DISABL;
-		}
-
-		if ((cts->valid & CCB_TRANS_BUS_WIDTH_VALID) != 0) {
-			dval |= cts->bus_width ? DP_WIDE : DP_NARROW;
-		}
-
-		if ((cts->valid & CCB_TRANS_SYNC_RATE_VALID) &&
-		    (cts->valid & CCB_TRANS_SYNC_OFFSET_VALID)) {
-			dval |= DP_SYNC;
-			period = cts->sync_period;
-			offset = cts->sync_offset;
-		}
-#else
 		if ((spi->valid & CTS_SPI_VALID_DISC) != 0) {
 			dval |= ((spi->flags & CTS_SPI_FLAGS_DISC_ENB) != 0) ?
 			    DP_DISC_ENABLE : DP_DISC_DISABL;
@@ -3507,7 +3422,7 @@
 			period &= MPI_SCSIDEVPAGE1_RP_MIN_SYNC_PERIOD_MASK;
 	    		period >>= MPI_SCSIDEVPAGE1_RP_SHIFT_MIN_SYNC_PERIOD;
 		}
-#endif
+
 		if (dval & DP_DISC_ENABLE) {
 			mpt->mpt_disc_enable |= (1 << tgt);
 		} else if (dval & DP_DISC_DISABL) {
@@ -3540,7 +3455,6 @@
 	}
 	case XPT_GET_TRAN_SETTINGS:
 	{
-#ifdef	CAM_NEW_TRAN_CODE
 		struct ccb_trans_settings_scsi *scsi;
 		cts = &ccb->cts;
 		cts->protocol = PROTO_SCSI;
@@ -3550,8 +3464,10 @@
 			cts->protocol_version = SCSI_REV_SPC;
 			cts->transport = XPORT_FC;
 			cts->transport_version = 0;
-			fc->valid = CTS_FC_VALID_SPEED;
-			fc->bitrate = 100000;
+			if (mpt->mpt_fcport_speed != 0) {
+				fc->valid = CTS_FC_VALID_SPEED;
+				fc->bitrate = 100000 * mpt->mpt_fcport_speed;
+			}
 		} else if (mpt->is_sas) {
 			struct ccb_trans_settings_sas *sas =
 			    &cts->xport_specific.sas;
@@ -3572,21 +3488,6 @@
 		scsi = &cts->proto_specific.scsi;
 		scsi->valid = CTS_SCSI_VALID_TQ;
 		scsi->flags = CTS_SCSI_FLAGS_TAG_ENB;
-#else
-		cts = &ccb->cts;
-		if (mpt->is_fc) {
-			cts->flags = CCB_TRANS_TAG_ENB | CCB_TRANS_DISC_ENB;
-			cts->valid = CCB_TRANS_DISC_VALID | CCB_TRANS_TQ_VALID;
-			cts->bus_width = MSG_EXT_WDTR_BUS_8_BIT;
-		} else if (mpt->is_sas) {
-			cts->flags = CCB_TRANS_TAG_ENB | CCB_TRANS_DISC_ENB;
-			cts->valid = CCB_TRANS_DISC_VALID | CCB_TRANS_TQ_VALID;
-			cts->bus_width = MSG_EXT_WDTR_BUS_8_BIT;
-		} else if (mpt_get_spi_settings(mpt, cts) != 0) {
-			mpt_set_ccb_status(ccb, CAM_REQ_CMP_ERR);
-			break;
-		}
-#endif
 		mpt_set_ccb_status(ccb, CAM_REQ_CMP);
 		break;
 	}
@@ -3604,6 +3505,36 @@
 		KASSERT(ccb->ccb_h.status, ("zero ccb sts at %d", __LINE__));
 		break;
 	}
+	case XPT_GET_SIM_KNOB:
+	{
+		struct ccb_sim_knob *kp = &ccb->knob;
+
+		if (mpt->is_fc) {
+			kp->xport_specific.fc.wwnn = mpt->scinfo.fc.wwnn;
+			kp->xport_specific.fc.wwpn = mpt->scinfo.fc.wwpn;
+			switch (mpt->role) {
+			case MPT_ROLE_NONE:
+				kp->xport_specific.fc.role = KNOB_ROLE_NONE;
+				break;
+			case MPT_ROLE_INITIATOR:
+				kp->xport_specific.fc.role = KNOB_ROLE_INITIATOR;
+				break;
+			case MPT_ROLE_TARGET:
+				kp->xport_specific.fc.role = KNOB_ROLE_TARGET;
+				break;
+			case MPT_ROLE_BOTH:
+				kp->xport_specific.fc.role = KNOB_ROLE_BOTH;
+				break;
+			}
+			kp->xport_specific.fc.valid =
+			    KNOB_VALID_ADDRESS | KNOB_VALID_ROLE;
+			ccb->ccb_h.status = CAM_REQ_CMP;
+		} else {
+			ccb->ccb_h.status = CAM_REQ_INVALID;
+		}
+		xpt_done(ccb);
+		break;
+	}
 	case XPT_PATH_INQ:		/* Path routing inquiry */
 	{
 		struct ccb_pathinq *cpi = &ccb->cpi;
@@ -3640,17 +3571,23 @@
 		/*
 		 * The base speed is the speed of the underlying connection.
 		 */
-#ifdef	CAM_NEW_TRAN_CODE
 		cpi->protocol = PROTO_SCSI;
 		if (mpt->is_fc) {
-			cpi->hba_misc = PIM_NOBUSRESET;
+			cpi->hba_misc = PIM_NOBUSRESET | PIM_UNMAPPED |
+			    PIM_EXTLUNS;
 			cpi->base_transfer_speed = 100000;
 			cpi->hba_inquiry = PI_TAG_ABLE;
 			cpi->transport = XPORT_FC;
 			cpi->transport_version = 0;
 			cpi->protocol_version = SCSI_REV_SPC;
+			cpi->xport_specific.fc.wwnn = mpt->scinfo.fc.wwnn;
+			cpi->xport_specific.fc.wwpn = mpt->scinfo.fc.wwpn;
+			cpi->xport_specific.fc.port = mpt->scinfo.fc.portid;
+			cpi->xport_specific.fc.bitrate =
+			    100000 * mpt->mpt_fcport_speed;
 		} else if (mpt->is_sas) {
-			cpi->hba_misc = PIM_NOBUSRESET;
+			cpi->hba_misc = PIM_NOBUSRESET | PIM_UNMAPPED |
+			    PIM_EXTLUNS;
 			cpi->base_transfer_speed = 300000;
 			cpi->hba_inquiry = PI_TAG_ABLE;
 			cpi->transport = XPORT_SAS;
@@ -3657,7 +3594,8 @@
 			cpi->transport_version = 0;
 			cpi->protocol_version = SCSI_REV_SPC2;
 		} else {
-			cpi->hba_misc = PIM_SEQSCAN;
+			cpi->hba_misc = PIM_SEQSCAN | PIM_UNMAPPED |
+			    PIM_EXTLUNS;
 			cpi->base_transfer_speed = 3300;
 			cpi->hba_inquiry = PI_SDTR_ABLE|PI_TAG_ABLE|PI_WIDE_16;
 			cpi->transport = XPORT_SPI;
@@ -3664,21 +3602,6 @@
 			cpi->transport_version = 2;
 			cpi->protocol_version = SCSI_REV_2;
 		}
-#else
-		if (mpt->is_fc) {
-			cpi->hba_misc = PIM_NOBUSRESET;
-			cpi->base_transfer_speed = 100000;
-			cpi->hba_inquiry = PI_TAG_ABLE;
-		} else if (mpt->is_sas) {
-			cpi->hba_misc = PIM_NOBUSRESET;
-			cpi->base_transfer_speed = 300000;
-			cpi->hba_inquiry = PI_TAG_ABLE;
-		} else {
-			cpi->hba_misc = PIM_SEQSCAN;
-			cpi->base_transfer_speed = 3300;
-			cpi->hba_inquiry = PI_SDTR_ABLE|PI_TAG_ABLE|PI_WIDE_16;
-		}
-#endif
 
 		/*
 		 * We give our fake RAID passhtru bus a width that is MaxVolumes
@@ -3699,9 +3622,9 @@
 		} else {
 			cpi->target_sprt = 0;
 		}
-		strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
-		strncpy(cpi->hba_vid, "LSI", HBA_IDLEN);
-		strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
+		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
+		strlcpy(cpi->hba_vid, "LSI", HBA_IDLEN);
+		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
 		cpi->unit_number = cam_sim_unit(sim);
 		cpi->ccb_h.status = CAM_REQ_CMP;
 		break;
@@ -3723,7 +3646,6 @@
 		}
 		break;
 	}
-	case XPT_NOTIFY_ACKNOWLEDGE:	/* recycle notify ack */
 	case XPT_IMMEDIATE_NOTIFY:	/* Add Immediate Notify Resource */
 	case XPT_ACCEPT_TARGET_IO:	/* Add Accept Target IO Resource */
 	{
@@ -3731,7 +3653,6 @@
 		lun_id_t lun = ccb->ccb_h.target_lun;
 		ccb->ccb_h.sim_priv.entries[0].field = 0;
 		ccb->ccb_h.sim_priv.entries[1].ptr = mpt;
-		ccb->ccb_h.flags = 0;
 
 		if (lun == CAM_LUN_WILDCARD) {
 			if (ccb->ccb_h.target_id != CAM_TARGET_WILDCARD) {
@@ -3750,17 +3671,24 @@
 			    "Put FREE ATIO %p lun %d\n", ccb, lun);
 			STAILQ_INSERT_TAIL(&trtp->atios, &ccb->ccb_h,
 			    sim_links.stqe);
-		} else if (ccb->ccb_h.func_code == XPT_IMMEDIATE_NOTIFY) {
+		} else {
 			mpt_lprt(mpt, MPT_PRT_DEBUG1,
 			    "Put FREE INOT lun %d\n", lun);
 			STAILQ_INSERT_TAIL(&trtp->inots, &ccb->ccb_h,
 			    sim_links.stqe);
-		} else {
-			mpt_lprt(mpt, MPT_PRT_ALWAYS, "Got Notify ACK\n");
 		}
 		mpt_set_ccb_status(ccb, CAM_REQ_INPROG);
 		return;
 	}
+	case XPT_NOTIFY_ACKNOWLEDGE:	/* Task management request done. */
+	{
+		request_t *req = MPT_TAG_2_REQ(mpt, ccb->cna2.tag_id);
+
+		mpt_lprt(mpt, MPT_PRT_DEBUG, "Got Notify ACK\n");
+		mpt_scsi_tgt_status(mpt, NULL, req, 0, NULL, 0);
+		mpt_set_ccb_status(ccb, CAM_REQ_CMP);
+		break;
+	}
 	case XPT_CONT_TARGET_IO:
 		mpt_target_start_io(mpt, ccb);
 		return;
@@ -3775,10 +3703,8 @@
 static int
 mpt_get_spi_settings(struct mpt_softc *mpt, struct ccb_trans_settings *cts)
 {
-#ifdef	CAM_NEW_TRAN_CODE
 	struct ccb_trans_settings_scsi *scsi = &cts->proto_specific.scsi;
 	struct ccb_trans_settings_spi *spi = &cts->xport_specific.spi;
-#endif
 	target_id_t tgt;
 	uint32_t dval, pval, oval;
 	int rv;
@@ -3839,29 +3765,6 @@
 		pval = MPI_SCSIPORTPAGE0_CAP_GET_MIN_SYNC_PERIOD(pval);
 	}
 
-#ifndef	CAM_NEW_TRAN_CODE
-	cts->flags &= ~(CCB_TRANS_DISC_ENB|CCB_TRANS_TAG_ENB);
-	cts->valid = 0;
-	cts->sync_period = pval;
-	cts->sync_offset = oval;
-	cts->valid |= CCB_TRANS_SYNC_RATE_VALID;
-	cts->valid |= CCB_TRANS_SYNC_OFFSET_VALID;
-	cts->valid |= CCB_TRANS_BUS_WIDTH_VALID;
-	if (dval & DP_WIDE) {
-		cts->bus_width = MSG_EXT_WDTR_BUS_16_BIT;
-	} else {
-		cts->bus_width = MSG_EXT_WDTR_BUS_8_BIT;
-	}
-	if (cts->ccb_h.target_lun != CAM_LUN_WILDCARD) {
-		cts->valid |= CCB_TRANS_DISC_VALID | CCB_TRANS_TQ_VALID;
-		if (dval & DP_DISC_ENABLE) {
-			cts->flags |= CCB_TRANS_DISC_ENB;
-		}
-		if (dval & DP_TQING_ENABLE) {
-			cts->flags |= CCB_TRANS_TAG_ENB;
-		}
-	}
-#else
 	spi->valid = 0;
 	scsi->valid = 0;
 	spi->flags = 0;
@@ -3886,10 +3789,10 @@
 			spi->flags |= CTS_SPI_FLAGS_DISC_ENB;
 		}
 	}
-#endif
+
 	mpt_lprt(mpt, MPT_PRT_NEGOTIATION,
 	    "mpt_get_spi_settings[%d]: %s flags 0x%x per 0x%x off=%d\n", tgt,
-	    IS_CURRENT_SETTINGS(cts)? "ACTIVE" : "NVRAM ", dval, pval, oval);
+	    IS_CURRENT_SETTINGS(cts) ? "ACTIVE" : "NVRAM ", dval, pval, oval);
 	return (0);
 }
 
@@ -3959,7 +3862,7 @@
 {
 	int error;
 
-	error = mpt_kthread_create(mpt_recovery_thread, mpt,
+	error = kproc_create(mpt_recovery_thread, mpt,
 	    &mpt->recovery_thread, /*flags*/0,
 	    /*altstack*/0, "mpt_recovery%d", mpt->unit);
 	return (error);
@@ -4002,12 +3905,13 @@
 	mpt->recovery_thread = NULL;
 	wakeup(&mpt->recovery_thread);
 	MPT_UNLOCK(mpt);
-	mpt_kthread_exit(0);
+	kproc_exit(0);
 }
 
 static int
 mpt_scsi_send_tmf(struct mpt_softc *mpt, u_int type, u_int flags,
-    u_int channel, u_int target, u_int lun, u_int abort_ctx, int sleep_ok)
+    u_int channel, target_id_t target, lun_id_t lun, u_int abort_ctx,
+    int sleep_ok)
 {
 	MSG_SCSI_TASK_MGMT *tmf_req;
 	int		    error;
@@ -4035,12 +3939,7 @@
 	tmf_req->MsgFlags = flags;
 	tmf_req->MsgContext =
 	    htole32(mpt->tmf_req->index | scsi_tmf_handler_id);
-	if (lun > MPT_MAX_LUNS) {
-		tmf_req->LUN[0] = 0x40 | ((lun >> 8) & 0x3f);
-		tmf_req->LUN[1] = lun & 0xff;
-	} else {
-		tmf_req->LUN[1] = lun;
-	}
+	be64enc(tmf_req->LUN, CAM_EXTLUN_BYTE_SWIZZLE(lun));
 	tmf_req->TaskMsgContext = abort_ctx;
 
 	mpt_lprt(mpt, MPT_PRT_DEBUG,
@@ -4262,6 +4161,7 @@
 	fc = req->req_vbuf;
 	fc->BufferCount = 1;
 	fc->Function = MPI_FUNCTION_TARGET_CMD_BUFFER_POST;
+	fc->BufferLength = MIN(MPT_REQUEST_AREA - MPT_RQSL(mpt), UINT8_MAX);
 	fc->MsgContext = htole32(req->index | mpt->scsi_tgt_handler_id);
 
 	cb = &fc->Buffer[0];
@@ -4458,6 +4358,7 @@
 		bus_dmamap_callback_t *cb;
 		PTR_MSG_TARGET_ASSIST_REQUEST ta;
 		request_t *req;
+		int error;
 
 		KASSERT((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE,
 		    ("dxfer_len %u but direction is NONE", csio->dxfer_len));
@@ -4505,13 +4406,7 @@
 		ta->Function = MPI_FUNCTION_TARGET_ASSIST;
 		ta->MsgContext = htole32(req->index | mpt->scsi_tgt_handler_id);
 		ta->ReplyWord = htole32(tgt->reply_desc);
-		if (csio->ccb_h.target_lun > MPT_MAX_LUNS) {
-			ta->LUN[0] =
-			    0x40 | ((csio->ccb_h.target_lun >> 8) & 0x3f);
-			ta->LUN[1] = csio->ccb_h.target_lun & 0xff;
-		} else {
-			ta->LUN[1] = csio->ccb_h.target_lun;
-		}
+		be64enc(ta->LUN, CAM_EXTLUN_BYTE_SWIZZLE(csio->ccb_h.target_lun));
 
 		ta->RelativeOffset = tgt->bytes_xfered;
 		ta->DataLength = ccb->csio.dxfer_len;
@@ -4522,6 +4417,7 @@
 		/*
 		 * XXX Should be done after data transfer completes?
 		 */
+		csio->resid = csio->dxfer_len - ta->DataLength;
 		tgt->resid -= csio->dxfer_len;
 		tgt->bytes_xfered += csio->dxfer_len;
 
@@ -4544,48 +4440,13 @@
 		    "nxtstate=%d\n", csio, csio->tag_id, csio->dxfer_len,
 		    tgt->resid, ccb->ccb_h.flags, req, req->serno, tgt->state);
 
-		if ((ccb->ccb_h.flags & CAM_SCATTER_VALID) == 0) {
-			if ((ccb->ccb_h.flags & CAM_DATA_PHYS) == 0) {
-				int error;
-				int s = splsoftvm();
-				error = bus_dmamap_load(mpt->buffer_dmat,
-				    req->dmap, csio->data_ptr, csio->dxfer_len,
-				    cb, req, 0);
-				splx(s);
-				if (error == EINPROGRESS) {
-					xpt_freeze_simq(mpt->sim, 1);
-					ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
-				}
-			} else {
-				/*
-				 * We have been given a pointer to single
-				 * physical buffer.
-				 */
-				struct bus_dma_segment seg;
-				seg.ds_addr = (bus_addr_t)
-				    (vm_offset_t)csio->data_ptr;
-				seg.ds_len = csio->dxfer_len;
-				(*cb)(req, &seg, 1, 0);
-			}
-		} else {
-			/*
-			 * We have been given a list of addresses.
-			 * This case could be easily supported but they are not
-			 * currently generated by the CAM subsystem so there
-			 * is no point in wasting the time right now.
-			 */
-			struct bus_dma_segment *sgs;
-			if ((ccb->ccb_h.flags & CAM_SG_LIST_PHYS) == 0) {
-				(*cb)(req, NULL, 0, EFAULT);
-			} else {
-				/* Just use the segments provided */
-				sgs = (struct bus_dma_segment *)csio->data_ptr;
-				(*cb)(req, sgs, csio->sglist_cnt, 0);
-			}
+		error = bus_dmamap_load_ccb(mpt->buffer_dmat, req->dmap, ccb,
+		    cb, req, 0);
+		if (error == EINPROGRESS) {
+			xpt_freeze_simq(mpt->sim, 1);
+			ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
 		}
 	} else {
-		uint8_t *sp = NULL, sense[MPT_SENSE_SIZE];
-
 		/*
 		 * XXX: I don't know why this seems to happen, but
 		 * XXX: completing the CCB seems to make things happy.
@@ -4602,18 +4463,16 @@
 			xpt_done(ccb);
 			return;
 		}
-		if (ccb->ccb_h.flags & CAM_SEND_SENSE) {
-			sp = sense;
-			memcpy(sp, &csio->sense_data,
-			   min(csio->sense_len, MPT_SENSE_SIZE));
-		}
-		mpt_scsi_tgt_status(mpt, ccb, cmd_req, csio->scsi_status, sp);
+		mpt_scsi_tgt_status(mpt, ccb, cmd_req, csio->scsi_status,
+		    (void *)&csio->sense_data,
+		    (ccb->ccb_h.flags & CAM_SEND_SENSE) ?
+		     csio->sense_len : 0);
 	}
 }
 
 static void
 mpt_scsi_tgt_local(struct mpt_softc *mpt, request_t *cmd_req,
-    uint32_t lun, int send, uint8_t *data, size_t length)
+    lun_id_t lun, int send, uint8_t *data, size_t length)
 {
 	mpt_tgt_state_t *tgt;
 	PTR_MSG_TARGET_ASSIST_REQUEST ta;
@@ -4629,7 +4488,7 @@
 	tgt = MPT_TGT_STATE(mpt, cmd_req);
 	if (length == 0 || tgt->resid == 0) {
 		tgt->resid = 0;
-		mpt_scsi_tgt_status(mpt, NULL, cmd_req, 0, NULL);
+		mpt_scsi_tgt_status(mpt, NULL, cmd_req, 0, NULL, 0);
 		return;
 	}
 
@@ -4653,12 +4512,7 @@
 	ta->Function = MPI_FUNCTION_TARGET_ASSIST;
 	ta->MsgContext = htole32(req->index | mpt->scsi_tgt_handler_id);
 	ta->ReplyWord = htole32(tgt->reply_desc);
-	if (lun > MPT_MAX_LUNS) {
-		ta->LUN[0] = 0x40 | ((lun >> 8) & 0x3f);
-		ta->LUN[1] = lun & 0xff;
-	} else {
-		ta->LUN[1] = lun;
-	}
+	be64enc(ta->LUN, CAM_EXTLUN_BYTE_SWIZZLE(lun));
 	ta->RelativeOffset = 0;
 	ta->DataLength = length;
 
@@ -4702,40 +4556,44 @@
 {
 	struct mpt_hdr_stailq *lp;
 	struct ccb_hdr *srch;
-	int found = 0;
 	union ccb *accb = ccb->cab.abort_ccb;
 	tgt_resource_t *trtp;
+	mpt_tgt_state_t *tgt;
+	request_t *req;
+	uint32_t tag;
 
 	mpt_lprt(mpt, MPT_PRT_DEBUG, "aborting ccb %p\n", accb);
-
-	if (ccb->ccb_h.target_lun == CAM_LUN_WILDCARD) {
+	if (ccb->ccb_h.target_lun == CAM_LUN_WILDCARD)
 		trtp = &mpt->trt_wildcard;
-	} else {
+	else
 		trtp = &mpt->trt[ccb->ccb_h.target_lun];
-	}
-
 	if (accb->ccb_h.func_code == XPT_ACCEPT_TARGET_IO) {
 		lp = &trtp->atios;
-	} else if (accb->ccb_h.func_code == XPT_IMMEDIATE_NOTIFY) {
+		tag = accb->atio.tag_id;
+	} else {
 		lp = &trtp->inots;
-	} else {
-		return (CAM_REQ_INVALID);
+		tag = accb->cin1.tag_id;
 	}
 
+	/* Search the CCB among queued. */
 	STAILQ_FOREACH(srch, lp, sim_links.stqe) {
-		if (srch == &accb->ccb_h) {
-			found = 1;
-			STAILQ_REMOVE(lp, srch, ccb_hdr, sim_links.stqe);
-			break;
-		}
-	}
-	if (found) {
+		if (srch != &accb->ccb_h)
+			continue;
+		STAILQ_REMOVE(lp, srch, ccb_hdr, sim_links.stqe);
 		accb->ccb_h.status = CAM_REQ_ABORTED;
 		xpt_done(accb);
 		return (CAM_REQ_CMP);
 	}
-	mpt_prt(mpt, "mpt_abort_tgt_ccb: CCB %p not found\n", ccb);
-	return (CAM_PATH_INVALID);
+
+	/* Search the CCB among running. */
+	req = MPT_TAG_2_REQ(mpt, tag);
+	tgt = MPT_TGT_STATE(mpt, req);
+	if (tgt->tag_id == tag) {
+		mpt_abort_target_cmd(mpt, req);
+		return (CAM_REQ_CMP);
+	}
+
+	return (CAM_UA_ABORT);
 }
 
 /*
@@ -4782,7 +4640,7 @@
 
 static void
 mpt_scsi_tgt_status(struct mpt_softc *mpt, union ccb *ccb, request_t *cmd_req,
-    uint8_t status, uint8_t const *sense_data)
+    uint8_t status, uint8_t const *sense_data, u_int sense_len)
 {
 	uint8_t *cmd_vbuf;
 	mpt_tgt_state_t *tgt;
@@ -4831,6 +4689,7 @@
 	paddr += MPT_RQSL(mpt);
 
 	memset(tp, 0, sizeof (*tp));
+	tp->StatusCode = status;
 	tp->Function = MPI_FUNCTION_TARGET_STATUS_SEND;
 	if (mpt->is_fc) {
 		PTR_MPI_TARGET_FCP_CMD_BUFFER fc =
@@ -4856,37 +4715,26 @@
 		 */
 		memset(rsp, 0, sizeof (MPI_TARGET_FCP_RSP_BUFFER));
 
-		rsp[2] = status;
-		if (tgt->resid) {
-			rsp[2] |= 0x800;	/* XXXX NEED MNEMONIC!!!! */
+		rsp[2] = htobe32(status);
+#define	MIN_FCP_RESPONSE_SIZE	24
+#ifndef	WE_TRUST_AUTO_GOOD_STATUS
+		resplen = MIN_FCP_RESPONSE_SIZE;
+#endif
+		if (tgt->resid < 0) {
+			rsp[2] |= htobe32(0x400); /* XXXX NEED MNEMONIC!!!! */
+			rsp[3] = htobe32(-tgt->resid);
+			resplen = MIN_FCP_RESPONSE_SIZE;
+		} else if (tgt->resid > 0) {
+			rsp[2] |= htobe32(0x800); /* XXXX NEED MNEMONIC!!!! */
 			rsp[3] = htobe32(tgt->resid);
-#ifdef	WE_TRUST_AUTO_GOOD_STATUS
-			resplen = sizeof (MPI_TARGET_FCP_RSP_BUFFER);
-#endif
+			resplen = MIN_FCP_RESPONSE_SIZE;
 		}
-		if (status == SCSI_STATUS_CHECK_COND) {
-			int i;
-
-			rsp[2] |= 0x200;	/* XXXX NEED MNEMONIC!!!! */
-			rsp[4] = htobe32(MPT_SENSE_SIZE);
-			if (sense_data) {
-				memcpy(&rsp[8], sense_data, MPT_SENSE_SIZE);
-			} else {
-				mpt_prt(mpt, "mpt_scsi_tgt_status: CHECK CONDI"
-				    "TION but no sense data?\n");
-				memset(&rsp, 0, MPT_SENSE_SIZE);
-			}
-			for (i = 8; i < (8 + (MPT_SENSE_SIZE >> 2)); i++) {
-				rsp[i] = htobe32(rsp[i]);
-			}
-#ifdef	WE_TRUST_AUTO_GOOD_STATUS
-			resplen = sizeof (MPI_TARGET_FCP_RSP_BUFFER);
-#endif
+		if (sense_len > 0) {
+			rsp[2] |= htobe32(0x200); /* XXXX NEED MNEMONIC!!!! */
+			rsp[4] = htobe32(sense_len);
+			memcpy(&rsp[6], sense_data, sense_len);
+			resplen = MIN_FCP_RESPONSE_SIZE + sense_len;
 		}
-#ifndef	WE_TRUST_AUTO_GOOD_STATUS
-		resplen = sizeof (MPI_TARGET_FCP_RSP_BUFFER);
-#endif
-		rsp[2] = htobe32(rsp[2]);
 	} else if (mpt->is_sas) {
 		PTR_MPI_TARGET_SSP_CMD_BUFFER ssp =
 		    (PTR_MPI_TARGET_SSP_CMD_BUFFER) cmd_vbuf;
@@ -4894,7 +4742,6 @@
 	} else {
 		PTR_MPI_TARGET_SCSI_SPI_CMD_BUFFER sp =
 		    (PTR_MPI_TARGET_SCSI_SPI_CMD_BUFFER) cmd_vbuf;
-		tp->StatusCode = status;
 		tp->QueueTag = htole16(sp->Tag);
 		memcpy(tp->LUN, sp->LogicalUnitNumber, sizeof (tp->LUN));
 	}
@@ -4909,12 +4756,11 @@
 		tp->MsgFlags |= TARGET_STATUS_SEND_FLAGS_AUTO_GOOD_STATUS;
 	} else {
 		tp->StatusDataSGE.u.Address32 = htole32((uint32_t) paddr);
-		fl =
-			MPI_SGE_FLAGS_HOST_TO_IOC	|
-			MPI_SGE_FLAGS_SIMPLE_ELEMENT	|
-			MPI_SGE_FLAGS_LAST_ELEMENT	|
-			MPI_SGE_FLAGS_END_OF_LIST	|
-			MPI_SGE_FLAGS_END_OF_BUFFER;
+		fl = MPI_SGE_FLAGS_HOST_TO_IOC |
+		     MPI_SGE_FLAGS_SIMPLE_ELEMENT |
+		     MPI_SGE_FLAGS_LAST_ELEMENT |
+		     MPI_SGE_FLAGS_END_OF_LIST |
+		     MPI_SGE_FLAGS_END_OF_BUFFER;
 		fl <<= MPI_SGE_FLAGS_SHIFT;
 		fl |= resplen;
 		tp->StatusDataSGE.FlagsLength = htole32(fl);
@@ -4921,12 +4767,14 @@
 	}
 
 	mpt_lprt(mpt, MPT_PRT_DEBUG, 
-	    "STATUS_CCB %p (wit%s sense) tag %x req %p:%u resid %u\n",
-	    ccb, sense_data?"h" : "hout", ccb? ccb->csio.tag_id : -1, req,
-	    req->serno, tgt->resid);
+	    "STATUS_CCB %p (with%s sense) tag %x req %p:%u resid %u\n",
+	    ccb, sense_len > 0 ? "" : "out", tgt->tag_id,
+	    req, req->serno, tgt->resid);
+	if (mpt->verbose > MPT_PRT_DEBUG)
+		mpt_print_request(req->req_vbuf);
 	if (ccb) {
 		ccb->ccb_h.status = CAM_SIM_QUEUED | CAM_REQ_INPROG;
-		mpt_req_timeout(req, 60 * hz, mpt_timeout, ccb);
+		mpt_req_timeout(req, SBT_1S * 60, mpt_timeout, ccb);
 	}
 	mpt_send_cmd(mpt, req);
 }
@@ -4942,7 +4790,7 @@
 	inot = (struct ccb_immediate_notify *) STAILQ_FIRST(&trtp->inots);
 	if (inot == NULL) {
 		mpt_lprt(mpt, MPT_PRT_WARN, "no INOTSs- sending back BSY\n");
-		mpt_scsi_tgt_status(mpt, NULL, req, SCSI_STATUS_BUSY, NULL);
+		mpt_scsi_tgt_status(mpt, NULL, req, SCSI_STATUS_BUSY, NULL, 0);
 		return;
 	}
 	STAILQ_REMOVE_HEAD(&trtp->inots, sim_links.stqe);
@@ -4950,17 +4798,28 @@
 	    "Get FREE INOT %p lun %d\n", inot, inot->ccb_h.target_lun);
 
 	inot->initiator_id = init_id;	/* XXX */
+	inot->tag_id = tgt->tag_id;
+	inot->seq_id = 0;
 	/*
 	 * This is a somewhat grotesque attempt to map from task management
 	 * to old style SCSI messages. God help us all.
 	 */
 	switch (fc) {
+	case MPT_QUERY_TASK_SET:
+		inot->arg = MSG_QUERY_TASK_SET;
+		break;
 	case MPT_ABORT_TASK_SET:
-		inot->arg = MSG_ABORT_TAG;
+		inot->arg = MSG_ABORT_TASK_SET;
 		break;
 	case MPT_CLEAR_TASK_SET:
 		inot->arg = MSG_CLEAR_TASK_SET;
 		break;
+	case MPT_QUERY_ASYNC_EVENT:
+		inot->arg = MSG_QUERY_ASYNC_EVENT;
+		break;
+	case MPT_LOGICAL_UNIT_RESET:
+		inot->arg = MSG_LOGICAL_UNIT_RESET;
+		break;
 	case MPT_TARGET_RESET:
 		inot->arg = MSG_TARGET_RESET;
 		break;
@@ -4967,19 +4826,12 @@
 	case MPT_CLEAR_ACA:
 		inot->arg = MSG_CLEAR_ACA;
 		break;
-	case MPT_TERMINATE_TASK:
-		inot->arg = MSG_ABORT_TAG;
-		break;
 	default:
 		inot->arg = MSG_NOOP;
 		break;
 	}
-	/*
-	 * XXX KDM we need the sequence/tag number for the target of the
-	 * task management operation, especially if it is an abort.
-	 */
 	tgt->ccb = (union ccb *) inot;
-	inot->ccb_h.status = CAM_MESSAGE_RECV|CAM_DEV_QFRZN;
+	inot->ccb_h.status = CAM_MESSAGE_RECV;
 	xpt_done((union ccb *)inot);
 }
 
@@ -5000,7 +4852,6 @@
 	tgt_resource_t *trtp = NULL;
 	U8 *lunptr;
 	U8 *vbuf;
-	U16 itag;
 	U16 ioindex;
 	mpt_task_mgmt_t fct = MPT_NIL_TMT_VALUE;
 	uint8_t *cdbp;
@@ -5010,6 +4861,12 @@
 	 */
 	vbuf = req->req_vbuf;
 	vbuf += MPT_RQSL(mpt);
+	if (mpt->verbose >= MPT_PRT_DEBUG) {
+		mpt_dump_data(mpt, "mpt_scsi_tgt_atio response", vbuf,
+		    max(sizeof (MPI_TARGET_FCP_CMD_BUFFER),
+		    max(sizeof (MPI_TARGET_SSP_CMD_BUFFER),
+		    sizeof (MPI_TARGET_SCSI_SPI_CMD_BUFFER))));
+	}
 
 	/*
 	 * Get our state pointer set up.
@@ -5023,12 +4880,16 @@
 	tgt->state = TGT_STATE_IN_CAM;
 	tgt->reply_desc = reply_desc;
 	ioindex = GET_IO_INDEX(reply_desc);
-	if (mpt->verbose >= MPT_PRT_DEBUG) {
-		mpt_dump_data(mpt, "mpt_scsi_tgt_atio response", vbuf,
-		    max(sizeof (MPI_TARGET_FCP_CMD_BUFFER),
-		    max(sizeof (MPI_TARGET_SSP_CMD_BUFFER),
-		    sizeof (MPI_TARGET_SCSI_SPI_CMD_BUFFER))));
-	}
+
+	/*
+	 * The tag we construct here allows us to find the
+	 * original request that the command came in with.
+	 *
+	 * This way we don't have to depend on anything but the
+	 * tag to find things when CCBs show back up from CAM.
+	 */
+	tgt->tag_id = MPT_MAKE_TAGID(mpt, req, ioindex);
+
 	if (mpt->is_fc) {
 		PTR_MPI_TARGET_FCP_CMD_BUFFER fc;
 		fc = (PTR_MPI_TARGET_FCP_CMD_BUFFER) vbuf;
@@ -5037,6 +4898,9 @@
 			 * Task Management Request
 			 */
 			switch (fc->FcpCntl[2]) {
+			case 0x1:
+				fct = MPT_QUERY_TASK_SET;
+				break;
 			case 0x2:
 				fct = MPT_ABORT_TASK_SET;
 				break;
@@ -5043,6 +4907,12 @@
 			case 0x4:
 				fct = MPT_CLEAR_TASK_SET;
 				break;
+			case 0x8:
+				fct = MPT_QUERY_ASYNC_EVENT;
+				break;
+			case 0x10:
+				fct = MPT_LOGICAL_UNIT_RESET;
+				break;
 			case 0x20:
 				fct = MPT_TARGET_RESET;
 				break;
@@ -5049,14 +4919,11 @@
 			case 0x40:
 				fct = MPT_CLEAR_ACA;
 				break;
-			case 0x80:
-				fct = MPT_TERMINATE_TASK;
-				break;
 			default:
 				mpt_prt(mpt, "CORRUPTED TASK MGMT BITS: 0x%x\n",
 				    fc->FcpCntl[2]);
-				mpt_scsi_tgt_status(mpt, 0, req,
-				    SCSI_STATUS_OK, 0);
+				mpt_scsi_tgt_status(mpt, NULL, req,
+				    SCSI_STATUS_OK, NULL, 0);
 				return;
 			}
 		} else {
@@ -5081,36 +4948,22 @@
 		tgt->resid = be32toh(fc->FcpDl);
 		cdbp = fc->FcpCdb;
 		lunptr = fc->FcpLun;
-		itag = be16toh(fc->OptionalOxid);
+		tgt->itag = fc->OptionalOxid;
 	} else if (mpt->is_sas) {
 		PTR_MPI_TARGET_SSP_CMD_BUFFER ssp;
 		ssp = (PTR_MPI_TARGET_SSP_CMD_BUFFER) vbuf;
 		cdbp = ssp->CDB;
 		lunptr = ssp->LogicalUnitNumber;
-		itag = ssp->InitiatorTag;
+		tgt->itag = ssp->InitiatorTag;
 	} else {
 		PTR_MPI_TARGET_SCSI_SPI_CMD_BUFFER sp;
 		sp = (PTR_MPI_TARGET_SCSI_SPI_CMD_BUFFER) vbuf;
 		cdbp = sp->CDB;
 		lunptr = sp->LogicalUnitNumber;
-		itag = sp->Tag;
+		tgt->itag = sp->Tag;
 	}
 
-	/*
-	 * Generate a simple lun
-	 */
-	switch (lunptr[0] & 0xc0) {
-	case 0x40:
-		lun = ((lunptr[0] & 0x3f) << 8) | lunptr[1];
-		break;
-	case 0:
-		lun = lunptr[1];
-		break;
-	default:
-		mpt_lprt(mpt, MPT_PRT_ERROR, "cannot handle this type lun\n");
-		lun = 0xffff;
-		break;
-	}
+	lun = CAM_EXTLUN_BYTE_SWIZZLE(be64dec(lunptr));
 
 	/*
 	 * Deal with non-enabled or bad luns here.
@@ -5130,23 +4983,20 @@
 			 * REPORT LUNS gets illegal command.
 			 * All other commands get 'no such device'.
 			 */
-			uint8_t *sp, cond, buf[MPT_SENSE_SIZE];
+			uint8_t sense[MPT_SENSE_SIZE];
 			size_t len;
 
-			memset(buf, 0, MPT_SENSE_SIZE);
-			cond = SCSI_STATUS_CHECK_COND;
-			buf[0] = 0xf0;
-			buf[2] = 0x5;
-			buf[7] = 0x8;
-			sp = buf;
-			tgt->tag_id = MPT_MAKE_TAGID(mpt, req, ioindex);
+			memset(sense, 0, sizeof(sense));
+			sense[0] = 0xf0;
+			sense[2] = 0x5;
+			sense[7] = 0x8;
 
 			switch (cdbp[0]) {
 			case INQUIRY:
 			{
 				if (cdbp[1] != 0) {
-					buf[12] = 0x26;
-					buf[13] = 0x01;
+					sense[12] = 0x26;
+					sense[13] = 0x01;
 					break;
 				}
 				len = min(tgt->resid, cdbp[4]);
@@ -5159,27 +5009,28 @@
 			}
 			case REQUEST_SENSE:
 			{
-				buf[2] = 0x0;
+				sense[2] = 0x0;
 				len = min(tgt->resid, cdbp[4]);
-				len = min(len, sizeof (buf));
+				len = min(len, sizeof (sense));
 				mpt_lprt(mpt, MPT_PRT_DEBUG,
 				    "local reqsense %ld bytes\n", (long) len);
 				mpt_scsi_tgt_local(mpt, req, lun, 1,
-				    buf, len);
+				    sense, len);
 				return;
 			}
 			case REPORT_LUNS:
 				mpt_lprt(mpt, MPT_PRT_DEBUG, "REPORT LUNS\n");
-				buf[12] = 0x26;
+				sense[12] = 0x26;
 				return;
 			default:
 				mpt_lprt(mpt, MPT_PRT_DEBUG,
 				    "CMD 0x%x to unmanaged lun %u\n",
 				    cdbp[0], lun);
-				buf[12] = 0x25;
+				sense[12] = 0x25;
 				break;
 			}
-			mpt_scsi_tgt_status(mpt, NULL, req, cond, sp);
+			mpt_scsi_tgt_status(mpt, NULL, req,
+			    SCSI_STATUS_CHECK_COND, sense, sizeof(sense));
 			return;
 		}
 		/* otherwise, leave trtp NULL */
@@ -5194,8 +5045,8 @@
 		if (trtp == NULL) {
 			mpt_prt(mpt, "task mgmt function %x but no listener\n",
 			    fct);
-			mpt_scsi_tgt_status(mpt, 0, req,
-			    SCSI_STATUS_OK, 0);
+			mpt_scsi_tgt_status(mpt, NULL, req,
+			    SCSI_STATUS_OK, NULL, 0);
 		} else {
 			mpt_scsi_tgt_tsk_mgmt(mpt, req, fct, trtp,
 			    GET_INITIATOR_INDEX(reply_desc));
@@ -5211,7 +5062,7 @@
 		    mpt->tenabled? "QUEUE FULL" : "BUSY");
 		mpt_scsi_tgt_status(mpt, NULL, req,
 		    mpt->tenabled? SCSI_STATUS_QUEUE_FULL : SCSI_STATUS_BUSY,
-		    NULL);
+		    NULL, 0);
 		return;
 	}
 	STAILQ_REMOVE_HEAD(&trtp->atios, sim_links.stqe);
@@ -5221,22 +5072,13 @@
 	atiop->ccb_h.status = CAM_CDB_RECVD;
 	atiop->ccb_h.target_lun = lun;
 	atiop->sense_len = 0;
+	atiop->tag_id = tgt->tag_id;
 	atiop->init_id = GET_INITIATOR_INDEX(reply_desc);
-	atiop->cdb_len = mpt_cdblen(cdbp[0], 16);
+	atiop->cdb_len = 16;
 	memcpy(atiop->cdb_io.cdb_bytes, cdbp, atiop->cdb_len);
-
-	/*
-	 * The tag we construct here allows us to find the
-	 * original request that the command came in with.
-	 *
-	 * This way we don't have to depend on anything but the
-	 * tag to find things when CCBs show back up from CAM.
-	 */
-	atiop->tag_id = MPT_MAKE_TAGID(mpt, req, ioindex);
-	tgt->tag_id = atiop->tag_id;
 	if (tag_action) {
 		atiop->tag_action = tag_action;
-		atiop->ccb_h.flags = CAM_TAG_ACTION_VALID;
+		atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID;
 	}
 	if (mpt->verbose >= MPT_PRT_DEBUG) {
 		int i;
@@ -5247,7 +5089,7 @@
 			    (i == (atiop->cdb_len - 1))? '>' : ' ');
 		}
 		mpt_prtc(mpt, " itag %x tag %x rdesc %x dl=%u\n",
-	    	    itag, atiop->tag_id, tgt->reply_desc, tgt->resid);
+		    tgt->itag, tgt->tag_id, tgt->reply_desc, tgt->resid);
 	}
 	
 	xpt_done((union ccb *)atiop);
@@ -5259,9 +5101,9 @@
 	mpt_tgt_state_t *tgt = MPT_TGT_STATE(mpt, req);
 
 	mpt_prt(mpt, "req %p:%u tgt:rdesc 0x%x resid %u xfrd %u ccb %p treq %p "
-	    "nx %d tag 0x%08x state=%d\n", req, req->serno, tgt->reply_desc,
-	    tgt->resid, tgt->bytes_xfered, tgt->ccb, tgt->req, tgt->nxfers,
-	    tgt->tag_id, tgt->state);
+	    "nx %d tag 0x%08x itag 0x%04x state=%d\n", req, req->serno,
+	    tgt->reply_desc, tgt->resid, tgt->bytes_xfered, tgt->ccb,
+	    tgt->req, tgt->nxfers, tgt->tag_id, tgt->itag, tgt->state);
 }
 
 static void
@@ -5303,8 +5145,6 @@
 			break;
 		case TGT_STATE_MOVING_DATA:
 		{
-			uint8_t *sp = NULL, sense[MPT_SENSE_SIZE];
-
 			ccb = tgt->ccb;
 			if (tgt->req == NULL) {
 				panic("mpt: turbo target reply with null "
@@ -5324,12 +5164,12 @@
 				mpt_free_request(mpt, tgt->req);
 				tgt->req = NULL;
 				mpt_scsi_tgt_status(mpt, NULL, req,
-				    0, NULL);
+				    0, NULL, 0);
 				return (TRUE);
 			}
 			tgt->ccb = NULL;
 			tgt->nxfers++;
-			mpt_req_untimeout(req, mpt_timeout, ccb);
+			mpt_req_untimeout(tgt->req, mpt_timeout, ccb);
 			mpt_lprt(mpt, MPT_PRT_DEBUG,
 			    "TARGET_ASSIST %p (req %p:%u) done tag 0x%x\n",
 			    ccb, tgt->req, tgt->req->serno, ccb->csio.tag_id);
@@ -5365,13 +5205,11 @@
 			/*
 			 * Otherwise, send status (and sense)
 			 */
-			if (ccb->ccb_h.flags & CAM_SEND_SENSE) {
-				sp = sense;
-				memcpy(sp, &ccb->csio.sense_data,
-				   min(ccb->csio.sense_len, MPT_SENSE_SIZE));
-			}
 			mpt_scsi_tgt_status(mpt, ccb, req,
-			    ccb->csio.scsi_status, sp);
+			    ccb->csio.scsi_status,
+			    (void *)&ccb->csio.sense_data,
+			    (ccb->ccb_h.flags & CAM_SEND_SENSE) ?
+			     ccb->csio.sense_len : 0);
 			break;
 		}
 		case TGT_STATE_SENDING_STATUS:
@@ -5392,7 +5230,7 @@
 				    TGT_STATE_MOVING_DATA_AND_STATUS) {
 					tgt->nxfers++;
 				}
-				mpt_req_untimeout(req, mpt_timeout, ccb);
+				mpt_req_untimeout(tgt->req, mpt_timeout, ccb);
 				if (ccb->ccb_h.flags & CAM_SEND_SENSE) {
 					ccb->ccb_h.status |= CAM_SENT_SENSE;
 				}
@@ -5416,7 +5254,7 @@
 				tgt->ccb = NULL;
 			} else {
 				mpt_lprt(mpt, MPT_PRT_DEBUG,
-				    "TARGET_STATUS non-CAM for  req %p:%u\n",
+				    "TARGET_STATUS non-CAM for req %p:%u\n",
 				    tgt->req, tgt->req->serno);
 			}
 			TAILQ_REMOVE(&mpt->request_pending_list,
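The mpt_cam.c hunks above rework mpt_scsi_tgt_status(): sense data is now passed with an explicit length, the FCP response is laid out as a 24-byte header followed by the sense bytes, and the 0x400, 0x800 and 0x200 bits in the third response word flag residual overrun, residual underrun and valid sense data. A standalone userland sketch of that layout follows; it is illustrative only (the helper names, the put_be32() wrapper and main() are not driver code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MIN_FCP_RESPONSE_SIZE	24

/* Store a 32-bit value big-endian, as htobe32() does in the driver. */
static void
put_be32(uint8_t *p, uint32_t v)
{
	p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v;
}

/* Pack SCSI status, residual and sense data into an FCP response. */
static size_t
build_fcp_rsp(uint8_t *rsp, uint8_t status, int32_t resid,
    const uint8_t *sense, uint32_t sense_len)
{
	uint32_t flags = status;
	size_t resplen = MIN_FCP_RESPONSE_SIZE;

	memset(rsp, 0, MIN_FCP_RESPONSE_SIZE);
	if (resid < 0) {			/* overrun */
		flags |= 0x400;
		put_be32(&rsp[12], (uint32_t)-resid);
	} else if (resid > 0) {			/* underrun */
		flags |= 0x800;
		put_be32(&rsp[12], (uint32_t)resid);
	}
	if (sense_len > 0) {			/* sense follows the header */
		flags |= 0x200;
		put_be32(&rsp[16], sense_len);
		memcpy(&rsp[MIN_FCP_RESPONSE_SIZE], sense, sense_len);
		resplen += sense_len;
	}
	put_be32(&rsp[8], flags);		/* word 2: status + flags */
	return (resplen);
}

int
main(void)
{
	uint8_t rsp[64], sense[18] = { 0xf0, 0, 0x05 };

	printf("%zu response bytes\n",
	    build_fcp_rsp(rsp, 0x02 /* CHECK CONDITION */, 512,
	    sense, sizeof(sense)));
	return (0);
}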

Modified: trunk/sys/dev/mpt/mpt_cam.h
===================================================================
--- trunk/sys/dev/mpt/mpt_cam.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpt_cam.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpt_cam.h 203108 2010-01-28 08:41:30Z mav $ */
 /*-
  * LSI MPT Host Adapter FreeBSD Wrapper Definitions (CAM version)
  *

Modified: trunk/sys/dev/mpt/mpt_debug.c
===================================================================
--- trunk/sys/dev/mpt/mpt_debug.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpt_debug.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Debug routines for LSI '909 FC  adapters.
  * FreeBSD Version.
@@ -64,7 +65,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/mpt/mpt_debug.c 315826 2017-03-23 06:52:29Z mav $");
 
 #include <dev/mpt/mpt.h>
 
@@ -536,7 +537,7 @@
 	printf("\tBus:                %d\n", msg->Bus);
 	printf("\tTargetID            %d\n", msg->TargetID);
 	printf("\tSenseBufferLength   %d\n", msg->SenseBufferLength);
-	printf("\tLUN:              0x%0x\n", msg->LUN[1]);
+	printf("\tLUN:              0x%jx\n", (uintmax_t)be64dec(msg->LUN));
 	printf("\tControl           0x%08x ", msg->Control);
 #define MPI_PRINT_FIELD(x)						\
 	case MPI_SCSIIO_CONTROL_ ## x :					\
@@ -585,7 +586,7 @@
 {
 
 	mpt_print_request_hdr((MSG_REQUEST_HEADER *)msg);
-	printf("\tLun             0x%02x\n", msg->LUN[1]);
+	printf("\tLun             0x%jx\n", (uintmax_t)be64dec(msg->LUN));
 	printf("\tTaskType        %s\n", mpt_scsi_tm_type(msg->TaskType));
 	printf("\tTaskMsgContext  0x%08x\n", msg->TaskMsgContext);
 }
@@ -600,7 +601,7 @@
 	printf("\tTargetAssist  0x%02x\n", msg->TargetAssistFlags);
 	printf("\tQueueTag      0x%04x\n", msg->QueueTag);
 	printf("\tReplyWord     0x%08x\n", msg->ReplyWord);
-	printf("\tLun           0x%02x\n", msg->LUN[1]);
+	printf("\tLun           0x%jx\n", (uintmax_t)be64dec(msg->LUN));
 	printf("\tRelativeOff   0x%08x\n", msg->RelativeOffset);
 	printf("\tDataLength    0x%08x\n", msg->DataLength);
 	mpt_dump_sgl(msg->SGL, 0);
@@ -616,7 +617,7 @@
 	printf("\tStatusFlags   0x%02x\n", msg->StatusFlags);
 	printf("\tQueueTag      0x%04x\n", msg->QueueTag);
 	printf("\tReplyWord     0x%08x\n", msg->ReplyWord);
-	printf("\tLun           0x%02x\n", msg->LUN[1]);
+	printf("\tLun           0x%jx\n", (uintmax_t)be64dec(msg->LUN));
 	x.u.Simple = msg->StatusDataSGE;
 	mpt_dump_sgl(&x, 0);
 }
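The mpt_debug.c hunks stop printing a single byte of the LUN field and decode all eight big-endian LUN bytes with be64dec() instead. On FreeBSD be64dec() comes from <sys/endian.h>; a minimal portable equivalent, shown only to illustrate what the new printf lines report, is:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Decode eight big-endian bytes into a host-order 64-bit value. */
static uint64_t
my_be64dec(const void *buf)
{
	const uint8_t *p = buf;
	uint64_t v = 0;
	int i;

	for (i = 0; i < 8; i++)
		v = (v << 8) | p[i];
	return (v);
}

int
main(void)
{
	/* A single-level LUN 5 as it might appear in an MPI message. */
	uint8_t lun[8] = { 0x00, 0x05, 0, 0, 0, 0, 0, 0 };

	printf("LUN 0x%jx\n", (uintmax_t)my_be64dec(lun));
	return (0);
}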

Modified: trunk/sys/dev/mpt/mpt_pci.c
===================================================================
--- trunk/sys/dev/mpt/mpt_pci.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpt_pci.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * PCI specific probe and attach routines for LSI Fusion Adapters
  * FreeBSD Version.
@@ -99,20 +100,12 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/mpt/mpt_pci.c 315809 2017-03-23 06:34:45Z mav $");
 
 #include <dev/mpt/mpt.h>
 #include <dev/mpt/mpt_cam.h>
 #include <dev/mpt/mpt_raid.h>
 
-#if __FreeBSD_version < 700000
-#define	pci_msix_count(x)	0
-#define	pci_msi_count(x)	0
-#define	pci_alloc_msi(x, y)	1
-#define	pci_alloc_msix(x, y)	1
-#define	pci_release_msi(x)	do { ; } while (0)
-#endif
-
 /*
  * XXX it seems no other MPT driver knows about the following chips.
  */
@@ -149,10 +142,6 @@
 #define	MPI_MANUFACTPAGE_DEVID_SAS1078DE_FB	0x007C
 #endif
 
-#ifndef	PCIM_CMD_SERRESPEN
-#define	PCIM_CMD_SERRESPEN	0x0100
-#endif
-
 static int mpt_pci_probe(device_t);
 static int mpt_pci_attach(device_t);
 static void mpt_free_bus_resources(struct mpt_softc *mpt);
@@ -178,6 +167,7 @@
 static driver_t mpt_driver = {
 	"mpt", mpt_methods, sizeof(struct mpt_softc)
 };
+
 static devclass_t mpt_devclass;
 DRIVER_MODULE(mpt, pci, mpt_driver, mpt_devclass, NULL, NULL);
 MODULE_DEPEND(mpt, pci, 1, 1, 1);
@@ -268,11 +258,6 @@
 
 	tval = 0;
 	if (resource_int_value(device_get_name(mpt->dev),
-	    device_get_unit(mpt->dev), "disable", &tval) == 0 && tval != 0) {
-		mpt->disabled = 1;
-	}
-	tval = 0;
-	if (resource_int_value(device_get_name(mpt->dev),
 	    device_get_unit(mpt->dev), "debug", &tval) == 0 && tval != 0) {
 		mpt->verbose = tval;
 	}
@@ -293,6 +278,7 @@
 	}
 }
 
+#if 0
 static void
 mpt_link_peer(struct mpt_softc *mpt)
 {
@@ -331,6 +317,7 @@
 		mpt->mpt2->mpt2 = NULL;
 	}
 }
+#endif
 
 static int
 mpt_pci_attach(device_t dev)
@@ -337,7 +324,7 @@
 {
 	struct mpt_softc *mpt;
 	int		  iqd;
-	uint32_t	  data, cmd;
+	uint32_t	  val;
 	int		  mpt_io_bar, mpt_mem_bar;
 
 	mpt  = (struct mpt_softc*)device_get_softc(dev);
@@ -394,28 +381,23 @@
 		/* Print INFO level (if any) if bootverbose is set */
 		mpt->verbose += (bootverbose != 0)? 1 : 0;
 	}
-	/* Make sure memory access decoders are enabled */
-	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
-	if ((cmd & PCIM_CMD_MEMEN) == 0) {
-		device_printf(dev, "Memory accesses disabled");
-		return (ENXIO);
-	}
 
 	/*
 	 * Make sure that SERR, PERR, WRITE INVALIDATE and BUSMASTER are set.
 	 */
-	cmd |=
-	    PCIM_CMD_SERRESPEN | PCIM_CMD_PERRESPEN |
+	val = pci_read_config(dev, PCIR_COMMAND, 2);
+	val |= PCIM_CMD_SERRESPEN | PCIM_CMD_PERRESPEN |
 	    PCIM_CMD_BUSMASTEREN | PCIM_CMD_MWRICEN;
-	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
+	pci_write_config(dev, PCIR_COMMAND, val, 2);
 
 	/*
 	 * Make sure we've disabled the ROM.
 	 */
-	data = pci_read_config(dev, PCIR_BIOS, 4);
-	data &= ~PCIM_BIOS_ENABLE;
-	pci_write_config(dev, PCIR_BIOS, data, 4);
+	val = pci_read_config(dev, PCIR_BIOS, 4);
+	val &= ~PCIM_BIOS_ENABLE;
+	pci_write_config(dev, PCIR_BIOS, val, 4);
 
+#if 0
 	/*
 	 * Is this part a dual?
 	 * If so, link with our partner (around yet)
@@ -432,12 +414,13 @@
 	default:
 		break;
 	}
+#endif
 
 	/*
 	 * Figure out which are the I/O and MEM Bars
 	 */
-	data = pci_read_config(dev, PCIR_BAR(0), 4);
-	if (PCI_BAR_IO(data)) {
+	val = pci_read_config(dev, PCIR_BAR(0), 4);
+	if (PCI_BAR_IO(val)) {
 		/* BAR0 is IO, BAR1 is memory */
 		mpt_io_bar = 0;
 		mpt_mem_bar = 1;
@@ -494,25 +477,15 @@
 		 * First try to alloc an MSI-X message.  If that
 		 * fails, then try to alloc an MSI message instead.
 		 */
-		if (pci_msix_count(dev) == 1) {
-			mpt->pci_msi_count = 1;
-			if (pci_alloc_msix(dev, &mpt->pci_msi_count) == 0) {
-				iqd = 1;
-			} else {
-				mpt->pci_msi_count = 0;
-			}
-		}
-		if (iqd == 0 && pci_msi_count(dev) == 1) {
-			mpt->pci_msi_count = 1;
-			if (pci_alloc_msi(dev, &mpt->pci_msi_count) == 0) {
-				iqd = 1;
-			} else {
-				mpt->pci_msi_count = 0;
-			}
-		}
+		val = 1;
+		if (pci_alloc_msix(dev, &val) == 0)
+			iqd = 1;
+		val = 1;
+		if (iqd == 0 && pci_alloc_msi(dev, &val) == 0)
+			iqd = 1;
 	}
 	mpt->pci_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &iqd,
-	    RF_ACTIVE | (mpt->pci_msi_count ? 0 : RF_SHAREABLE));
+	    RF_ACTIVE | (iqd != 0 ? 0 : RF_SHAREABLE));
 	if (mpt->pci_irq == NULL) {
 		device_printf(dev, "could not allocate interrupt\n");
 		goto bad;
@@ -524,7 +497,7 @@
 	mpt_disable_ints(mpt);
 
 	/* Register the interrupt handler */
-	if (mpt_setup_intr(dev, mpt->pci_irq, MPT_IFLAGS, NULL, mpt_pci_intr,
+	if (bus_setup_intr(dev, mpt->pci_irq, MPT_IFLAGS, NULL, mpt_pci_intr,
 	    mpt, &mpt->ih)) {
 		device_printf(dev, "could not setup interrupt\n");
 		goto bad;
@@ -572,7 +545,10 @@
 
 	if (mpt->eh == NULL) {
 		mpt_prt(mpt, "shutdown event registration failed\n");
+		mpt_disable_ints(mpt);
 		(void) mpt_detach(mpt);
+		mpt_reset(mpt, /*reinit*/FALSE);
+		mpt_raid_free_mem(mpt);
 		goto bad;
 	}
 	return (0);
@@ -580,7 +556,9 @@
 bad:
 	mpt_dma_mem_free(mpt);
 	mpt_free_bus_resources(mpt);
+#if 0
 	mpt_unlink_peer(mpt);
+#endif
 
 	MPT_LOCK_DESTROY(mpt);
 
@@ -605,25 +583,21 @@
 	if (mpt->pci_irq) {
 		bus_release_resource(mpt->dev, SYS_RES_IRQ,
 		    rman_get_rid(mpt->pci_irq), mpt->pci_irq);
+		pci_release_msi(mpt->dev);
 		mpt->pci_irq = NULL;
 	}
 
-	if (mpt->pci_msi_count) {
-		pci_release_msi(mpt->dev);
-		mpt->pci_msi_count = 0;
-	}
-		
 	if (mpt->pci_pio_reg) {
 		bus_release_resource(mpt->dev, SYS_RES_IOPORT,
 		    rman_get_rid(mpt->pci_pio_reg), mpt->pci_pio_reg);
 		mpt->pci_pio_reg = NULL;
 	}
+
 	if (mpt->pci_reg) {
 		bus_release_resource(mpt->dev, SYS_RES_MEMORY,
 		    rman_get_rid(mpt->pci_reg), mpt->pci_reg);
 		mpt->pci_reg = NULL;
 	}
-	MPT_LOCK_DESTROY(mpt);
 }
 
 /*
@@ -640,12 +614,16 @@
 		mpt_disable_ints(mpt);
 		mpt_detach(mpt);
 		mpt_reset(mpt, /*reinit*/FALSE);
+		mpt_raid_free_mem(mpt);
 		mpt_dma_mem_free(mpt);
 		mpt_free_bus_resources(mpt);
-		mpt_raid_free_mem(mpt);
+#if 0
+		mpt_unlink_peer(mpt);
+#endif
 		if (mpt->eh != NULL) {
                         EVENTHANDLER_DEREGISTER(shutdown_post_sync, mpt->eh);
 		}
+		MPT_LOCK_DESTROY(mpt);
 	}
 	return(0);
 }
@@ -659,11 +637,8 @@
 	struct mpt_softc *mpt;
 
 	mpt = (struct mpt_softc *)device_get_softc(dev);
-	if (mpt) {
-		int r;
-		r = mpt_shutdown(mpt);
-		return (r);
-	}
+	if (mpt)
+		return (mpt_shutdown(mpt));
 	return(0);
 }
 
@@ -679,20 +654,7 @@
 	}
 
 	len = sizeof (request_t) * MPT_MAX_REQUESTS(mpt);
-#ifdef	RELENG_4
-	mpt->request_pool = (request_t *)malloc(len, M_DEVBUF, M_WAITOK);
-	if (mpt->request_pool == NULL) {
-		mpt_prt(mpt, "cannot allocate request pool\n");
-		return (1);
-	}
-	memset(mpt->request_pool, 0, len);
-#else
 	mpt->request_pool = (request_t *)malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
-	if (mpt->request_pool == NULL) {
-		mpt_prt(mpt, "cannot allocate request pool\n");
-		return (1);
-	}
-#endif
 
 	/*
 	 * Create a parent dma tag for this device.
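The mpt_pci.c changes drop the pci_msi_count bookkeeping and the pre-FreeBSD-7 compatibility shims: the driver now simply asks for one MSI-X vector, falls back to MSI, and only requests a shareable IRQ when it ends up on legacy INTx. A hedged kernel-style sketch of that pattern (the function name and error handling are invented, not the driver's code):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <dev/pci/pcivar.h>

static int
alloc_one_vector(device_t dev, struct resource **irqp)
{
	int iqd = 0, count;

	count = 1;
	if (pci_alloc_msix(dev, &count) == 0)
		iqd = 1;		/* MSI-X granted, use rid 1 */
	count = 1;
	if (iqd == 0 && pci_alloc_msi(dev, &count) == 0)
		iqd = 1;		/* MSI granted, use rid 1 */

	/* Legacy INTx (rid 0) must be shareable; MSI/MSI-X need not be. */
	*irqp = bus_alloc_resource_any(dev, SYS_RES_IRQ, &iqd,
	    RF_ACTIVE | (iqd != 0 ? 0 : RF_SHAREABLE));
	return (*irqp != NULL ? 0 : ENXIO);
}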

Modified: trunk/sys/dev/mpt/mpt_raid.c
===================================================================
--- trunk/sys/dev/mpt/mpt_raid.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpt_raid.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Routines for handling the integrated RAID features LSI MPT Fusion adapters.
  *
@@ -41,7 +42,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/mpt/mpt_raid.c 264949 2014-04-25 22:01:02Z marius $");
 
 #include <dev/mpt/mpt.h>
 #include <dev/mpt/mpt_raid.h>
@@ -605,7 +606,7 @@
 	MPI_pSGE_SET_FLAGS(se, (MPI_SGE_FLAGS_SIMPLE_ELEMENT |
 	    MPI_SGE_FLAGS_LAST_ELEMENT | MPI_SGE_FLAGS_END_OF_BUFFER |
 	    MPI_SGE_FLAGS_END_OF_LIST |
-	    write ? MPI_SGE_FLAGS_HOST_TO_IOC : MPI_SGE_FLAGS_IOC_TO_HOST));
+	    (write ? MPI_SGE_FLAGS_HOST_TO_IOC : MPI_SGE_FLAGS_IOC_TO_HOST)));
 	se->FlagsLength = htole32(se->FlagsLength);
 	rap->MsgContext = htole32(req->index | raid_handler_id);
 
@@ -636,7 +637,7 @@
 	MPT_LOCK(mpt);
 	xpt_freeze_simq(mpt->phydisk_sim, 1);
 	MPT_UNLOCK(mpt);
-	error = mpt_kthread_create(mpt_raid_thread, mpt,
+	error = kproc_create(mpt_raid_thread, mpt,
 	    &mpt->raid_thread, /*flags*/0, /*altstack*/0,
 	    "mpt_raid%d", mpt->unit);
 	if (error != 0) {
@@ -705,7 +706,7 @@
 			ccb = xpt_alloc_ccb();
 
 			MPT_LOCK(mpt);
-			error = xpt_create_path(&ccb->ccb_h.path, xpt_periph,
+			error = xpt_create_path(&ccb->ccb_h.path, NULL,
 			    cam_sim_path(mpt->phydisk_sim),
 			    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD);
 			if (error != CAM_REQ_CMP) {
@@ -719,7 +720,7 @@
 	mpt->raid_thread = NULL;
 	wakeup(&mpt->raid_thread);
 	MPT_UNLOCK(mpt);
-	mpt_kthread_exit(0);
+	kproc_exit(0);
 }
 
 #if 0
@@ -1662,7 +1663,7 @@
 
 		mpt->raid_rescan = 0;
 
-		error = xpt_create_path(&path, xpt_periph,
+		error = xpt_create_path(&path, NULL,
 					cam_sim_path(mpt->sim),
 					mpt_vol->config_page->VolumeID,
 					/*lun*/0);
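Besides the SGE-flags parenthesization discussed after the mpt_user.c diff below, mpt_raid.c now starts and stops its worker thread with the stock kproc_create()/kproc_exit() interface rather than the old mpt_kthread_create() wrappers. A hedged sketch of that pattern, with an invented softc and thread body:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>

struct example_softc {
	struct proc	*worker;
	int		 unit;
};

static void
example_worker(void *arg)
{
	struct example_softc *sc = arg;

	/* ... the service loop would run here ... */
	sc->worker = NULL;
	wakeup(&sc->worker);
	kproc_exit(0);			/* never returns */
}

static int
example_start_worker(struct example_softc *sc)
{
	/* kproc_create() is the in-kernel replacement for kthread_create(9). */
	return (kproc_create(example_worker, sc, &sc->worker,
	    /*flags*/0, /*pages*/0, "example%d", sc->unit));
}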

Modified: trunk/sys/dev/mpt/mpt_raid.h
===================================================================
--- trunk/sys/dev/mpt/mpt_raid.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpt_raid.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpt_raid.h 224494 2011-07-29 18:38:31Z marius $ */
 /*-
  * Definitions for the integrated RAID features LSI MPT Fusion adapters.
  *

Modified: trunk/sys/dev/mpt/mpt_reg.h
===================================================================
--- trunk/sys/dev/mpt/mpt_reg.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpt_reg.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,4 +1,5 @@
 /* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/dev/mpt/mpt_reg.h 231518 2012-02-11 12:03:44Z marius $ */
 /*-
  * Generic defines for LSI '909 FC  adapters.
  * FreeBSD Version.

Modified: trunk/sys/dev/mpt/mpt_user.c
===================================================================
--- trunk/sys/dev/mpt/mpt_user.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mpt/mpt_user.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2008 Yahoo!, Inc.
  * All rights reserved.
@@ -31,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/mpt/mpt_user.c 251187 2013-05-31 17:27:44Z delphij $");
 
 #include <sys/param.h>
 #include <sys/conf.h>
@@ -548,8 +549,8 @@
 		MPI_pSGE_SET_FLAGS(se, (MPI_SGE_FLAGS_SIMPLE_ELEMENT |
 		    MPI_SGE_FLAGS_LAST_ELEMENT | MPI_SGE_FLAGS_END_OF_BUFFER |
 		    MPI_SGE_FLAGS_END_OF_LIST |
-		    raid_act->write ? MPI_SGE_FLAGS_HOST_TO_IOC :
-		    MPI_SGE_FLAGS_IOC_TO_HOST));
+		    (raid_act->write ? MPI_SGE_FLAGS_HOST_TO_IOC :
+		    MPI_SGE_FLAGS_IOC_TO_HOST)));
 	}
 	se->FlagsLength = htole32(se->FlagsLength);
 	rap->MsgContext = htole32(req->index | user_handler_id);
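Both the mpt_raid.c and mpt_user.c hunks above add parentheses around the conditional expression that selects the SGE direction flag. Without them, `|' binds tighter than `?:', so every flag OR'd in before the conditional becomes part of the condition and drops out of the result. A small standalone demonstration (the flag values here are made up, not the MPI definitions):

#include <stdio.h>

#define FLAG_SIMPLE	0x10
#define FLAG_LAST	0x20
#define DIR_WRITE	0x04
#define DIR_READ	0x00

int
main(void)
{
	int write = 0;
	unsigned buggy, fixed;

	/* Parses as (FLAG_SIMPLE | FLAG_LAST | write) ? DIR_WRITE : DIR_READ. */
	buggy = FLAG_SIMPLE | FLAG_LAST | write ? DIR_WRITE : DIR_READ;

	/* With the ternary parenthesized, the other flags survive. */
	fixed = FLAG_SIMPLE | FLAG_LAST | (write ? DIR_WRITE : DIR_READ);

	printf("buggy 0x%02x fixed 0x%02x\n", buggy, fixed);
	return (0);
}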

Added: trunk/sys/dev/mrsas/mrsas.c
===================================================================
--- trunk/sys/dev/mrsas/mrsas.c	                        (rev 0)
+++ trunk/sys/dev/mrsas/mrsas.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,4601 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2015, AVAGO Tech. All rights reserved. Author: Marian Choy
+ * Copyright (c) 2014, LSI Corp. All rights reserved. Author: Marian Choy
+ * Support: freebsdraid at avagotech.com
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer. 2. Redistributions
+ * in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or other
+ * materials provided with the distribution. 3. Neither the name of the
+ * <ORGANIZATION> nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing
+ * official policies,either expressed or implied, of the FreeBSD Project.
+ *
+ * Send feedback to: <megaraidfbsd at avagotech.com> Mail to: AVAGO TECHNOLOGIES 1621
+ * Barber Lane, Milpitas, CA 95035 ATTN: MegaRaid FreeBSD
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/mrsas/mrsas.c 310264 2016-12-19 13:14:39Z kadesai $");
+
+#include <dev/mrsas/mrsas.h>
+#include <dev/mrsas/mrsas_ioctl.h>
+
+#include <cam/cam.h>
+#include <cam/cam_ccb.h>
+
+#include <sys/sysctl.h>
+#include <sys/types.h>
+#include <sys/sysent.h>
+#include <sys/kthread.h>
+#include <sys/taskqueue.h>
+#include <sys/smp.h>
+
+
+/*
+ * Function prototypes
+ */
+static d_open_t mrsas_open;
+static d_close_t mrsas_close;
+static d_read_t mrsas_read;
+static d_write_t mrsas_write;
+static d_ioctl_t mrsas_ioctl;
+static d_poll_t mrsas_poll;
+
+static void mrsas_ich_startup(void *arg);
+static struct mrsas_mgmt_info mrsas_mgmt_info;
+static struct mrsas_ident *mrsas_find_ident(device_t);
+static int mrsas_setup_msix(struct mrsas_softc *sc);
+static int mrsas_allocate_msix(struct mrsas_softc *sc);
+static void mrsas_shutdown_ctlr(struct mrsas_softc *sc, u_int32_t opcode);
+static void mrsas_flush_cache(struct mrsas_softc *sc);
+static void mrsas_reset_reply_desc(struct mrsas_softc *sc);
+static void mrsas_ocr_thread(void *arg);
+static int mrsas_get_map_info(struct mrsas_softc *sc);
+static int mrsas_get_ld_map_info(struct mrsas_softc *sc);
+static int mrsas_sync_map_info(struct mrsas_softc *sc);
+static int mrsas_get_pd_list(struct mrsas_softc *sc);
+static int mrsas_get_ld_list(struct mrsas_softc *sc);
+static int mrsas_setup_irq(struct mrsas_softc *sc);
+static int mrsas_alloc_mem(struct mrsas_softc *sc);
+static int mrsas_init_fw(struct mrsas_softc *sc);
+static int mrsas_setup_raidmap(struct mrsas_softc *sc);
+static void megasas_setup_jbod_map(struct mrsas_softc *sc);
+static int megasas_sync_pd_seq_num(struct mrsas_softc *sc, boolean_t pend);
+static int mrsas_clear_intr(struct mrsas_softc *sc);
+static int mrsas_get_ctrl_info(struct mrsas_softc *sc);
+static void mrsas_update_ext_vd_details(struct mrsas_softc *sc);
+static int
+mrsas_issue_blocked_abort_cmd(struct mrsas_softc *sc,
+    struct mrsas_mfi_cmd *cmd_to_abort);
+static struct mrsas_softc *
+mrsas_get_softc_instance(struct cdev *dev,
+    u_long cmd, caddr_t arg);
+u_int32_t mrsas_read_reg(struct mrsas_softc *sc, int offset);
+u_int8_t
+mrsas_build_mptmfi_passthru(struct mrsas_softc *sc,
+    struct mrsas_mfi_cmd *mfi_cmd);
+void	mrsas_complete_outstanding_ioctls(struct mrsas_softc *sc);
+int	mrsas_transition_to_ready(struct mrsas_softc *sc, int ocr);
+int	mrsas_init_adapter(struct mrsas_softc *sc);
+int	mrsas_alloc_mpt_cmds(struct mrsas_softc *sc);
+int	mrsas_alloc_ioc_cmd(struct mrsas_softc *sc);
+int	mrsas_alloc_ctlr_info_cmd(struct mrsas_softc *sc);
+int	mrsas_ioc_init(struct mrsas_softc *sc);
+int	mrsas_bus_scan(struct mrsas_softc *sc);
+int	mrsas_issue_dcmd(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd);
+int	mrsas_issue_polled(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd);
+int	mrsas_reset_ctrl(struct mrsas_softc *sc, u_int8_t reset_reason);
+int	mrsas_wait_for_outstanding(struct mrsas_softc *sc, u_int8_t check_reason);
+int mrsas_complete_cmd(struct mrsas_softc *sc, u_int32_t MSIxIndex);
+int mrsas_reset_targets(struct mrsas_softc *sc);
+int
+mrsas_issue_blocked_cmd(struct mrsas_softc *sc,
+    struct mrsas_mfi_cmd *cmd);
+int
+mrsas_alloc_tmp_dcmd(struct mrsas_softc *sc, struct mrsas_tmp_dcmd *tcmd,
+    int size);
+void	mrsas_release_mfi_cmd(struct mrsas_mfi_cmd *cmd);
+void	mrsas_wakeup(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd);
+void	mrsas_complete_aen(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd);
+void	mrsas_complete_abort(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd);
+void	mrsas_disable_intr(struct mrsas_softc *sc);
+void	mrsas_enable_intr(struct mrsas_softc *sc);
+void	mrsas_free_ioc_cmd(struct mrsas_softc *sc);
+void	mrsas_free_mem(struct mrsas_softc *sc);
+void	mrsas_free_tmp_dcmd(struct mrsas_tmp_dcmd *tmp);
+void	mrsas_isr(void *arg);
+void	mrsas_teardown_intr(struct mrsas_softc *sc);
+void	mrsas_addr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error);
+void	mrsas_kill_hba(struct mrsas_softc *sc);
+void	mrsas_aen_handler(struct mrsas_softc *sc);
+void
+mrsas_write_reg(struct mrsas_softc *sc, int offset,
+    u_int32_t value);
+void
+mrsas_fire_cmd(struct mrsas_softc *sc, u_int32_t req_desc_lo,
+    u_int32_t req_desc_hi);
+void	mrsas_free_ctlr_info_cmd(struct mrsas_softc *sc);
+void
+mrsas_complete_mptmfi_passthru(struct mrsas_softc *sc,
+    struct mrsas_mfi_cmd *cmd, u_int8_t status);
+void
+mrsas_map_mpt_cmd_status(struct mrsas_mpt_cmd *cmd, u_int8_t status,
+    u_int8_t extStatus);
+struct mrsas_mfi_cmd *mrsas_get_mfi_cmd(struct mrsas_softc *sc);
+
+MRSAS_REQUEST_DESCRIPTOR_UNION *mrsas_build_mpt_cmd
+        (struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd);
+
+extern int mrsas_cam_attach(struct mrsas_softc *sc);
+extern void mrsas_cam_detach(struct mrsas_softc *sc);
+extern void mrsas_cmd_done(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd);
+extern void mrsas_free_frame(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd);
+extern int mrsas_alloc_mfi_cmds(struct mrsas_softc *sc);
+extern struct mrsas_mpt_cmd *mrsas_get_mpt_cmd(struct mrsas_softc *sc);
+extern int mrsas_passthru(struct mrsas_softc *sc, void *arg, u_long ioctlCmd);
+extern uint8_t MR_ValidateMapInfo(struct mrsas_softc *sc);
+extern u_int16_t MR_GetLDTgtId(u_int32_t ld, MR_DRV_RAID_MAP_ALL * map);
+extern MR_LD_RAID *MR_LdRaidGet(u_int32_t ld, MR_DRV_RAID_MAP_ALL * map);
+extern void mrsas_xpt_freeze(struct mrsas_softc *sc);
+extern void mrsas_xpt_release(struct mrsas_softc *sc);
+extern MRSAS_REQUEST_DESCRIPTOR_UNION *
+mrsas_get_request_desc(struct mrsas_softc *sc,
+    u_int16_t index);
+extern int mrsas_bus_scan_sim(struct mrsas_softc *sc, struct cam_sim *sim);
+static int mrsas_alloc_evt_log_info_cmd(struct mrsas_softc *sc);
+static void mrsas_free_evt_log_info_cmd(struct mrsas_softc *sc);
+
+SYSCTL_NODE(_hw, OID_AUTO, mrsas, CTLFLAG_RD, 0, "MRSAS Driver Parameters");
+
+/*
+ * PCI device struct and table
+ *
+ */
+typedef struct mrsas_ident {
+	uint16_t vendor;
+	uint16_t device;
+	uint16_t subvendor;
+	uint16_t subdevice;
+	const char *desc;
+}	MRSAS_CTLR_ID;
+
+MRSAS_CTLR_ID device_table[] = {
+	{0x1000, MRSAS_TBOLT, 0xffff, 0xffff, "AVAGO Thunderbolt SAS Controller"},
+	{0x1000, MRSAS_INVADER, 0xffff, 0xffff, "AVAGO Invader SAS Controller"},
+	{0x1000, MRSAS_FURY, 0xffff, 0xffff, "AVAGO Fury SAS Controller"},
+	{0x1000, MRSAS_INTRUDER, 0xffff, 0xffff, "AVAGO Intruder SAS Controller"},
+	{0x1000, MRSAS_INTRUDER_24, 0xffff, 0xffff, "AVAGO Intruder_24 SAS Controller"},
+	{0x1000, MRSAS_CUTLASS_52, 0xffff, 0xffff, "AVAGO Cutlass_52 SAS Controller"},
+	{0x1000, MRSAS_CUTLASS_53, 0xffff, 0xffff, "AVAGO Cutlass_53 SAS Controller"},
+	{0, 0, 0, 0, NULL}
+};
+
+/*
+ * Character device entry points
+ *
+ */
+static struct cdevsw mrsas_cdevsw = {
+	.d_version = D_VERSION,
+	.d_open = mrsas_open,
+	.d_close = mrsas_close,
+	.d_read = mrsas_read,
+	.d_write = mrsas_write,
+	.d_ioctl = mrsas_ioctl,
+	.d_poll = mrsas_poll,
+	.d_name = "mrsas",
+};
+
+MALLOC_DEFINE(M_MRSAS, "mrsasbuf", "Buffers for the MRSAS driver");
+
+/*
+ * In the cdevsw routines, we find our softc by using the si_drv1 member of
+ * struct cdev.  We set this variable to point to our softc in our attach
+ * routine when we create the /dev entry.
+ */
+int
+mrsas_open(struct cdev *dev, int oflags, int devtype, d_thread_t *td)
+{
+	struct mrsas_softc *sc;
+
+	sc = dev->si_drv1;
+	return (0);
+}
+
+int
+mrsas_close(struct cdev *dev, int fflag, int devtype, d_thread_t *td)
+{
+	struct mrsas_softc *sc;
+
+	sc = dev->si_drv1;
+	return (0);
+}
+
+int
+mrsas_read(struct cdev *dev, struct uio *uio, int ioflag)
+{
+	struct mrsas_softc *sc;
+
+	sc = dev->si_drv1;
+	return (0);
+}
+int
+mrsas_write(struct cdev *dev, struct uio *uio, int ioflag)
+{
+	struct mrsas_softc *sc;
+
+	sc = dev->si_drv1;
+	return (0);
+}
+
+/*
+ * Register Read/Write Functions
+ *
+ */
+void
+mrsas_write_reg(struct mrsas_softc *sc, int offset,
+    u_int32_t value)
+{
+	bus_space_tag_t bus_tag = sc->bus_tag;
+	bus_space_handle_t bus_handle = sc->bus_handle;
+
+	bus_space_write_4(bus_tag, bus_handle, offset, value);
+}
+
+u_int32_t
+mrsas_read_reg(struct mrsas_softc *sc, int offset)
+{
+	bus_space_tag_t bus_tag = sc->bus_tag;
+	bus_space_handle_t bus_handle = sc->bus_handle;
+
+	return ((u_int32_t)bus_space_read_4(bus_tag, bus_handle, offset));
+}
+
+
+/*
+ * Interrupt Disable/Enable/Clear Functions
+ *
+ */
+void
+mrsas_disable_intr(struct mrsas_softc *sc)
+{
+	u_int32_t mask = 0xFFFFFFFF;
+	u_int32_t status;
+
+	sc->mask_interrupts = 1;
+	mrsas_write_reg(sc, offsetof(mrsas_reg_set, outbound_intr_mask), mask);
+	/* Dummy read to force pci flush */
+	status = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_intr_mask));
+}
+
+void
+mrsas_enable_intr(struct mrsas_softc *sc)
+{
+	u_int32_t mask = MFI_FUSION_ENABLE_INTERRUPT_MASK;
+	u_int32_t status;
+
+	sc->mask_interrupts = 0;
+	mrsas_write_reg(sc, offsetof(mrsas_reg_set, outbound_intr_status), ~0);
+	status = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_intr_status));
+
+	mrsas_write_reg(sc, offsetof(mrsas_reg_set, outbound_intr_mask), ~mask);
+	status = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_intr_mask));
+}
+
+static int
+mrsas_clear_intr(struct mrsas_softc *sc)
+{
+	u_int32_t status;
+
+	/* Read received interrupt */
+	status = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_intr_status));
+
+	/* Not our interrupt, so just return */
+	if (!(status & MFI_FUSION_ENABLE_INTERRUPT_MASK))
+		return (0);
+
+	/* We got a reply interrupt */
+	return (1);
+}
+
+/*
+ * PCI Support Functions
+ *
+ */
+static struct mrsas_ident *
+mrsas_find_ident(device_t dev)
+{
+	struct mrsas_ident *pci_device;
+
+	for (pci_device = device_table; pci_device->vendor != 0; pci_device++) {
+		if ((pci_device->vendor == pci_get_vendor(dev)) &&
+		    (pci_device->device == pci_get_device(dev)) &&
+		    ((pci_device->subvendor == pci_get_subvendor(dev)) ||
+		    (pci_device->subvendor == 0xffff)) &&
+		    ((pci_device->subdevice == pci_get_subdevice(dev)) ||
+		    (pci_device->subdevice == 0xffff)))
+			return (pci_device);
+	}
+	return (NULL);
+}
+
+static int
+mrsas_probe(device_t dev)
+{
+	static u_int8_t first_ctrl = 1;
+	struct mrsas_ident *id;
+
+	if ((id = mrsas_find_ident(dev)) != NULL) {
+		if (first_ctrl) {
+			printf("AVAGO MegaRAID SAS FreeBSD mrsas driver version: %s\n",
+			    MRSAS_VERSION);
+			first_ctrl = 0;
+		}
+		device_set_desc(dev, id->desc);
+		/* between BUS_PROBE_DEFAULT and BUS_PROBE_LOW_PRIORITY */
+		return (-30);
+	}
+	return (ENXIO);
+}
+
+/*
+ * mrsas_setup_sysctl:	setup sysctl values for mrsas
+ * input:				Adapter instance soft state
+ *
+ * Setup sysctl entries for mrsas driver.
+ */
+static void
+mrsas_setup_sysctl(struct mrsas_softc *sc)
+{
+	struct sysctl_ctx_list *sysctl_ctx = NULL;
+	struct sysctl_oid *sysctl_tree = NULL;
+	char tmpstr[80], tmpstr2[80];
+
+	/*
+	 * Setup the sysctl variable so the user can change the debug level
+	 * on the fly.
+	 */
+	snprintf(tmpstr, sizeof(tmpstr), "MRSAS controller %d",
+	    device_get_unit(sc->mrsas_dev));
+	snprintf(tmpstr2, sizeof(tmpstr2), "%d", device_get_unit(sc->mrsas_dev));
+
+	sysctl_ctx = device_get_sysctl_ctx(sc->mrsas_dev);
+	if (sysctl_ctx != NULL)
+		sysctl_tree = device_get_sysctl_tree(sc->mrsas_dev);
+
+	if (sysctl_tree == NULL) {
+		sysctl_ctx_init(&sc->sysctl_ctx);
+		sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
+		    SYSCTL_STATIC_CHILDREN(_hw_mrsas), OID_AUTO, tmpstr2,
+		    CTLFLAG_RD, 0, tmpstr);
+		if (sc->sysctl_tree == NULL)
+			return;
+		sysctl_ctx = &sc->sysctl_ctx;
+		sysctl_tree = sc->sysctl_tree;
+	}
+	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
+	    OID_AUTO, "disable_ocr", CTLFLAG_RW, &sc->disableOnlineCtrlReset, 0,
+	    "Disable the use of OCR");
+
+	SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
+	    OID_AUTO, "driver_version", CTLFLAG_RD, MRSAS_VERSION,
+	    strlen(MRSAS_VERSION), "driver version");
+
+	SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
+	    OID_AUTO, "reset_count", CTLFLAG_RD,
+	    &sc->reset_count, 0, "number of ocr from start of the day");
+
+	SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
+	    OID_AUTO, "fw_outstanding", CTLFLAG_RD,
+	    &sc->fw_outstanding.val_rdonly, 0, "FW outstanding commands");
+
+	SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
+	    OID_AUTO, "io_cmds_highwater", CTLFLAG_RD,
+	    &sc->io_cmds_highwater, 0, "Max FW outstanding commands");
+
+	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
+	    OID_AUTO, "mrsas_debug", CTLFLAG_RW, &sc->mrsas_debug, 0,
+	    "Driver debug level");
+
+	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
+	    OID_AUTO, "mrsas_io_timeout", CTLFLAG_RW, &sc->mrsas_io_timeout,
+	    0, "Driver IO timeout value in milliseconds.");
+
+	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
+	    OID_AUTO, "mrsas_fw_fault_check_delay", CTLFLAG_RW,
+	    &sc->mrsas_fw_fault_check_delay,
+	    0, "FW fault check thread delay in seconds. <default is 1 sec>");
+
+	SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
+	    OID_AUTO, "reset_in_progress", CTLFLAG_RD,
+	    &sc->reset_in_progress, 0, "ocr in progress status");
+
+	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
+	    OID_AUTO, "block_sync_cache", CTLFLAG_RW,
+	    &sc->block_sync_cache, 0,
+	    "Block SYNC CACHE at driver. <default: 0, send it to FW>");
+
+}
+
+/*
+ * mrsas_get_tunables:	get tunable parameters.
+ * input:				Adapter instance soft state
+ *
+ * Get tunable parameters. This will help to debug driver at boot time.
+ */
+static void
+mrsas_get_tunables(struct mrsas_softc *sc)
+{
+	char tmpstr[80];
+
+	/* XXX default to some debugging for now */
+	sc->mrsas_debug = MRSAS_FAULT;
+	sc->mrsas_io_timeout = MRSAS_IO_TIMEOUT;
+	sc->mrsas_fw_fault_check_delay = 1;
+	sc->reset_count = 0;
+	sc->reset_in_progress = 0;
+	sc->block_sync_cache = 0;
+
+	/*
+	 * Grab the global variables.
+	 */
+	TUNABLE_INT_FETCH("hw.mrsas.debug_level", &sc->mrsas_debug);
+
+	/*
+	 * Grab the global variables.
+	 */
+	TUNABLE_INT_FETCH("hw.mrsas.lb_pending_cmds", &sc->lb_pending_cmds);
+
+	/* Grab the unit-instance variables */
+	snprintf(tmpstr, sizeof(tmpstr), "dev.mrsas.%d.debug_level",
+	    device_get_unit(sc->mrsas_dev));
+	TUNABLE_INT_FETCH(tmpstr, &sc->mrsas_debug);
+}
+
+/*
+ * mrsas_alloc_evt_log_info_cmd: Allocates memory to get event log information.
+ * Used to get sequence number at driver load time.
+ * input:		Adapter soft state
+ *
+ * Allocates DMAable memory for the event log info internal command.
+ */
+int
+mrsas_alloc_evt_log_info_cmd(struct mrsas_softc *sc)
+{
+	int el_info_size;
+
+	/* Allocate get event log info command */
+	el_info_size = sizeof(struct mrsas_evt_log_info);
+	if (bus_dma_tag_create(sc->mrsas_parent_tag,
+	    1, 0,
+	    BUS_SPACE_MAXADDR_32BIT,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    el_info_size,
+	    1,
+	    el_info_size,
+	    BUS_DMA_ALLOCNOW,
+	    NULL, NULL,
+	    &sc->el_info_tag)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate event log info tag\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamem_alloc(sc->el_info_tag, (void **)&sc->el_info_mem,
+	    BUS_DMA_NOWAIT, &sc->el_info_dmamap)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate event log info cmd mem\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamap_load(sc->el_info_tag, sc->el_info_dmamap,
+	    sc->el_info_mem, el_info_size, mrsas_addr_cb,
+	    &sc->el_info_phys_addr, BUS_DMA_NOWAIT)) {
+		device_printf(sc->mrsas_dev, "Cannot load event log info cmd mem\n");
+		return (ENOMEM);
+	}
+	memset(sc->el_info_mem, 0, el_info_size);
+	return (0);
+}
+
+/*
+ * mrsas_free_evt_log_info_cmd:	Free memory for Event log info command
+ * input:					Adapter soft state
+ *
+ * Deallocates memory for the event log info internal command.
+ */
+void
+mrsas_free_evt_log_info_cmd(struct mrsas_softc *sc)
+{
+	if (sc->el_info_phys_addr)
+		bus_dmamap_unload(sc->el_info_tag, sc->el_info_dmamap);
+	if (sc->el_info_mem != NULL)
+		bus_dmamem_free(sc->el_info_tag, sc->el_info_mem, sc->el_info_dmamap);
+	if (sc->el_info_tag != NULL)
+		bus_dma_tag_destroy(sc->el_info_tag);
+}
+
+/*
+ *  mrsas_get_seq_num:	Get latest event sequence number
+ *  @sc:				Adapter soft state
+ *  @eli:				Firmware event log sequence number information.
+ *
+ * Firmware maintains a log of all events in a non-volatile area.
+ * The driver gets the sequence number using DCMD
+ * "MR_DCMD_CTRL_EVENT_GET_INFO" at driver load time.
+ */
+
+static int
+mrsas_get_seq_num(struct mrsas_softc *sc,
+    struct mrsas_evt_log_info *eli)
+{
+	struct mrsas_mfi_cmd *cmd;
+	struct mrsas_dcmd_frame *dcmd;
+	u_int8_t do_ocr = 1, retcode = 0;
+
+	cmd = mrsas_get_mfi_cmd(sc);
+
+	if (!cmd) {
+		device_printf(sc->mrsas_dev, "Failed to get a free cmd\n");
+		return -ENOMEM;
+	}
+	dcmd = &cmd->frame->dcmd;
+
+	if (mrsas_alloc_evt_log_info_cmd(sc) != SUCCESS) {
+		device_printf(sc->mrsas_dev, "Cannot allocate evt log info cmd\n");
+		mrsas_release_mfi_cmd(cmd);
+		return -ENOMEM;
+	}
+	memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+
+	dcmd->cmd = MFI_CMD_DCMD;
+	dcmd->cmd_status = 0x0;
+	dcmd->sge_count = 1;
+	dcmd->flags = MFI_FRAME_DIR_READ;
+	dcmd->timeout = 0;
+	dcmd->pad_0 = 0;
+	dcmd->data_xfer_len = sizeof(struct mrsas_evt_log_info);
+	dcmd->opcode = MR_DCMD_CTRL_EVENT_GET_INFO;
+	dcmd->sgl.sge32[0].phys_addr = sc->el_info_phys_addr;
+	dcmd->sgl.sge32[0].length = sizeof(struct mrsas_evt_log_info);
+
+	retcode = mrsas_issue_blocked_cmd(sc, cmd);
+	if (retcode == ETIMEDOUT)
+		goto dcmd_timeout;
+
+	do_ocr = 0;
+	/*
+	 * Copy the data back into the caller's buffer
+	 */
+	memcpy(eli, sc->el_info_mem, sizeof(struct mrsas_evt_log_info));
+	mrsas_free_evt_log_info_cmd(sc);
+
+dcmd_timeout:
+	if (do_ocr)
+		sc->do_timedout_reset = MFI_DCMD_TIMEOUT_OCR;
+	else
+		mrsas_release_mfi_cmd(cmd);
+
+	return retcode;
+}
+
+
+/*
+ *  mrsas_register_aen:		Register for asynchronous event notification
+ *  @sc:			Adapter soft state
+ *  @seq_num:			Starting sequence number
+ *  @class_locale:		Class of the event
+ *
+ *  This function subscribes for events beyond the @seq_num
+ *  and type @class_locale.
+ *
+ */
+static int
+mrsas_register_aen(struct mrsas_softc *sc, u_int32_t seq_num,
+    u_int32_t class_locale_word)
+{
+	int ret_val;
+	struct mrsas_mfi_cmd *cmd;
+	struct mrsas_dcmd_frame *dcmd;
+	union mrsas_evt_class_locale curr_aen;
+	union mrsas_evt_class_locale prev_aen;
+
+	/*
+	 * If there is an AEN pending already (aen_cmd), check if the
+	 * class_locale of that pending AEN is inclusive of the new AEN
+	 * request we currently have. If it is, then we don't have to do
+	 * anything. In other words, whichever events the current AEN request
+	 * is subscribing to, have already been subscribed to. If the old_cmd
+	 * is _not_ inclusive, then we have to abort that command, form a
+	 * class_locale that is superset of both old and current and re-issue
+	 * to the FW
+	 */
+
+	curr_aen.word = class_locale_word;
+
+	if (sc->aen_cmd) {
+
+		prev_aen.word = sc->aen_cmd->frame->dcmd.mbox.w[1];
+
+		/*
+		 * A class whose enum value is smaller is inclusive of all
+		 * higher values. If a PROGRESS (= -1) was previously
+		 * registered, then a new registration requests for higher
+		 * registered, then new registration requests for higher
+		 * included. Locale numbers don't have such hierarchy. They
+		 * are bitmap values
+		 */
+		if ((prev_aen.members.class <= curr_aen.members.class) &&
+		    !((prev_aen.members.locale & curr_aen.members.locale) ^
+		    curr_aen.members.locale)) {
+			/*
+			 * Previously issued event registration includes
+			 * current request. Nothing to do.
+			 */
+			return 0;
+		} else {
+			curr_aen.members.locale |= prev_aen.members.locale;
+
+			if (prev_aen.members.class < curr_aen.members.class)
+				curr_aen.members.class = prev_aen.members.class;
+
+			sc->aen_cmd->abort_aen = 1;
+			ret_val = mrsas_issue_blocked_abort_cmd(sc,
+			    sc->aen_cmd);
+
+			if (ret_val) {
+				printf("mrsas: Failed to abort previous AEN command\n");
+				return ret_val;
+			} else
+				sc->aen_cmd = NULL;
+		}
+	}
+	cmd = mrsas_get_mfi_cmd(sc);
+	if (!cmd)
+		return ENOMEM;
+
+	dcmd = &cmd->frame->dcmd;
+
+	memset(sc->evt_detail_mem, 0, sizeof(struct mrsas_evt_detail));
+
+	/*
+	 * Prepare DCMD for aen registration
+	 */
+	memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+
+	dcmd->cmd = MFI_CMD_DCMD;
+	dcmd->cmd_status = 0x0;
+	dcmd->sge_count = 1;
+	dcmd->flags = MFI_FRAME_DIR_READ;
+	dcmd->timeout = 0;
+	dcmd->pad_0 = 0;
+	dcmd->data_xfer_len = sizeof(struct mrsas_evt_detail);
+	dcmd->opcode = MR_DCMD_CTRL_EVENT_WAIT;
+	dcmd->mbox.w[0] = seq_num;
+	sc->last_seq_num = seq_num;
+	dcmd->mbox.w[1] = curr_aen.word;
+	dcmd->sgl.sge32[0].phys_addr = (u_int32_t)sc->evt_detail_phys_addr;
+	dcmd->sgl.sge32[0].length = sizeof(struct mrsas_evt_detail);
+
+	if (sc->aen_cmd != NULL) {
+		mrsas_release_mfi_cmd(cmd);
+		return 0;
+	}
+	/*
+	 * Store reference to the cmd used to register for AEN. When an
+	 * application wants us to register for AEN, we have to abort this
+	 * cmd and re-register with a new EVENT LOCALE supplied by that app
+	 */
+	sc->aen_cmd = cmd;
+
+	/*
+	 * Issue the aen registration frame
+	 */
+	if (mrsas_issue_dcmd(sc, cmd)) {
+		device_printf(sc->mrsas_dev, "Cannot issue AEN DCMD command.\n");
+		return (1);
+	}
+	return 0;
+}
+
+/*
+ * mrsas_start_aen:	Subscribes to AEN during driver load time
+ * @instance:		Adapter soft state
+ */
+static int
+mrsas_start_aen(struct mrsas_softc *sc)
+{
+	struct mrsas_evt_log_info eli;
+	union mrsas_evt_class_locale class_locale;
+
+
+	/* Get the latest sequence number from FW */
+
+	memset(&eli, 0, sizeof(eli));
+
+	if (mrsas_get_seq_num(sc, &eli))
+		return -1;
+
+	/* Register AEN with FW for latest sequence number plus 1 */
+	class_locale.members.reserved = 0;
+	class_locale.members.locale = MR_EVT_LOCALE_ALL;
+	class_locale.members.class = MR_EVT_CLASS_DEBUG;
+
+	return mrsas_register_aen(sc, eli.newest_seq_num + 1,
+	    class_locale.word);
+
+}
+
+/*
+ * mrsas_setup_msix:	Set up interrupt handlers for the MSI-x vectors
+ * @sc:					adapter soft state
+ */
+static int
+mrsas_setup_msix(struct mrsas_softc *sc)
+{
+	int i;
+
+	for (i = 0; i < sc->msix_vectors; i++) {
+		sc->irq_context[i].sc = sc;
+		sc->irq_context[i].MSIxIndex = i;
+		sc->irq_id[i] = i + 1;
+		sc->mrsas_irq[i] = bus_alloc_resource_any
+		    (sc->mrsas_dev, SYS_RES_IRQ, &sc->irq_id[i]
+		    ,RF_ACTIVE);
+		if (sc->mrsas_irq[i] == NULL) {
+			device_printf(sc->mrsas_dev, "Can't allocate MSI-x\n");
+			goto irq_alloc_failed;
+		}
+		if (bus_setup_intr(sc->mrsas_dev,
+		    sc->mrsas_irq[i],
+		    INTR_MPSAFE | INTR_TYPE_CAM,
+		    NULL, mrsas_isr, &sc->irq_context[i],
+		    &sc->intr_handle[i])) {
+			device_printf(sc->mrsas_dev,
+			    "Cannot set up MSI-x interrupt handler\n");
+			goto irq_alloc_failed;
+		}
+	}
+	return SUCCESS;
+
+irq_alloc_failed:
+	mrsas_teardown_intr(sc);
+	return (FAIL);
+}
+
+/*
+ * mrsas_allocate_msix:		Allocate MSI-x vectors
+ * @sc:						adapter soft state
+ */
+static int
+mrsas_allocate_msix(struct mrsas_softc *sc)
+{
+	if (pci_alloc_msix(sc->mrsas_dev, &sc->msix_vectors) == 0) {
+		device_printf(sc->mrsas_dev, "Using MSI-X with %d number"
+		    " of vectors\n", sc->msix_vectors);
+	} else {
+		device_printf(sc->mrsas_dev, "MSI-x setup failed\n");
+		goto irq_alloc_failed;
+	}
+	return SUCCESS;
+
+irq_alloc_failed:
+	mrsas_teardown_intr(sc);
+	return (FAIL);
+}
+
+/*
+ * mrsas_attach:	PCI entry point
+ * input:			pointer to device struct
+ *
+ * Performs setup of PCI and registers, initializes mutexes and linked lists,
+ * registers interrupts and CAM, and initializes the adapter/controller to
+ * its proper state.
+ */
+static int
+mrsas_attach(device_t dev)
+{
+	struct mrsas_softc *sc = device_get_softc(dev);
+	uint32_t cmd, bar, error;
+
+	memset(sc, 0, sizeof(struct mrsas_softc));
+
+	/* Look up our softc and initialize its fields. */
+	sc->mrsas_dev = dev;
+	sc->device_id = pci_get_device(dev);
+
+	if ((sc->device_id == MRSAS_INVADER) ||
+	    (sc->device_id == MRSAS_FURY) ||
+	    (sc->device_id == MRSAS_INTRUDER) ||
+	    (sc->device_id == MRSAS_INTRUDER_24) ||
+	    (sc->device_id == MRSAS_CUTLASS_52) ||
+	    (sc->device_id == MRSAS_CUTLASS_53)) {
+		sc->mrsas_gen3_ctrl = 1;
+    }
+
+	mrsas_get_tunables(sc);
+
+	/*
+	 * Set up PCI and registers
+	 */
+	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
+	if ((cmd & PCIM_CMD_PORTEN) == 0) {
+		return (ENXIO);
+	}
+	/* Force the busmaster enable bit on. */
+	cmd |= PCIM_CMD_BUSMASTEREN;
+	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
+
+	bar = pci_read_config(dev, MRSAS_PCI_BAR1, 4);
+
+	sc->reg_res_id = MRSAS_PCI_BAR1;/* BAR1 offset */
+	if ((sc->reg_res = bus_alloc_resource(dev, SYS_RES_MEMORY,
+	    &(sc->reg_res_id), 0, ~0, 1, RF_ACTIVE))
+	    == NULL) {
+		device_printf(dev, "Cannot allocate PCI registers\n");
+		goto attach_fail;
+	}
+	sc->bus_tag = rman_get_bustag(sc->reg_res);
+	sc->bus_handle = rman_get_bushandle(sc->reg_res);
+
+	/* Initialize mutexes */
+	mtx_init(&sc->sim_lock, "mrsas_sim_lock", NULL, MTX_DEF);
+	mtx_init(&sc->pci_lock, "mrsas_pci_lock", NULL, MTX_DEF);
+	mtx_init(&sc->io_lock, "mrsas_io_lock", NULL, MTX_DEF);
+	mtx_init(&sc->aen_lock, "mrsas_aen_lock", NULL, MTX_DEF);
+	mtx_init(&sc->ioctl_lock, "mrsas_ioctl_lock", NULL, MTX_SPIN);
+	mtx_init(&sc->mpt_cmd_pool_lock, "mrsas_mpt_cmd_pool_lock", NULL, MTX_DEF);
+	mtx_init(&sc->mfi_cmd_pool_lock, "mrsas_mfi_cmd_pool_lock", NULL, MTX_DEF);
+	mtx_init(&sc->raidmap_lock, "mrsas_raidmap_lock", NULL, MTX_DEF);
+
+	/* Initialize linked lists */
+	TAILQ_INIT(&sc->mrsas_mpt_cmd_list_head);
+	TAILQ_INIT(&sc->mrsas_mfi_cmd_list_head);
+
+	mrsas_atomic_set(&sc->fw_outstanding, 0);
+	mrsas_atomic_set(&sc->target_reset_outstanding, 0);
+
+	sc->io_cmds_highwater = 0;
+
+	sc->adprecovery = MRSAS_HBA_OPERATIONAL;
+	sc->UnevenSpanSupport = 0;
+
+	sc->msix_enable = 0;
+
+	/* Initialize Firmware */
+	if (mrsas_init_fw(sc) != SUCCESS) {
+		goto attach_fail_fw;
+	}
+	/* Register mrsas to CAM layer */
+	if ((mrsas_cam_attach(sc) != SUCCESS)) {
+		goto attach_fail_cam;
+	}
+	/* Register IRQs */
+	if (mrsas_setup_irq(sc) != SUCCESS) {
+		goto attach_fail_irq;
+	}
+	error = mrsas_kproc_create(mrsas_ocr_thread, sc,
+	    &sc->ocr_thread, 0, 0, "mrsas_ocr%d",
+	    device_get_unit(sc->mrsas_dev));
+	if (error) {
+		device_printf(sc->mrsas_dev, "Error %d starting OCR thread\n", error);
+		goto attach_fail_ocr_thread;
+	}
+	/*
+	 * After FW initialization and OCR thread creation
+	 * we defer cdev creation and AEN setup to the ICH callback.
+	 */
+	sc->mrsas_ich.ich_func = mrsas_ich_startup;
+	sc->mrsas_ich.ich_arg = sc;
+	if (config_intrhook_establish(&sc->mrsas_ich) != 0) {
+		device_printf(sc->mrsas_dev, "Config hook is already established\n");
+	}
+	mrsas_setup_sysctl(sc);
+	return SUCCESS;
+
+attach_fail_ocr_thread:
+	if (sc->ocr_thread_active)
+		wakeup(&sc->ocr_chan);
+attach_fail_irq:
+	mrsas_teardown_intr(sc);
+attach_fail_cam:
+	mrsas_cam_detach(sc);
+attach_fail_fw:
+	/* if MSIX vector is allocated and FW Init FAILED then release MSIX */
+	if (sc->msix_enable == 1)
+		pci_release_msi(sc->mrsas_dev);
+	mrsas_free_mem(sc);
+	mtx_destroy(&sc->sim_lock);
+	mtx_destroy(&sc->aen_lock);
+	mtx_destroy(&sc->pci_lock);
+	mtx_destroy(&sc->io_lock);
+	mtx_destroy(&sc->ioctl_lock);
+	mtx_destroy(&sc->mpt_cmd_pool_lock);
+	mtx_destroy(&sc->mfi_cmd_pool_lock);
+	mtx_destroy(&sc->raidmap_lock);
+attach_fail:
+	if (sc->reg_res) {
+		bus_release_resource(sc->mrsas_dev, SYS_RES_MEMORY,
+		    sc->reg_res_id, sc->reg_res);
+	}
+	return (ENXIO);
+}
+
+/*
+ * Interrupt config hook
+ */
+static void
+mrsas_ich_startup(void *arg)
+{
+	struct mrsas_softc *sc = (struct mrsas_softc *)arg;
+
+	/*
+	 * Initialize a counting semaphore to limit the number of concurrent IOCTLs.
+	 */
+	sema_init(&sc->ioctl_count_sema, MRSAS_MAX_IOCTL_CMDS,
+	    IOCTL_SEMA_DESCRIPTION);
+
+	/* Create a /dev entry for mrsas controller. */
+	sc->mrsas_cdev = make_dev(&mrsas_cdevsw, device_get_unit(sc->mrsas_dev), UID_ROOT,
+	    GID_OPERATOR, (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP), "mrsas%u",
+	    device_get_unit(sc->mrsas_dev));
+
+	if (device_get_unit(sc->mrsas_dev) == 0) {
+		make_dev_alias_p(MAKEDEV_CHECKNAME,
+		    &sc->mrsas_linux_emulator_cdev, sc->mrsas_cdev,
+		    "megaraid_sas_ioctl_node");
+	}
+	if (sc->mrsas_cdev)
+		sc->mrsas_cdev->si_drv1 = sc;
+
+	/*
+	 * Add this controller to mrsas_mgmt_info structure so that it can be
+	 * exported to management applications
+	 */
+	if (device_get_unit(sc->mrsas_dev) == 0)
+		memset(&mrsas_mgmt_info, 0, sizeof(mrsas_mgmt_info));
+
+	mrsas_mgmt_info.count++;
+	mrsas_mgmt_info.sc_ptr[mrsas_mgmt_info.max_index] = sc;
+	mrsas_mgmt_info.max_index++;
+
+	/* Enable Interrupts */
+	mrsas_enable_intr(sc);
+
+	/* Initiate AEN (Asynchronous Event Notification) */
+	if (mrsas_start_aen(sc)) {
+		device_printf(sc->mrsas_dev, "Error: AEN registration FAILED !!! "
+		    "Further events from the controller will not be communicated.\n"
+		    "Either there is some problem in the controller "
+		    "or the controller does not support AEN.\n"
+		    "Please contact the SUPPORT TEAM if the problem persists\n");
+	}
+	if (sc->mrsas_ich.ich_arg != NULL) {
+		device_printf(sc->mrsas_dev, "Disestablish mrsas intr hook\n");
+		config_intrhook_disestablish(&sc->mrsas_ich);
+		sc->mrsas_ich.ich_arg = NULL;
+	}
+}
+
+/*
+ * mrsas_detach:	De-allocates and teardown resources
+ * input:			pointer to device struct
+ *
+ * This function is the entry point for device disconnect and detach.
+ * It performs memory de-allocations, shutdown of the controller and various
+ * teardown and destroy resource functions.
+ */
+static int
+mrsas_detach(device_t dev)
+{
+	struct mrsas_softc *sc;
+	int i = 0;
+
+	sc = device_get_softc(dev);
+	sc->remove_in_progress = 1;
+
+	/* Destroy the character device so no other IOCTL will be handled */
+	if ((device_get_unit(dev) == 0) && sc->mrsas_linux_emulator_cdev)
+		destroy_dev(sc->mrsas_linux_emulator_cdev);
+	destroy_dev(sc->mrsas_cdev);
+
+	/*
+	 * Take the instance off the instance array. Note that we will not
+	 * decrement the max_index. We let this array be sparse array
+	 * decrement the max_index. We let this array be a sparse array.
+	for (i = 0; i < mrsas_mgmt_info.max_index; i++) {
+		if (mrsas_mgmt_info.sc_ptr[i] == sc) {
+			mrsas_mgmt_info.count--;
+			mrsas_mgmt_info.sc_ptr[i] = NULL;
+			break;
+		}
+	}
+
+	if (sc->ocr_thread_active)
+		wakeup(&sc->ocr_chan);
+	while (sc->reset_in_progress) {
+		i++;
+		if (!(i % MRSAS_RESET_NOTICE_INTERVAL)) {
+			mrsas_dprint(sc, MRSAS_INFO,
+			    "[%2d]waiting for OCR to be finished from %s\n", i, __func__);
+		}
+		pause("mr_shutdown", hz);
+	}
+	i = 0;
+	while (sc->ocr_thread_active) {
+		i++;
+		if (!(i % MRSAS_RESET_NOTICE_INTERVAL)) {
+			mrsas_dprint(sc, MRSAS_INFO,
+			    "[%2d]waiting for "
+			    "mrsas_ocr thread to quit ocr %d\n", i,
+			    sc->ocr_thread_active);
+		}
+		pause("mr_shutdown", hz);
+	}
+	mrsas_flush_cache(sc);
+	mrsas_shutdown_ctlr(sc, MR_DCMD_CTRL_SHUTDOWN);
+	mrsas_disable_intr(sc);
+	mrsas_cam_detach(sc);
+	mrsas_teardown_intr(sc);
+	mrsas_free_mem(sc);
+	mtx_destroy(&sc->sim_lock);
+	mtx_destroy(&sc->aen_lock);
+	mtx_destroy(&sc->pci_lock);
+	mtx_destroy(&sc->io_lock);
+	mtx_destroy(&sc->ioctl_lock);
+	mtx_destroy(&sc->mpt_cmd_pool_lock);
+	mtx_destroy(&sc->mfi_cmd_pool_lock);
+	mtx_destroy(&sc->raidmap_lock);
+
+	/* Wait for all the semaphores to be released */
+	while (sema_value(&sc->ioctl_count_sema) != MRSAS_MAX_IOCTL_CMDS)
+		pause("mr_shutdown", hz);
+
+	/* Destroy the counting semaphore created for Ioctl */
+	sema_destroy(&sc->ioctl_count_sema);
+
+	if (sc->reg_res) {
+		bus_release_resource(sc->mrsas_dev,
+		    SYS_RES_MEMORY, sc->reg_res_id, sc->reg_res);
+	}
+	if (sc->sysctl_tree != NULL)
+		sysctl_ctx_free(&sc->sysctl_ctx);
+
+	return (0);
+}
+
+/*
+ * mrsas_free_mem:		Frees allocated memory
+ * input:				Adapter instance soft state
+ *
+ * This function is called from mrsas_detach() to free previously allocated
+ * memory.
+ */
+void
+mrsas_free_mem(struct mrsas_softc *sc)
+{
+	int i;
+	u_int32_t max_cmd;
+	struct mrsas_mfi_cmd *mfi_cmd;
+	struct mrsas_mpt_cmd *mpt_cmd;
+
+	/*
+	 * Free RAID map memory
+	 */
+	for (i = 0; i < 2; i++) {
+		if (sc->raidmap_phys_addr[i])
+			bus_dmamap_unload(sc->raidmap_tag[i], sc->raidmap_dmamap[i]);
+		if (sc->raidmap_mem[i] != NULL)
+			bus_dmamem_free(sc->raidmap_tag[i], sc->raidmap_mem[i], sc->raidmap_dmamap[i]);
+		if (sc->raidmap_tag[i] != NULL)
+			bus_dma_tag_destroy(sc->raidmap_tag[i]);
+
+		if (sc->ld_drv_map[i] != NULL)
+			free(sc->ld_drv_map[i], M_MRSAS);
+	}
+	for (i = 0; i < 2; i++) {
+		if (sc->jbodmap_phys_addr[i])
+			bus_dmamap_unload(sc->jbodmap_tag[i], sc->jbodmap_dmamap[i]);
+		if (sc->jbodmap_mem[i] != NULL)
+			bus_dmamem_free(sc->jbodmap_tag[i], sc->jbodmap_mem[i], sc->jbodmap_dmamap[i]);
+		if (sc->jbodmap_tag[i] != NULL)
+			bus_dma_tag_destroy(sc->jbodmap_tag[i]);
+	}
+	/*
+	 * Free version buffer memory
+	 */
+	if (sc->verbuf_phys_addr)
+		bus_dmamap_unload(sc->verbuf_tag, sc->verbuf_dmamap);
+	if (sc->verbuf_mem != NULL)
+		bus_dmamem_free(sc->verbuf_tag, sc->verbuf_mem, sc->verbuf_dmamap);
+	if (sc->verbuf_tag != NULL)
+		bus_dma_tag_destroy(sc->verbuf_tag);
+
+	/*
+	 * Free sense buffer memory
+	 */
+	if (sc->sense_phys_addr)
+		bus_dmamap_unload(sc->sense_tag, sc->sense_dmamap);
+	if (sc->sense_mem != NULL)
+		bus_dmamem_free(sc->sense_tag, sc->sense_mem, sc->sense_dmamap);
+	if (sc->sense_tag != NULL)
+		bus_dma_tag_destroy(sc->sense_tag);
+
+	/*
+	 * Free chain frame memory
+	 */
+	if (sc->chain_frame_phys_addr)
+		bus_dmamap_unload(sc->chain_frame_tag, sc->chain_frame_dmamap);
+	if (sc->chain_frame_mem != NULL)
+		bus_dmamem_free(sc->chain_frame_tag, sc->chain_frame_mem, sc->chain_frame_dmamap);
+	if (sc->chain_frame_tag != NULL)
+		bus_dma_tag_destroy(sc->chain_frame_tag);
+
+	/*
+	 * Free IO Request memory
+	 */
+	if (sc->io_request_phys_addr)
+		bus_dmamap_unload(sc->io_request_tag, sc->io_request_dmamap);
+	if (sc->io_request_mem != NULL)
+		bus_dmamem_free(sc->io_request_tag, sc->io_request_mem, sc->io_request_dmamap);
+	if (sc->io_request_tag != NULL)
+		bus_dma_tag_destroy(sc->io_request_tag);
+
+	/*
+	 * Free Reply Descriptor memory
+	 */
+	if (sc->reply_desc_phys_addr)
+		bus_dmamap_unload(sc->reply_desc_tag, sc->reply_desc_dmamap);
+	if (sc->reply_desc_mem != NULL)
+		bus_dmamem_free(sc->reply_desc_tag, sc->reply_desc_mem, sc->reply_desc_dmamap);
+	if (sc->reply_desc_tag != NULL)
+		bus_dma_tag_destroy(sc->reply_desc_tag);
+
+	/*
+	 * Free event detail memory
+	 */
+	if (sc->evt_detail_phys_addr)
+		bus_dmamap_unload(sc->evt_detail_tag, sc->evt_detail_dmamap);
+	if (sc->evt_detail_mem != NULL)
+		bus_dmamem_free(sc->evt_detail_tag, sc->evt_detail_mem, sc->evt_detail_dmamap);
+	if (sc->evt_detail_tag != NULL)
+		bus_dma_tag_destroy(sc->evt_detail_tag);
+
+	/*
+	 * Free MFI frames
+	 */
+	if (sc->mfi_cmd_list) {
+		for (i = 0; i < MRSAS_MAX_MFI_CMDS; i++) {
+			mfi_cmd = sc->mfi_cmd_list[i];
+			mrsas_free_frame(sc, mfi_cmd);
+		}
+	}
+	if (sc->mficmd_frame_tag != NULL)
+		bus_dma_tag_destroy(sc->mficmd_frame_tag);
+
+	/*
+	 * Free MPT internal command list
+	 */
+	max_cmd = sc->max_fw_cmds;
+	if (sc->mpt_cmd_list) {
+		for (i = 0; i < max_cmd; i++) {
+			mpt_cmd = sc->mpt_cmd_list[i];
+			bus_dmamap_destroy(sc->data_tag, mpt_cmd->data_dmamap);
+			free(sc->mpt_cmd_list[i], M_MRSAS);
+		}
+		free(sc->mpt_cmd_list, M_MRSAS);
+		sc->mpt_cmd_list = NULL;
+	}
+	/*
+	 * Free MFI internal command list
+	 */
+
+	if (sc->mfi_cmd_list) {
+		for (i = 0; i < MRSAS_MAX_MFI_CMDS; i++) {
+			free(sc->mfi_cmd_list[i], M_MRSAS);
+		}
+		free(sc->mfi_cmd_list, M_MRSAS);
+		sc->mfi_cmd_list = NULL;
+	}
+	/*
+	 * Free request descriptor memory
+	 */
+	free(sc->req_desc, M_MRSAS);
+	sc->req_desc = NULL;
+
+	/*
+	 * Destroy parent tag
+	 */
+	if (sc->mrsas_parent_tag != NULL)
+		bus_dma_tag_destroy(sc->mrsas_parent_tag);
+
+	/*
+	 * Free ctrl_info memory
+	 */
+	if (sc->ctrl_info != NULL)
+		free(sc->ctrl_info, M_MRSAS);
+}
+
+/*
+ * mrsas_teardown_intr:	Teardown interrupt
+ * input:				Adapter instance soft state
+ *
+ * This function is called from mrsas_detach() to tear down and release the
+ * bus interrupt resource.
+ */
+void
+mrsas_teardown_intr(struct mrsas_softc *sc)
+{
+	int i;
+
+	if (!sc->msix_enable) {
+		if (sc->intr_handle[0])
+			bus_teardown_intr(sc->mrsas_dev, sc->mrsas_irq[0], sc->intr_handle[0]);
+		if (sc->mrsas_irq[0] != NULL)
+			bus_release_resource(sc->mrsas_dev, SYS_RES_IRQ,
+			    sc->irq_id[0], sc->mrsas_irq[0]);
+		sc->intr_handle[0] = NULL;
+	} else {
+		for (i = 0; i < sc->msix_vectors; i++) {
+			if (sc->intr_handle[i])
+				bus_teardown_intr(sc->mrsas_dev, sc->mrsas_irq[i],
+				    sc->intr_handle[i]);
+
+			if (sc->mrsas_irq[i] != NULL)
+				bus_release_resource(sc->mrsas_dev, SYS_RES_IRQ,
+				    sc->irq_id[i], sc->mrsas_irq[i]);
+
+			sc->intr_handle[i] = NULL;
+		}
+		pci_release_msi(sc->mrsas_dev);
+	}
+
+}
+
+/*
+ * mrsas_suspend:	Suspend entry point
+ * input:			Device struct pointer
+ *
+ * This function is the entry point for system suspend from the OS.
+ */
+static int
+mrsas_suspend(device_t dev)
+{
+	/* This will be filled in when the driver has hibernation support */
+	return (0);
+}
+
+/*
+ * mrsas_resume:	Resume entry point
+ * input:			Device struct pointer
+ *
+ * This function is the entry point for system resume from the OS.
+ */
+static int
+mrsas_resume(device_t dev)
+{
+	/* This will be filled in when the driver has hibernation support */
+	return (0);
+}
+
+/**
+ * mrsas_get_softc_instance:    Find softc instance based on cmd type
+ *
+ * This function returns the softc instance based on cmd type.
+ * In some cases the application fires the ioctl on the required management
+ * instance and does not provide host_no. Use cdev->si_drv1 to get the softc
+ * instance in those cases; otherwise, get the softc instance from the
+ * host_no provided by the application in the user data.
+ */
+
+static struct mrsas_softc *
+mrsas_get_softc_instance(struct cdev *dev, u_long cmd, caddr_t arg)
+{
+	struct mrsas_softc *sc = NULL;
+	struct mrsas_iocpacket *user_ioc = (struct mrsas_iocpacket *)arg;
+
+	if (cmd == MRSAS_IOC_GET_PCI_INFO) {
+		sc = dev->si_drv1;
+	} else {
+		/*
+		 * get the Host number & the softc from data sent by the
+		 * Application
+		 */
+		sc = mrsas_mgmt_info.sc_ptr[user_ioc->host_no];
+		if (sc == NULL)
+			printf("There is no Controller number %d\n",
+			    user_ioc->host_no);
+		else if (user_ioc->host_no >= mrsas_mgmt_info.max_index)
+			mrsas_dprint(sc, MRSAS_FAULT,
+			    "Invalid Controller number %d\n", user_ioc->host_no);
+	}
+
+	return sc;
+}
+
+/*
+ * mrsas_ioctl:	IOCtl commands entry point.
+ *
+ * This function is the entry point for IOCtls from the OS.  It calls the
+ * appropriate function for processing depending on the command received.
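+ *
+ * Illustration only (a hypothetical user-space sketch, not part of this file,
+ * assuming the ioctl definitions from dev/mrsas/mrsas_ioctl.h together with
+ * <fcntl.h> and <sys/ioctl.h>):
+ *
+ *	MRSAS_DRV_PCI_INFORMATION info;
+ *	int fd = open("/dev/mrsas0", O_RDWR);
+ *
+ *	if (fd >= 0 && ioctl(fd, MRSAS_IOC_GET_PCI_INFO, &info) == 0)
+ *		printf("bus %u device %u function %u\n", info.busNumber,
+ *		    info.deviceNumber, info.functionNumber);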
+ */
+static int
+mrsas_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, d_thread_t *td)
+{
+	struct mrsas_softc *sc;
+	int ret = 0, i = 0;
+	MRSAS_DRV_PCI_INFORMATION *pciDrvInfo;
+
+	sc = mrsas_get_softc_instance(dev, cmd, arg);
+	if (!sc)
+		return ENOENT;
+
+	if (sc->remove_in_progress ||
+	    (sc->adprecovery == MRSAS_HW_CRITICAL_ERROR)) {
+		mrsas_dprint(sc, MRSAS_INFO,
+		    "Either driver remove or shutdown called or "
+		    "HW is in unrecoverable critical error state.\n");
+		return ENOENT;
+	}
+	mtx_lock_spin(&sc->ioctl_lock);
+	if (!sc->reset_in_progress) {
+		mtx_unlock_spin(&sc->ioctl_lock);
+		goto do_ioctl;
+	}
+	mtx_unlock_spin(&sc->ioctl_lock);
+	while (sc->reset_in_progress) {
+		i++;
+		if (!(i % MRSAS_RESET_NOTICE_INTERVAL)) {
+			mrsas_dprint(sc, MRSAS_INFO,
+			    "[%2d]waiting for OCR to be finished from %s\n", i, __func__);
+		}
+		pause("mr_ioctl", hz);
+	}
+
+do_ioctl:
+	switch (cmd) {
+	case MRSAS_IOC_FIRMWARE_PASS_THROUGH64:
+#ifdef COMPAT_FREEBSD32
+	case MRSAS_IOC_FIRMWARE_PASS_THROUGH32:
+#endif
+		/*
+		 * Decrement the Ioctl counting Semaphore before getting an
+		 * mfi command
+		 */
+		sema_wait(&sc->ioctl_count_sema);
+
+		ret = mrsas_passthru(sc, (void *)arg, cmd);
+
+		/* Increment the Ioctl counting semaphore value */
+		sema_post(&sc->ioctl_count_sema);
+
+		break;
+	case MRSAS_IOC_SCAN_BUS:
+		ret = mrsas_bus_scan(sc);
+		break;
+
+	case MRSAS_IOC_GET_PCI_INFO:
+		pciDrvInfo = (MRSAS_DRV_PCI_INFORMATION *) arg;
+		memset(pciDrvInfo, 0, sizeof(MRSAS_DRV_PCI_INFORMATION));
+		pciDrvInfo->busNumber = pci_get_bus(sc->mrsas_dev);
+		pciDrvInfo->deviceNumber = pci_get_slot(sc->mrsas_dev);
+		pciDrvInfo->functionNumber = pci_get_function(sc->mrsas_dev);
+		pciDrvInfo->domainID = pci_get_domain(sc->mrsas_dev);
+		mrsas_dprint(sc, MRSAS_INFO, "pci bus no: %d, "
+		    "pci device no: %d, pci function no: %d, "
+		    "pci domain ID: %d\n",
+		    pciDrvInfo->busNumber, pciDrvInfo->deviceNumber,
+		    pciDrvInfo->functionNumber, pciDrvInfo->domainID);
+		ret = 0;
+		break;
+
+	default:
+		mrsas_dprint(sc, MRSAS_TRACE, "IOCTL command 0x%lx is not handled\n", cmd);
+		ret = ENOENT;
+	}
+
+	return (ret);
+}
+
+/*
+ * mrsas_poll:	poll entry point for mrsas driver fd
+ *
+ * This function is the entry point for poll from the OS.  It waits for AEN
+ * events to be triggered by the controller and notifies the caller.
+ */
+static int
+mrsas_poll(struct cdev *dev, int poll_events, struct thread *td)
+{
+	struct mrsas_softc *sc;
+	int revents = 0;
+
+	sc = dev->si_drv1;
+
+	if (poll_events & (POLLIN | POLLRDNORM)) {
+		if (sc->mrsas_aen_triggered) {
+			revents |= poll_events & (POLLIN | POLLRDNORM);
+		}
+	}
+	if (revents == 0) {
+		if (poll_events & (POLLIN | POLLRDNORM)) {
+			mtx_lock(&sc->aen_lock);
+			sc->mrsas_poll_waiting = 1;
+			selrecord(td, &sc->mrsas_select);
+			mtx_unlock(&sc->aen_lock);
+		}
+	}
+	return revents;
+}
+
+/*
+ * mrsas_setup_irq:	Set up interrupt
+ * input:			Adapter instance soft state
+ *
+ * This function sets up the interrupt as a bus resource, with flags
+ * indicating that the resource may be shared (RF_SHAREABLE) and is activated
+ * on allocation (RF_ACTIVE).
+ */
+static int
+mrsas_setup_irq(struct mrsas_softc *sc)
+{
+	if (sc->msix_enable && (mrsas_setup_msix(sc) == SUCCESS))
+		device_printf(sc->mrsas_dev, "MSI-X interrupt setup succeeded\n");
+	else {
+		device_printf(sc->mrsas_dev, "Fall back to legacy interrupt\n");
+		sc->irq_context[0].sc = sc;
+		sc->irq_context[0].MSIxIndex = 0;
+		sc->irq_id[0] = 0;
+		sc->mrsas_irq[0] = bus_alloc_resource_any(sc->mrsas_dev,
+		    SYS_RES_IRQ, &sc->irq_id[0], RF_SHAREABLE | RF_ACTIVE);
+		if (sc->mrsas_irq[0] == NULL) {
+			device_printf(sc->mrsas_dev, "Cannot allocate legacy "
+			    "interrupt\n");
+			return (FAIL);
+		}
+		if (bus_setup_intr(sc->mrsas_dev, sc->mrsas_irq[0],
+		    INTR_MPSAFE | INTR_TYPE_CAM, NULL, mrsas_isr,
+		    &sc->irq_context[0], &sc->intr_handle[0])) {
+			device_printf(sc->mrsas_dev, "Cannot set up legacy "
+			    "interrupt\n");
+			return (FAIL);
+		}
+	}
+	return (0);
+}
+
+/*
+ * mrsas_isr:	ISR entry point
+ * input:		argument pointer
+ *
+ * This function is the interrupt service routine entry point.  There are two
+ * types of interrupts, state change interrupt and response interrupt.  If an
+ * interrupt is not ours, we just return.
+ */
+void
+mrsas_isr(void *arg)
+{
+	struct mrsas_irq_context *irq_context = (struct mrsas_irq_context *)arg;
+	struct mrsas_softc *sc = irq_context->sc;
+	int status = 0;
+
+	if (sc->mask_interrupts)
+		return;
+
+	if (!sc->msix_vectors) {
+		status = mrsas_clear_intr(sc);
+		if (!status)
+			return;
+	}
+	/* If we are resetting, bail */
+	if (mrsas_test_bit(MRSAS_FUSION_IN_RESET, &sc->reset_flags)) {
+		printf("Entered ISR while OCR is in progress.\n");
+		mrsas_clear_intr(sc);
+		return;
+	}
+	/* Process for reply request and clear response interrupt */
+	if (mrsas_complete_cmd(sc, irq_context->MSIxIndex) != SUCCESS)
+		mrsas_clear_intr(sc);
+
+	return;
+}
+
+/*
+ * mrsas_complete_cmd:	Process reply request
+ * input:				Adapter instance soft state
+ *
+ * This function is called from mrsas_isr() to process reply request and clear
+ * response interrupt. Processing of the reply request entails walking
+ * through the reply descriptor array for the command requests completed by
+ * the Firmware.  We look at the Function field to determine the command type and
+ * perform the appropriate action.  Before we return, we clear the response
+ * interrupt.
+ */
+int
+mrsas_complete_cmd(struct mrsas_softc *sc, u_int32_t MSIxIndex)
+{
+	Mpi2ReplyDescriptorsUnion_t *desc;
+	MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR *reply_desc;
+	MRSAS_RAID_SCSI_IO_REQUEST *scsi_io_req;
+	struct mrsas_mpt_cmd *cmd_mpt;
+	struct mrsas_mfi_cmd *cmd_mfi;
+	u_int8_t reply_descript_type;
+	u_int16_t smid, num_completed;
+	u_int8_t status, extStatus;
+	union desc_value desc_val;
+	PLD_LOAD_BALANCE_INFO lbinfo;
+	u_int32_t device_id;
+	int threshold_reply_count = 0;
+#if TM_DEBUG
+	MR_TASK_MANAGE_REQUEST *mr_tm_req;
+	MPI2_SCSI_TASK_MANAGE_REQUEST *mpi_tm_req;
+#endif
+
+	/* If we have a hardware error, there is no need to continue */
+	if (sc->adprecovery == MRSAS_HW_CRITICAL_ERROR)
+		return (DONE);
+
+	desc = sc->reply_desc_mem;
+	desc += ((MSIxIndex * sc->reply_alloc_sz) / sizeof(MPI2_REPLY_DESCRIPTORS_UNION))
+	    + sc->last_reply_idx[MSIxIndex];
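+	/*
+	 * reply_alloc_sz holds reply_q_depth descriptors per MSI-X vector, so
+	 * the division above yields MSIxIndex * reply_q_depth, i.e. the start
+	 * of this vector's reply ring, offset by its last_reply_idx.
+	 */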
+
+	reply_desc = (MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR *) desc;
+
+	desc_val.word = desc->Words;
+	num_completed = 0;
+
+	reply_descript_type = reply_desc->ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
+
+	/* Find our reply descriptor for the command and process */
+	while ((desc_val.u.low != 0xFFFFFFFF) && (desc_val.u.high != 0xFFFFFFFF)) {
+		smid = reply_desc->SMID;
+		cmd_mpt = sc->mpt_cmd_list[smid - 1];
+		scsi_io_req = (MRSAS_RAID_SCSI_IO_REQUEST *) cmd_mpt->io_request;
+
+		status = scsi_io_req->RaidContext.status;
+		extStatus = scsi_io_req->RaidContext.exStatus;
+
+		switch (scsi_io_req->Function) {
+		case MPI2_FUNCTION_SCSI_TASK_MGMT:
+#if TM_DEBUG
+			mr_tm_req = (MR_TASK_MANAGE_REQUEST *) cmd_mpt->io_request;
+			mpi_tm_req = (MPI2_SCSI_TASK_MANAGE_REQUEST *)
+			    &mr_tm_req->TmRequest;
+			device_printf(sc->mrsas_dev, "TM completion type 0x%X, "
+			    "TaskMID: 0x%X", mpi_tm_req->TaskType, mpi_tm_req->TaskMID);
+#endif
+			wakeup_one((void *)&sc->ocr_chan);
+			break;
+		case MPI2_FUNCTION_SCSI_IO_REQUEST:	/* Fast Path IO. */
+			device_id = cmd_mpt->ccb_ptr->ccb_h.target_id;
+			lbinfo = &sc->load_balance_info[device_id];
+			if (cmd_mpt->load_balance == MRSAS_LOAD_BALANCE_FLAG) {
+				mrsas_atomic_dec(&lbinfo->scsi_pending_cmds[cmd_mpt->pd_r1_lb]);
+				cmd_mpt->load_balance &= ~MRSAS_LOAD_BALANCE_FLAG;
+			}
+			/* Fall thru and complete IO */
+		case MRSAS_MPI2_FUNCTION_LD_IO_REQUEST:
+			mrsas_map_mpt_cmd_status(cmd_mpt, status, extStatus);
+			mrsas_cmd_done(sc, cmd_mpt);
+			scsi_io_req->RaidContext.status = 0;
+			scsi_io_req->RaidContext.exStatus = 0;
+			mrsas_atomic_dec(&sc->fw_outstanding);
+			break;
+		case MRSAS_MPI2_FUNCTION_PASSTHRU_IO_REQUEST:	/* MFI command */
+			cmd_mfi = sc->mfi_cmd_list[cmd_mpt->sync_cmd_idx];
+			/*
+			 * Make sure NOT to release the mfi command in the called
+			 * function's context if it was fired with an issue_polled
+			 * call; also note that the issue_polled call should only
+			 * be used when INTERRUPTS ARE DISABLED.
+			 */
+			if (cmd_mfi->frame->hdr.flags & MFI_FRAME_DONT_POST_IN_REPLY_QUEUE)
+				mrsas_release_mfi_cmd(cmd_mfi);
+			else
+				mrsas_complete_mptmfi_passthru(sc, cmd_mfi, status);
+			break;
+		}
+
+		sc->last_reply_idx[MSIxIndex]++;
+		if (sc->last_reply_idx[MSIxIndex] >= sc->reply_q_depth)
+			sc->last_reply_idx[MSIxIndex] = 0;
+
+		desc->Words = ~((uint64_t)0x00);	/* set it back to all
+							 * 0xFFFFFFFFs */
+		num_completed++;
+		threshold_reply_count++;
+
+		/* Get the next reply descriptor */
+		if (!sc->last_reply_idx[MSIxIndex]) {
+			desc = sc->reply_desc_mem;
+			desc += ((MSIxIndex * sc->reply_alloc_sz) / sizeof(MPI2_REPLY_DESCRIPTORS_UNION));
+		} else
+			desc++;
+
+		reply_desc = (MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR *) desc;
+		desc_val.word = desc->Words;
+
+		reply_descript_type = reply_desc->ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
+
+		if (reply_descript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED)
+			break;
+
+		/*
+		 * Write to reply post index after completing threshold reply
+		 * count and still there are more replies in reply queue
+		 * pending to be completed.
+		 */
+		if (threshold_reply_count >= THRESHOLD_REPLY_COUNT) {
+			if (sc->msix_enable) {
+				if (sc->mrsas_gen3_ctrl)
+					mrsas_write_reg(sc, sc->msix_reg_offset[MSIxIndex / 8],
+					    ((MSIxIndex & 0x7) << 24) |
+					    sc->last_reply_idx[MSIxIndex]);
+				else
+					mrsas_write_reg(sc, sc->msix_reg_offset[0], (MSIxIndex << 24) |
+					    sc->last_reply_idx[MSIxIndex]);
+			} else
+				mrsas_write_reg(sc, offsetof(mrsas_reg_set,
+				    reply_post_host_index), sc->last_reply_idx[0]);
+
+			threshold_reply_count = 0;
+		}
+	}
+
+	/* No match, just return */
+	if (num_completed == 0)
+		return (DONE);
+
+	/* Clear response interrupt */
+	if (sc->msix_enable) {
+		if (sc->mrsas_gen3_ctrl) {
+			mrsas_write_reg(sc, sc->msix_reg_offset[MSIxIndex / 8],
+			    ((MSIxIndex & 0x7) << 24) |
+			    sc->last_reply_idx[MSIxIndex]);
+		} else
+			mrsas_write_reg(sc, sc->msix_reg_offset[0], (MSIxIndex << 24) |
+			    sc->last_reply_idx[MSIxIndex]);
+	} else
+		mrsas_write_reg(sc, offsetof(mrsas_reg_set,
+		    reply_post_host_index), sc->last_reply_idx[0]);
+
+	return (0);
+}
+
+/*
+ * mrsas_map_mpt_cmd_status:	Map FW command status to CAM status
+ * input:						MPT command, FW status, extended status
+ *
+ * This function is called from mrsas_complete_cmd(), for LD IO and FastPath IO.
+ * It checks the command status and maps the appropriate CAM status for the
+ * CCB.
+ */
+void
+mrsas_map_mpt_cmd_status(struct mrsas_mpt_cmd *cmd, u_int8_t status, u_int8_t extStatus)
+{
+	struct mrsas_softc *sc = cmd->sc;
+	u_int8_t *sense_data;
+
+	switch (status) {
+	case MFI_STAT_OK:
+		cmd->ccb_ptr->ccb_h.status = CAM_REQ_CMP;
+		break;
+	case MFI_STAT_SCSI_IO_FAILED:
+	case MFI_STAT_SCSI_DONE_WITH_ERROR:
+		cmd->ccb_ptr->ccb_h.status = CAM_SCSI_STATUS_ERROR;
+		sense_data = (u_int8_t *)&cmd->ccb_ptr->csio.sense_data;
+		if (sense_data) {
+			/* For now just copy 18 bytes back */
+			memcpy(sense_data, cmd->sense, 18);
+			cmd->ccb_ptr->csio.sense_len = 18;
+			cmd->ccb_ptr->ccb_h.status |= CAM_AUTOSNS_VALID;
+		}
+		break;
+	case MFI_STAT_LD_OFFLINE:
+	case MFI_STAT_DEVICE_NOT_FOUND:
+		if (cmd->ccb_ptr->ccb_h.target_lun)
+			cmd->ccb_ptr->ccb_h.status |= CAM_LUN_INVALID;
+		else
+			cmd->ccb_ptr->ccb_h.status |= CAM_DEV_NOT_THERE;
+		break;
+	case MFI_STAT_CONFIG_SEQ_MISMATCH:
+		cmd->ccb_ptr->ccb_h.status |= CAM_REQUEUE_REQ;
+		break;
+	default:
+		device_printf(sc->mrsas_dev, "FW cmd complete status %x\n", status);
+		cmd->ccb_ptr->ccb_h.status = CAM_REQ_CMP_ERR;
+		cmd->ccb_ptr->csio.scsi_status = status;
+	}
+	return;
+}
+
+/*
+ * mrsas_alloc_mem:	Allocate DMAable memory
+ * input:			Adapter instance soft state
+ *
+ * This function creates the parent DMA tag and allocates DMAable memory. DMA
+ * tag describes constraints of DMA mapping. Memory allocated is mapped into
+ * Kernel virtual address. Callback argument is physical memory address.
+ */
+static int
+mrsas_alloc_mem(struct mrsas_softc *sc)
+{
+	u_int32_t verbuf_size, io_req_size, reply_desc_size, sense_size,
+	          chain_frame_size, evt_detail_size, count;
+
+	/*
+	 * Allocate parent DMA tag
+	 */
+	if (bus_dma_tag_create(NULL,	/* parent */
+	    1,				/* alignment */
+	    0,				/* boundary */
+	    BUS_SPACE_MAXADDR,		/* lowaddr */
+	    BUS_SPACE_MAXADDR,		/* highaddr */
+	    NULL, NULL,			/* filter, filterarg */
+	    MAXPHYS,			/* maxsize */
+	    sc->max_num_sge,		/* nsegments */
+	    MAXPHYS,			/* maxsegsize */
+	    0,				/* flags */
+	    NULL, NULL,			/* lockfunc, lockarg */
+	    &sc->mrsas_parent_tag	/* tag */
+	    )) {
+		device_printf(sc->mrsas_dev, "Cannot allocate parent DMA tag\n");
+		return (ENOMEM);
+	}
+	/*
+	 * Allocate for version buffer
+	 */
+	verbuf_size = MRSAS_MAX_NAME_LENGTH * (sizeof(bus_addr_t));
+	if (bus_dma_tag_create(sc->mrsas_parent_tag,
+	    1, 0,
+	    BUS_SPACE_MAXADDR_32BIT,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    verbuf_size,
+	    1,
+	    verbuf_size,
+	    BUS_DMA_ALLOCNOW,
+	    NULL, NULL,
+	    &sc->verbuf_tag)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate verbuf DMA tag\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamem_alloc(sc->verbuf_tag, (void **)&sc->verbuf_mem,
+	    BUS_DMA_NOWAIT, &sc->verbuf_dmamap)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate verbuf memory\n");
+		return (ENOMEM);
+	}
+	bzero(sc->verbuf_mem, verbuf_size);
+	if (bus_dmamap_load(sc->verbuf_tag, sc->verbuf_dmamap, sc->verbuf_mem,
+	    verbuf_size, mrsas_addr_cb, &sc->verbuf_phys_addr,
+	    BUS_DMA_NOWAIT)) {
+		device_printf(sc->mrsas_dev, "Cannot load verbuf DMA map\n");
+		return (ENOMEM);
+	}
+	/*
+	 * Allocate IO Request Frames
+	 */
+	io_req_size = sc->io_frames_alloc_sz;
+	if (bus_dma_tag_create(sc->mrsas_parent_tag,
+	    16, 0,
+	    BUS_SPACE_MAXADDR_32BIT,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    io_req_size,
+	    1,
+	    io_req_size,
+	    BUS_DMA_ALLOCNOW,
+	    NULL, NULL,
+	    &sc->io_request_tag)) {
+		device_printf(sc->mrsas_dev, "Cannot create IO request tag\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamem_alloc(sc->io_request_tag, (void **)&sc->io_request_mem,
+	    BUS_DMA_NOWAIT, &sc->io_request_dmamap)) {
+		device_printf(sc->mrsas_dev, "Cannot alloc IO request memory\n");
+		return (ENOMEM);
+	}
+	bzero(sc->io_request_mem, io_req_size);
+	if (bus_dmamap_load(sc->io_request_tag, sc->io_request_dmamap,
+	    sc->io_request_mem, io_req_size, mrsas_addr_cb,
+	    &sc->io_request_phys_addr, BUS_DMA_NOWAIT)) {
+		device_printf(sc->mrsas_dev, "Cannot load IO request memory\n");
+		return (ENOMEM);
+	}
+	/*
+	 * Allocate Chain Frames
+	 */
+	chain_frame_size = sc->chain_frames_alloc_sz;
+	if (bus_dma_tag_create(sc->mrsas_parent_tag,
+	    4, 0,
+	    BUS_SPACE_MAXADDR_32BIT,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    chain_frame_size,
+	    1,
+	    chain_frame_size,
+	    BUS_DMA_ALLOCNOW,
+	    NULL, NULL,
+	    &sc->chain_frame_tag)) {
+		device_printf(sc->mrsas_dev, "Cannot create chain frame tag\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamem_alloc(sc->chain_frame_tag, (void **)&sc->chain_frame_mem,
+	    BUS_DMA_NOWAIT, &sc->chain_frame_dmamap)) {
+		device_printf(sc->mrsas_dev, "Cannot alloc chain frame memory\n");
+		return (ENOMEM);
+	}
+	bzero(sc->chain_frame_mem, chain_frame_size);
+	if (bus_dmamap_load(sc->chain_frame_tag, sc->chain_frame_dmamap,
+	    sc->chain_frame_mem, chain_frame_size, mrsas_addr_cb,
+	    &sc->chain_frame_phys_addr, BUS_DMA_NOWAIT)) {
+		device_printf(sc->mrsas_dev, "Cannot load chain frame memory\n");
+		return (ENOMEM);
+	}
+	count = sc->msix_vectors > 0 ? sc->msix_vectors : 1;
+	/*
+	 * Allocate Reply Descriptor Array
+	 */
+	reply_desc_size = sc->reply_alloc_sz * count;
+	if (bus_dma_tag_create(sc->mrsas_parent_tag,
+	    16, 0,
+	    BUS_SPACE_MAXADDR_32BIT,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    reply_desc_size,
+	    1,
+	    reply_desc_size,
+	    BUS_DMA_ALLOCNOW,
+	    NULL, NULL,
+	    &sc->reply_desc_tag)) {
+		device_printf(sc->mrsas_dev, "Cannot create reply descriptor tag\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamem_alloc(sc->reply_desc_tag, (void **)&sc->reply_desc_mem,
+	    BUS_DMA_NOWAIT, &sc->reply_desc_dmamap)) {
+		device_printf(sc->mrsas_dev, "Cannot alloc reply descriptor memory\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamap_load(sc->reply_desc_tag, sc->reply_desc_dmamap,
+	    sc->reply_desc_mem, reply_desc_size, mrsas_addr_cb,
+	    &sc->reply_desc_phys_addr, BUS_DMA_NOWAIT)) {
+		device_printf(sc->mrsas_dev, "Cannot load reply descriptor memory\n");
+		return (ENOMEM);
+	}
+	/*
+	 * Allocate Sense Buffer Array.  Keep in lower 4GB
+	 */
+	sense_size = sc->max_fw_cmds * MRSAS_SENSE_LEN;
+	if (bus_dma_tag_create(sc->mrsas_parent_tag,
+	    64, 0,
+	    BUS_SPACE_MAXADDR_32BIT,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    sense_size,
+	    1,
+	    sense_size,
+	    BUS_DMA_ALLOCNOW,
+	    NULL, NULL,
+	    &sc->sense_tag)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate sense buf tag\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamem_alloc(sc->sense_tag, (void **)&sc->sense_mem,
+	    BUS_DMA_NOWAIT, &sc->sense_dmamap)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate sense buf memory\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamap_load(sc->sense_tag, sc->sense_dmamap,
+	    sc->sense_mem, sense_size, mrsas_addr_cb, &sc->sense_phys_addr,
+	    BUS_DMA_NOWAIT)) {
+		device_printf(sc->mrsas_dev, "Cannot load sense buf memory\n");
+		return (ENOMEM);
+	}
+	/*
+	 * Allocate for Event detail structure
+	 */
+	evt_detail_size = sizeof(struct mrsas_evt_detail);
+	if (bus_dma_tag_create(sc->mrsas_parent_tag,
+	    1, 0,
+	    BUS_SPACE_MAXADDR_32BIT,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    evt_detail_size,
+	    1,
+	    evt_detail_size,
+	    BUS_DMA_ALLOCNOW,
+	    NULL, NULL,
+	    &sc->evt_detail_tag)) {
+		device_printf(sc->mrsas_dev, "Cannot create Event detail tag\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamem_alloc(sc->evt_detail_tag, (void **)&sc->evt_detail_mem,
+	    BUS_DMA_NOWAIT, &sc->evt_detail_dmamap)) {
+		device_printf(sc->mrsas_dev, "Cannot alloc Event detail buffer memory\n");
+		return (ENOMEM);
+	}
+	bzero(sc->evt_detail_mem, evt_detail_size);
+	if (bus_dmamap_load(sc->evt_detail_tag, sc->evt_detail_dmamap,
+	    sc->evt_detail_mem, evt_detail_size, mrsas_addr_cb,
+	    &sc->evt_detail_phys_addr, BUS_DMA_NOWAIT)) {
+		device_printf(sc->mrsas_dev, "Cannot load Event detail buffer memory\n");
+		return (ENOMEM);
+	}
+	/*
+	 * Create a dma tag for data buffers; size will be the maximum
+	 * possible I/O size (280kB).
+	 */
+	if (bus_dma_tag_create(sc->mrsas_parent_tag,
+	    1,
+	    0,
+	    BUS_SPACE_MAXADDR,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    MAXPHYS,
+	    sc->max_num_sge,		/* nsegments */
+	    MAXPHYS,
+	    BUS_DMA_ALLOCNOW,
+	    busdma_lock_mutex,
+	    &sc->io_lock,
+	    &sc->data_tag)) {
+		device_printf(sc->mrsas_dev, "Cannot create data dma tag\n");
+		return (ENOMEM);
+	}
+	return (0);
+}
+
+/*
+ * mrsas_addr_cb:	Callback function of bus_dmamap_load()
+ * input:			callback argument, machine dependent type
+ * 					that describes DMA segments, number of segments, error code
+ *
+ * This callback receives the mapping information produced by
+ * bus_dmamap_load().  Only the physical address of the first segment is
+ * used; it is saved through the callback argument.
+ */
+void
+mrsas_addr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
+{
+	bus_addr_t *addr;
+
+	addr = arg;
+	*addr = segs[0].ds_addr;
+}
+
+/*
+ * mrsas_setup_raidmap:	Set up RAID map.
+ * input:				Adapter instance soft state
+ *
+ * Allocate DMA memory for the RAID maps and perform setup.
+ */
+static int
+mrsas_setup_raidmap(struct mrsas_softc *sc)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		sc->ld_drv_map[i] =
+		    (void *)malloc(sc->drv_map_sz, M_MRSAS, M_NOWAIT);
+		/* Do Error handling */
+		if (!sc->ld_drv_map[i]) {
+			device_printf(sc->mrsas_dev, "Could not allocate memory for local map");
+
+			if (i == 1)
+				free(sc->ld_drv_map[0], M_MRSAS);
+			/* ABORT driver initialization */
+			goto ABORT;
+		}
+	}
+
+	for (int i = 0; i < 2; i++) {
+		if (bus_dma_tag_create(sc->mrsas_parent_tag,
+		    4, 0,
+		    BUS_SPACE_MAXADDR_32BIT,
+		    BUS_SPACE_MAXADDR,
+		    NULL, NULL,
+		    sc->max_map_sz,
+		    1,
+		    sc->max_map_sz,
+		    BUS_DMA_ALLOCNOW,
+		    NULL, NULL,
+		    &sc->raidmap_tag[i])) {
+			device_printf(sc->mrsas_dev,
+			    "Cannot allocate raid map tag.\n");
+			return (ENOMEM);
+		}
+		if (bus_dmamem_alloc(sc->raidmap_tag[i],
+		    (void **)&sc->raidmap_mem[i],
+		    BUS_DMA_NOWAIT, &sc->raidmap_dmamap[i])) {
+			device_printf(sc->mrsas_dev,
+			    "Cannot allocate raidmap memory.\n");
+			return (ENOMEM);
+		}
+		bzero(sc->raidmap_mem[i], sc->max_map_sz);
+
+		if (bus_dmamap_load(sc->raidmap_tag[i], sc->raidmap_dmamap[i],
+		    sc->raidmap_mem[i], sc->max_map_sz,
+		    mrsas_addr_cb, &sc->raidmap_phys_addr[i],
+		    BUS_DMA_NOWAIT)) {
+			device_printf(sc->mrsas_dev, "Cannot load raidmap memory.\n");
+			return (ENOMEM);
+		}
+		if (!sc->raidmap_mem[i]) {
+			device_printf(sc->mrsas_dev,
+			    "Cannot allocate memory for raid map.\n");
+			return (ENOMEM);
+		}
+	}
+
+	if (!mrsas_get_map_info(sc))
+		mrsas_sync_map_info(sc);
+
+	return (0);
+
+ABORT:
+	return (1);
+}
+
+/**
+ * megasas_setup_jbod_map -	setup jbod map for FP seq_number.
+ * @sc:				Adapter soft state
+ */
+void
+megasas_setup_jbod_map(struct mrsas_softc *sc)
+{
+	int i;
+	uint32_t pd_seq_map_sz;
+
+	pd_seq_map_sz = sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) +
+	    (sizeof(struct MR_PD_CFG_SEQ) * (MAX_PHYSICAL_DEVICES - 1));
+
+	if (!sc->ctrl_info->adapterOperations3.useSeqNumJbodFP) {
+		sc->use_seqnum_jbod_fp = 0;
+		return;
+	}
+	if (sc->jbodmap_mem[0])
+		goto skip_alloc;
+
+	for (i = 0; i < 2; i++) {
+		if (bus_dma_tag_create(sc->mrsas_parent_tag,
+		    4, 0,
+		    BUS_SPACE_MAXADDR_32BIT,
+		    BUS_SPACE_MAXADDR,
+		    NULL, NULL,
+		    pd_seq_map_sz,
+		    1,
+		    pd_seq_map_sz,
+		    BUS_DMA_ALLOCNOW,
+		    NULL, NULL,
+		    &sc->jbodmap_tag[i])) {
+			device_printf(sc->mrsas_dev,
+			    "Cannot allocate jbod map tag.\n");
+			return;
+		}
+		if (bus_dmamem_alloc(sc->jbodmap_tag[i],
+		    (void **)&sc->jbodmap_mem[i],
+		    BUS_DMA_NOWAIT, &sc->jbodmap_dmamap[i])) {
+			device_printf(sc->mrsas_dev,
+			    "Cannot allocate jbod map memory.\n");
+			return;
+		}
+		bzero(sc->jbodmap_mem[i], pd_seq_map_sz);
+
+		if (bus_dmamap_load(sc->jbodmap_tag[i], sc->jbodmap_dmamap[i],
+		    sc->jbodmap_mem[i], pd_seq_map_sz,
+		    mrsas_addr_cb, &sc->jbodmap_phys_addr[i],
+		    BUS_DMA_NOWAIT)) {
+			device_printf(sc->mrsas_dev, "Cannot load jbod map memory.\n");
+			return;
+		}
+		if (!sc->jbodmap_mem[i]) {
+			device_printf(sc->mrsas_dev,
+			    "Cannot allocate memory for jbod map.\n");
+			sc->use_seqnum_jbod_fp = 0;
+			return;
+		}
+	}
+
+skip_alloc:
+	if (!megasas_sync_pd_seq_num(sc, false) &&
+	    !megasas_sync_pd_seq_num(sc, true))
+		sc->use_seqnum_jbod_fp = 1;
+	else
+		sc->use_seqnum_jbod_fp = 0;
+
+	device_printf(sc->mrsas_dev, "Jbod map is supported\n");
+}
+
+/*
+ * mrsas_init_fw:	Initialize Firmware
+ * input:			Adapter soft state
+ *
+ * Calls transition_to_ready() to make sure Firmware is in operational state and
+ * calls mrsas_init_adapter() to send IOC_INIT command to Firmware.  It
+ * issues internal commands to get the controller info after the IOC_INIT
+ * command response is received from Firmware.  Note: the code relating to
+ * get_pdlist, get_ld_list and max_sectors is currently not being used; it
+ * is left here as a placeholder.
+ */
+static int
+mrsas_init_fw(struct mrsas_softc *sc)
+{
+
+	int ret, loop, ocr = 0;
+	u_int32_t max_sectors_1;
+	u_int32_t max_sectors_2;
+	u_int32_t tmp_sectors;
+	u_int32_t scratch_pad_2;
+	int msix_enable = 0;
+	int fw_msix_count = 0;
+
+	/* Make sure Firmware is ready */
+	ret = mrsas_transition_to_ready(sc, ocr);
+	if (ret != SUCCESS) {
+		return (ret);
+	}
+	/* MSI-x index 0- reply post host index register */
+	sc->msix_reg_offset[0] = MPI2_REPLY_POST_HOST_INDEX_OFFSET;
+	/* Check if MSI-X is supported while in ready state */
+	msix_enable = (mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad)) & 0x4000000) >> 0x1a;
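+	/*
+	 * The mask 0x4000000 selects bit 26 of the scratch pad register and
+	 * the shift by 0x1a (26) reduces it to a 0/1 MSI-X capability flag.
+	 */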
+
+	if (msix_enable) {
+		scratch_pad_2 = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+		    outbound_scratch_pad_2));
+
+		/* Check max MSI-X vectors */
+		if (sc->device_id == MRSAS_TBOLT) {
+			sc->msix_vectors = (scratch_pad_2
+			    & MR_MAX_REPLY_QUEUES_OFFSET) + 1;
+			fw_msix_count = sc->msix_vectors;
+		} else {
+			/* Invader/Fury supports 96 MSI-X vectors */
+			sc->msix_vectors = ((scratch_pad_2
+			    & MR_MAX_REPLY_QUEUES_EXT_OFFSET)
+			    >> MR_MAX_REPLY_QUEUES_EXT_OFFSET_SHIFT) + 1;
+			fw_msix_count = sc->msix_vectors;
+
+			for (loop = 1; loop < MR_MAX_MSIX_REG_ARRAY;
+			    loop++) {
+				sc->msix_reg_offset[loop] =
+				    MPI2_SUP_REPLY_POST_HOST_INDEX_OFFSET +
+				    (loop * 0x10);
+			}
+		}
+
+		/* Don't bother allocating more MSI-X vectors than cpus */
+		sc->msix_vectors = min(sc->msix_vectors,
+		    mp_ncpus);
+
+		/* Allocate MSI-x vectors */
+		if (mrsas_allocate_msix(sc) == SUCCESS)
+			sc->msix_enable = 1;
+		else
+			sc->msix_enable = 0;
+
+		device_printf(sc->mrsas_dev, "FW supports <%d> MSIX vectors, "
+		    "Online CPUs %d, Current MSIX <%d>\n",
+		    fw_msix_count, mp_ncpus, sc->msix_vectors);
+	}
+	if (mrsas_init_adapter(sc) != SUCCESS) {
+		device_printf(sc->mrsas_dev, "Adapter initialization failed.\n");
+		return (1);
+	}
+	/* Allocate internal commands for pass-thru */
+	if (mrsas_alloc_mfi_cmds(sc) != SUCCESS) {
+		device_printf(sc->mrsas_dev, "Allocate MFI cmd failed.\n");
+		return (1);
+	}
+	sc->ctrl_info = malloc(sizeof(struct mrsas_ctrl_info), M_MRSAS, M_NOWAIT);
+	if (!sc->ctrl_info) {
+		device_printf(sc->mrsas_dev, "Malloc for ctrl_info failed.\n");
+		return (1);
+	}
+	/*
+	 * Get the controller info from FW, so that the MAX VD support
+	 * availability can be decided.
+	 */
+	if (mrsas_get_ctrl_info(sc)) {
+		device_printf(sc->mrsas_dev, "Unable to get FW ctrl_info.\n");
+		return (1);
+	}
+	sc->secure_jbod_support =
+	    (u_int8_t)sc->ctrl_info->adapterOperations3.supportSecurityonJBOD;
+
+	if (sc->secure_jbod_support)
+		device_printf(sc->mrsas_dev, "FW supports SED \n");
+
+	if (sc->use_seqnum_jbod_fp)
+		device_printf(sc->mrsas_dev, "FW supports JBOD Map \n");
+
+	if (mrsas_setup_raidmap(sc) != SUCCESS) {
+		device_printf(sc->mrsas_dev, "Error: RAID map setup FAILED !!! "
+		    "There seems to be some problem in the controller\n"
+		    "Please contact the SUPPORT TEAM if the problem persists.\n");
+	}
+	megasas_setup_jbod_map(sc);
+
+	/* For pass-thru, get PD/LD list and controller info */
+	memset(sc->pd_list, 0,
+	    MRSAS_MAX_PD * sizeof(struct mrsas_pd_list));
+	if (mrsas_get_pd_list(sc) != SUCCESS) {
+		device_printf(sc->mrsas_dev, "Get PD list failed.\n");
+		return (1);
+	}
+	memset(sc->ld_ids, 0xff, MRSAS_MAX_LD_IDS);
+	if (mrsas_get_ld_list(sc) != SUCCESS) {
+		device_printf(sc->mrsas_dev, "Get LD list failed.\n");
+		return (1);
+	}
+	/*
+	 * Compute the max allowed sectors per IO: The controller info has
+	 * two limits on max sectors. Driver should use the minimum of these
+	 * two.
+	 *
+	 * 1 << stripe_sz_ops.min = max sectors per strip
+	 *
+	 * Note that older firmwares ( < FW ver 30) didn't report information to
+	 * calculate max_sectors_1. So the number ended up as zero always.
+	 */
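+	/*
+	 * Illustration with hypothetical values: stripe_sz_ops.min = 7 gives
+	 * a strip of 1 << 7 = 128 sectors (64KB); with max_strips_per_io = 42
+	 * that yields max_sectors_1 = 128 * 42 = 5376 sectors.
+	 */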
+	tmp_sectors = 0;
+	max_sectors_1 = (1 << sc->ctrl_info->stripe_sz_ops.min) *
+	    sc->ctrl_info->max_strips_per_io;
+	max_sectors_2 = sc->ctrl_info->max_request_size;
+	tmp_sectors = min(max_sectors_1, max_sectors_2);
+	sc->max_sectors_per_req = sc->max_num_sge * MRSAS_PAGE_SIZE / 512;
+
+	if (tmp_sectors && (sc->max_sectors_per_req > tmp_sectors))
+		sc->max_sectors_per_req = tmp_sectors;
+
+	sc->disableOnlineCtrlReset =
+	    sc->ctrl_info->properties.OnOffProperties.disableOnlineCtrlReset;
+	sc->UnevenSpanSupport =
+	    sc->ctrl_info->adapterOperations2.supportUnevenSpans;
+	if (sc->UnevenSpanSupport) {
+		device_printf(sc->mrsas_dev, "FW supports: UnevenSpanSupport=%x\n\n",
+		    sc->UnevenSpanSupport);
+
+		if (MR_ValidateMapInfo(sc))
+			sc->fast_path_io = 1;
+		else
+			sc->fast_path_io = 0;
+	}
+	return (0);
+}
+
+/*
+ * mrsas_init_adapter:	Initializes the adapter/controller
+ * input:				Adapter soft state
+ *
+ * Prepares for the issuing of the IOC Init cmd to FW for initializing the
+ * ROC/controller.  The FW register is read to determine the number of
+ * commands that are supported.  All memory allocations for IO are based on
+ * max_cmd.  Appropriate calculations are performed in this function.
+ */
+int
+mrsas_init_adapter(struct mrsas_softc *sc)
+{
+	uint32_t status;
+	u_int32_t max_cmd, scratch_pad_2;
+	int ret;
+	int i = 0;
+
+	/* Read FW status register */
+	status = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad));
+
+	/* Get operational params from status register */
+	sc->max_fw_cmds = status & MRSAS_FWSTATE_MAXCMD_MASK;
+
+	/* Decrement the max supported by 1, to correlate with FW */
+	sc->max_fw_cmds = sc->max_fw_cmds - 1;
+	max_cmd = sc->max_fw_cmds;
+
+	/* Determine allocation size of command frames */
+	sc->reply_q_depth = ((max_cmd + 1 + 15) / 16 * 16) * 2;
+	sc->request_alloc_sz = sizeof(MRSAS_REQUEST_DESCRIPTOR_UNION) * max_cmd;
+	sc->reply_alloc_sz = sizeof(MPI2_REPLY_DESCRIPTORS_UNION) * (sc->reply_q_depth);
+	sc->io_frames_alloc_sz = MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE + (MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE * (max_cmd + 1));
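+	/*
+	 * For illustration (hypothetical value): if the FW reports 1009
+	 * commands, max_fw_cmds becomes 1008 and reply_q_depth is
+	 * ((1008 + 1 + 15) / 16 * 16) * 2 = 2048 reply descriptors.
+	 */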
+	scratch_pad_2 = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+	    outbound_scratch_pad_2));
+	/*
+	 * If scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_UNITS_MASK is set, the
+	 * Firmware supports an extended IO chain frame that is 4 times larger
+	 * than the legacy one.
+	 * Legacy Firmware: frame size is (8 * 128)     = 1K
+	 * 1M IO Firmware:  frame size is (8 * 128 * 4) = 4K
+	 */
+	if (scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_UNITS_MASK)
+		sc->max_chain_frame_sz =
+		    ((scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_MASK) >> 5)
+		    * MEGASAS_1MB_IO;
+	else
+		sc->max_chain_frame_sz =
+		    ((scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_MASK) >> 5)
+		    * MEGASAS_256K_IO;
+
+	sc->chain_frames_alloc_sz = sc->max_chain_frame_sz * max_cmd;
+	sc->max_sge_in_main_msg = (MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE -
+	    offsetof(MRSAS_RAID_SCSI_IO_REQUEST, SGL)) / 16;
+
+	sc->max_sge_in_chain = sc->max_chain_frame_sz / sizeof(MPI2_SGE_IO_UNION);
+	sc->max_num_sge = sc->max_sge_in_main_msg + sc->max_sge_in_chain - 2;
+
+	mrsas_dprint(sc, MRSAS_INFO, "Avago Debug: MAX sge 0x%X MAX chain frame size 0x%X \n",
+	    sc->max_num_sge, sc->max_chain_frame_sz);
+
+	/* Used for pass thru MFI frame (DCMD) */
+	sc->chain_offset_mfi_pthru = offsetof(MRSAS_RAID_SCSI_IO_REQUEST, SGL) / 16;
+
+	sc->chain_offset_io_request = (MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE -
+	    sizeof(MPI2_SGE_IO_UNION)) / 16;
+
+	int count = sc->msix_vectors > 0 ? sc->msix_vectors : 1;
+
+	for (i = 0; i < count; i++)
+		sc->last_reply_idx[i] = 0;
+
+	ret = mrsas_alloc_mem(sc);
+	if (ret != SUCCESS)
+		return (ret);
+
+	ret = mrsas_alloc_mpt_cmds(sc);
+	if (ret != SUCCESS)
+		return (ret);
+
+	ret = mrsas_ioc_init(sc);
+	if (ret != SUCCESS)
+		return (ret);
+
+	return (0);
+}
+
+/*
+ * mrsas_alloc_ioc_cmd:	Allocates memory for IOC Init command
+ * input:				Adapter soft state
+ *
+ * Allocates memory for the IOC Init cmd sent to FW to initialize the
+ * ROC/controller.
+ */
+int
+mrsas_alloc_ioc_cmd(struct mrsas_softc *sc)
+{
+	int ioc_init_size;
+
+	/* Allocate IOC INIT command */
+	ioc_init_size = 1024 + sizeof(MPI2_IOC_INIT_REQUEST);
+	if (bus_dma_tag_create(sc->mrsas_parent_tag,
+	    1, 0,
+	    BUS_SPACE_MAXADDR_32BIT,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    ioc_init_size,
+	    1,
+	    ioc_init_size,
+	    BUS_DMA_ALLOCNOW,
+	    NULL, NULL,
+	    &sc->ioc_init_tag)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate ioc init tag\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamem_alloc(sc->ioc_init_tag, (void **)&sc->ioc_init_mem,
+	    BUS_DMA_NOWAIT, &sc->ioc_init_dmamap)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate ioc init cmd mem\n");
+		return (ENOMEM);
+	}
+	bzero(sc->ioc_init_mem, ioc_init_size);
+	if (bus_dmamap_load(sc->ioc_init_tag, sc->ioc_init_dmamap,
+	    sc->ioc_init_mem, ioc_init_size, mrsas_addr_cb,
+	    &sc->ioc_init_phys_mem, BUS_DMA_NOWAIT)) {
+		device_printf(sc->mrsas_dev, "Cannot load ioc init cmd mem\n");
+		return (ENOMEM);
+	}
+	return (0);
+}
+
+/*
+ * mrsas_free_ioc_cmd:	Frees memory for IOC Init command
+ * input:				Adapter soft state
+ *
+ * Deallocates memory of the IOC Init cmd.
+ */
+void
+mrsas_free_ioc_cmd(struct mrsas_softc *sc)
+{
+	if (sc->ioc_init_phys_mem)
+		bus_dmamap_unload(sc->ioc_init_tag, sc->ioc_init_dmamap);
+	if (sc->ioc_init_mem != NULL)
+		bus_dmamem_free(sc->ioc_init_tag, sc->ioc_init_mem, sc->ioc_init_dmamap);
+	if (sc->ioc_init_tag != NULL)
+		bus_dma_tag_destroy(sc->ioc_init_tag);
+}
+
+/*
+ * mrsas_ioc_init:	Sends IOC Init command to FW
+ * input:			Adapter soft state
+ *
+ * Issues the IOC Init cmd to FW to initialize the ROC/controller.
+ */
+int
+mrsas_ioc_init(struct mrsas_softc *sc)
+{
+	struct mrsas_init_frame *init_frame;
+	pMpi2IOCInitRequest_t IOCInitMsg;
+	MRSAS_REQUEST_DESCRIPTOR_UNION req_desc;
+	u_int8_t max_wait = MRSAS_IOC_INIT_WAIT_TIME;
+	bus_addr_t phys_addr;
+	int i, retcode = 0;
+	u_int32_t scratch_pad_2;
+
+	/* Allocate memory for the IOC INIT command */
+	if (mrsas_alloc_ioc_cmd(sc)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate IOC command.\n");
+		return (1);
+	}
+
+	if (!sc->block_sync_cache) {
+		scratch_pad_2 = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+		    outbound_scratch_pad_2));
+		sc->fw_sync_cache_support = (scratch_pad_2 &
+		    MR_CAN_HANDLE_SYNC_CACHE_OFFSET) ? 1 : 0;
+	}
+
+	IOCInitMsg = (pMpi2IOCInitRequest_t)(((char *)sc->ioc_init_mem) + 1024);
+	IOCInitMsg->Function = MPI2_FUNCTION_IOC_INIT;
+	IOCInitMsg->WhoInit = MPI2_WHOINIT_HOST_DRIVER;
+	IOCInitMsg->MsgVersion = MPI2_VERSION;
+	IOCInitMsg->HeaderVersion = MPI2_HEADER_VERSION;
+	IOCInitMsg->SystemRequestFrameSize = MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE / 4;
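+	/*
+	 * SystemRequestFrameSize is expressed in 4-byte units, hence the
+	 * division of the byte-sized frame by 4.
+	 */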
+	IOCInitMsg->ReplyDescriptorPostQueueDepth = sc->reply_q_depth;
+	IOCInitMsg->ReplyDescriptorPostQueueAddress = sc->reply_desc_phys_addr;
+	IOCInitMsg->SystemRequestFrameBaseAddress = sc->io_request_phys_addr;
+	IOCInitMsg->HostMSIxVectors = (sc->msix_vectors > 0 ? sc->msix_vectors : 0);
+
+	init_frame = (struct mrsas_init_frame *)sc->ioc_init_mem;
+	init_frame->cmd = MFI_CMD_INIT;
+	init_frame->cmd_status = 0xFF;
+	init_frame->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE;
+
+	/* driver supports Extended MSI-X */
+	if (sc->mrsas_gen3_ctrl) {
+		init_frame->driver_operations.
+		    mfi_capabilities.support_additional_msix = 1;
+	}
+	if (sc->verbuf_mem) {
+		snprintf((char *)sc->verbuf_mem, strlen(MRSAS_VERSION) + 2, "%s\n",
+		    MRSAS_VERSION);
+		init_frame->driver_ver_lo = (bus_addr_t)sc->verbuf_phys_addr;
+		init_frame->driver_ver_hi = 0;
+	}
+	init_frame->driver_operations.mfi_capabilities.support_ndrive_r1_lb = 1;
+	init_frame->driver_operations.mfi_capabilities.support_max_255lds = 1;
+	init_frame->driver_operations.mfi_capabilities.security_protocol_cmds_fw = 1;
+	if (sc->max_chain_frame_sz > MEGASAS_CHAIN_FRAME_SZ_MIN)
+		init_frame->driver_operations.mfi_capabilities.support_ext_io_size = 1;
+	phys_addr = (bus_addr_t)sc->ioc_init_phys_mem + 1024;
+	init_frame->queue_info_new_phys_addr_lo = phys_addr;
+	init_frame->data_xfer_len = sizeof(Mpi2IOCInitRequest_t);
+
+	req_desc.addr.Words = (bus_addr_t)sc->ioc_init_phys_mem;
+	req_desc.MFAIo.RequestFlags =
+	    (MRSAS_REQ_DESCRIPT_FLAGS_MFA << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
+
+	mrsas_disable_intr(sc);
+	mrsas_dprint(sc, MRSAS_OCR, "Issuing IOC INIT command to FW.\n");
+	mrsas_fire_cmd(sc, req_desc.addr.u.low, req_desc.addr.u.high);
+
+	/*
+	 * Poll for the Firmware response.  While the DELAY call below busy-waits
+	 * on the CPU, each interval is only 1 millisecond.
+	 */
+	if (init_frame->cmd_status == 0xFF) {
+		for (i = 0; i < (max_wait * 1000); i++) {
+			if (init_frame->cmd_status == 0xFF)
+				DELAY(1000);
+			else
+				break;
+		}
+	}
+	if (init_frame->cmd_status == 0)
+		mrsas_dprint(sc, MRSAS_OCR,
+		    "IOC INIT response received from FW.\n");
+	else {
+		if (init_frame->cmd_status == 0xFF)
+			device_printf(sc->mrsas_dev, "IOC Init timed out after %d seconds.\n", max_wait);
+		else
+			device_printf(sc->mrsas_dev, "IOC Init failed, status = 0x%x\n", init_frame->cmd_status);
+		retcode = 1;
+	}
+
+	mrsas_free_ioc_cmd(sc);
+	return (retcode);
+}
+
+/*
+ * mrsas_alloc_mpt_cmds:	Allocates the command packets
+ * input:					Adapter instance soft state
+ *
+ * This function allocates the internal commands for IOs. Each command that is
+ * issued to FW is wrapped in a local data structure called mrsas_mpt_cmd. An
+ * array is allocated with mrsas_mpt_cmd context.  The free commands are
+ * maintained in a linked list (cmd pool). SMID value range is from 1 to
+ * max_fw_cmds.
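+ * Since cmd->index is set to i + 1 below, the completion path recovers a
+ * command from its SMID with sc->mpt_cmd_list[smid - 1] (see
+ * mrsas_complete_cmd()).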
+ */
+int
+mrsas_alloc_mpt_cmds(struct mrsas_softc *sc)
+{
+	int i, j;
+	u_int32_t max_cmd, count;
+	struct mrsas_mpt_cmd *cmd;
+	pMpi2ReplyDescriptorsUnion_t reply_desc;
+	u_int32_t offset, chain_offset, sense_offset;
+	bus_addr_t io_req_base_phys, chain_frame_base_phys, sense_base_phys;
+	u_int8_t *io_req_base, *chain_frame_base, *sense_base;
+
+	max_cmd = sc->max_fw_cmds;
+
+	sc->req_desc = malloc(sc->request_alloc_sz, M_MRSAS, M_NOWAIT);
+	if (!sc->req_desc) {
+		device_printf(sc->mrsas_dev, "Out of memory, cannot alloc req desc\n");
+		return (ENOMEM);
+	}
+	memset(sc->req_desc, 0, sc->request_alloc_sz);
+
+	/*
+	 * sc->mpt_cmd_list is an array of struct mrsas_mpt_cmd pointers.
+	 * Allocate the dynamic array first and then allocate individual
+	 * commands.
+	 */
+	sc->mpt_cmd_list = malloc(sizeof(struct mrsas_mpt_cmd *) * max_cmd, M_MRSAS, M_NOWAIT);
+	if (!sc->mpt_cmd_list) {
+		device_printf(sc->mrsas_dev, "Cannot alloc memory for mpt_cmd_list.\n");
+		return (ENOMEM);
+	}
+	memset(sc->mpt_cmd_list, 0, sizeof(struct mrsas_mpt_cmd *) * max_cmd);
+	for (i = 0; i < max_cmd; i++) {
+		sc->mpt_cmd_list[i] = malloc(sizeof(struct mrsas_mpt_cmd),
+		    M_MRSAS, M_NOWAIT);
+		if (!sc->mpt_cmd_list[i]) {
+			for (j = 0; j < i; j++)
+				free(sc->mpt_cmd_list[j], M_MRSAS);
+			free(sc->mpt_cmd_list, M_MRSAS);
+			sc->mpt_cmd_list = NULL;
+			return (ENOMEM);
+		}
+	}
+
+	io_req_base = (u_int8_t *)sc->io_request_mem + MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE;
+	io_req_base_phys = (bus_addr_t)sc->io_request_phys_addr + MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE;
+	chain_frame_base = (u_int8_t *)sc->chain_frame_mem;
+	chain_frame_base_phys = (bus_addr_t)sc->chain_frame_phys_addr;
+	sense_base = (u_int8_t *)sc->sense_mem;
+	sense_base_phys = (bus_addr_t)sc->sense_phys_addr;
+	for (i = 0; i < max_cmd; i++) {
+		cmd = sc->mpt_cmd_list[i];
+		offset = MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE * i;
+		chain_offset = sc->max_chain_frame_sz * i;
+		sense_offset = MRSAS_SENSE_LEN * i;
+		memset(cmd, 0, sizeof(struct mrsas_mpt_cmd));
+		cmd->index = i + 1;
+		cmd->ccb_ptr = NULL;
+		callout_init_mtx(&cmd->cm_callout, &sc->sim_lock, 0);
+		cmd->sync_cmd_idx = (u_int32_t)MRSAS_ULONG_MAX;
+		cmd->sc = sc;
+		cmd->io_request = (MRSAS_RAID_SCSI_IO_REQUEST *) (io_req_base + offset);
+		memset(cmd->io_request, 0, sizeof(MRSAS_RAID_SCSI_IO_REQUEST));
+		cmd->io_request_phys_addr = io_req_base_phys + offset;
+		cmd->chain_frame = (MPI2_SGE_IO_UNION *) (chain_frame_base + chain_offset);
+		cmd->chain_frame_phys_addr = chain_frame_base_phys + chain_offset;
+		cmd->sense = sense_base + sense_offset;
+		cmd->sense_phys_addr = sense_base_phys + sense_offset;
+		if (bus_dmamap_create(sc->data_tag, 0, &cmd->data_dmamap)) {
+			return (FAIL);
+		}
+		TAILQ_INSERT_TAIL(&(sc->mrsas_mpt_cmd_list_head), cmd, next);
+	}
+
+	/* Initialize reply descriptor array to 0xFFFFFFFF */
+	reply_desc = sc->reply_desc_mem;
+	count = sc->msix_vectors > 0 ? sc->msix_vectors : 1;
+	for (i = 0; i < sc->reply_q_depth * count; i++, reply_desc++) {
+		reply_desc->Words = MRSAS_ULONG_MAX;
+	}
+	return (0);
+}
+
+/*
+ * mrsas_fire_cmd:	Sends command to FW
+ * input:			Adapter softstate
+ * 					request descriptor address low
+ * 					request descriptor address high
+ *
+ * This function fires the command to Firmware by writing to the
+ * inbound_low_queue_port and inbound_high_queue_port.
+ */
+void
+mrsas_fire_cmd(struct mrsas_softc *sc, u_int32_t req_desc_lo,
+    u_int32_t req_desc_hi)
+{
+	mtx_lock(&sc->pci_lock);
+	mrsas_write_reg(sc, offsetof(mrsas_reg_set, inbound_low_queue_port),
+	    req_desc_lo);
+	mrsas_write_reg(sc, offsetof(mrsas_reg_set, inbound_high_queue_port),
+	    req_desc_hi);
+	mtx_unlock(&sc->pci_lock);
+}
+
+/*
+ * mrsas_transition_to_ready:	Move FW to Ready state
+ * input:						Adapter instance soft state
+ *
+ * During initialization, the FW can be in any one of several possible states.
+ * If the FW is in the operational or waiting-for-handshake states, the driver
+ * must take steps to bring it to the ready state; otherwise, it has to wait
+ * for the FW to reach the ready state.
+ */
+int
+mrsas_transition_to_ready(struct mrsas_softc *sc, int ocr)
+{
+	int i;
+	u_int8_t max_wait;
+	u_int32_t val, fw_state;
+	u_int32_t cur_state;
+	u_int32_t abs_state, curr_abs_state;
+
+	val = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad));
+	fw_state = val & MFI_STATE_MASK;
+	max_wait = MRSAS_RESET_WAIT_TIME;
+
+	if (fw_state != MFI_STATE_READY)
+		device_printf(sc->mrsas_dev, "Waiting for FW to come to ready state\n");
+
+	while (fw_state != MFI_STATE_READY) {
+		abs_state = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad));
+		switch (fw_state) {
+		case MFI_STATE_FAULT:
+			device_printf(sc->mrsas_dev, "FW is in FAULT state!!\n");
+			if (ocr) {
+				cur_state = MFI_STATE_FAULT;
+				break;
+			} else
+				return -ENODEV;
+		case MFI_STATE_WAIT_HANDSHAKE:
+			/* Set the CLR bit in inbound doorbell */
+			mrsas_write_reg(sc, offsetof(mrsas_reg_set, doorbell),
+			    MFI_INIT_CLEAR_HANDSHAKE | MFI_INIT_HOTPLUG);
+			cur_state = MFI_STATE_WAIT_HANDSHAKE;
+			break;
+		case MFI_STATE_BOOT_MESSAGE_PENDING:
+			mrsas_write_reg(sc, offsetof(mrsas_reg_set, doorbell),
+			    MFI_INIT_HOTPLUG);
+			cur_state = MFI_STATE_BOOT_MESSAGE_PENDING;
+			break;
+		case MFI_STATE_OPERATIONAL:
+			/*
+			 * Bring it to READY state; assuming max wait 10
+			 * secs
+			 */
+			mrsas_disable_intr(sc);
+			mrsas_write_reg(sc, offsetof(mrsas_reg_set, doorbell), MFI_RESET_FLAGS);
+			for (i = 0; i < max_wait * 1000; i++) {
+				if (mrsas_read_reg(sc, offsetof(mrsas_reg_set, doorbell)) & 1)
+					DELAY(1000);
+				else
+					break;
+			}
+			cur_state = MFI_STATE_OPERATIONAL;
+			break;
+		case MFI_STATE_UNDEFINED:
+			/*
+			 * This state should not last for more than 2
+			 * seconds
+			 */
+			cur_state = MFI_STATE_UNDEFINED;
+			break;
+		case MFI_STATE_BB_INIT:
+			cur_state = MFI_STATE_BB_INIT;
+			break;
+		case MFI_STATE_FW_INIT:
+			cur_state = MFI_STATE_FW_INIT;
+			break;
+		case MFI_STATE_FW_INIT_2:
+			cur_state = MFI_STATE_FW_INIT_2;
+			break;
+		case MFI_STATE_DEVICE_SCAN:
+			cur_state = MFI_STATE_DEVICE_SCAN;
+			break;
+		case MFI_STATE_FLUSH_CACHE:
+			cur_state = MFI_STATE_FLUSH_CACHE;
+			break;
+		default:
+			device_printf(sc->mrsas_dev, "Unknown state 0x%x\n", fw_state);
+			return -ENODEV;
+		}
+
+		/*
+		 * The cur_state should not last for more than max_wait secs
+		 */
+		for (i = 0; i < (max_wait * 1000); i++) {
+			fw_state = (mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+			    outbound_scratch_pad)) & MFI_STATE_MASK);
+			curr_abs_state = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+			    outbound_scratch_pad));
+			if (abs_state == curr_abs_state)
+				DELAY(1000);
+			else
+				break;
+		}
+
+		/*
+		 * Return error if fw_state hasn't changed after max_wait
+		 */
+		if (curr_abs_state == abs_state) {
+			device_printf(sc->mrsas_dev, "FW state [%d] hasn't changed "
+			    "in %d secs\n", fw_state, max_wait);
+			return -ENODEV;
+		}
+	}
+	mrsas_dprint(sc, MRSAS_OCR, "FW now in Ready state\n");
+	return 0;
+}
+
+/*
+ * mrsas_get_mfi_cmd:	Get a cmd from free command pool
+ * input:				Adapter soft state
+ *
+ * This function removes an MFI command from the command list.
+ */
+struct mrsas_mfi_cmd *
+mrsas_get_mfi_cmd(struct mrsas_softc *sc)
+{
+	struct mrsas_mfi_cmd *cmd = NULL;
+
+	mtx_lock(&sc->mfi_cmd_pool_lock);
+	if (!TAILQ_EMPTY(&sc->mrsas_mfi_cmd_list_head)) {
+		cmd = TAILQ_FIRST(&sc->mrsas_mfi_cmd_list_head);
+		TAILQ_REMOVE(&sc->mrsas_mfi_cmd_list_head, cmd, next);
+	}
+	mtx_unlock(&sc->mfi_cmd_pool_lock);
+
+	return cmd;
+}
+
+/*
+ * mrsas_ocr_thread:	Thread to handle OCR/Kill Adapter.
+ * input:				Adapter Context.
+ *
+ * This function checks the FW status register and the do_timedout_reset flag.
+ * It performs OCR or kills the adapter if the FW is in a fault state or an IO
+ * timeout has triggered a reset.
+ */
+static void
+mrsas_ocr_thread(void *arg)
+{
+	struct mrsas_softc *sc;
+	u_int32_t fw_status, fw_state;
+	u_int8_t tm_target_reset_failed = 0;
+
+	sc = (struct mrsas_softc *)arg;
+
+	mrsas_dprint(sc, MRSAS_TRACE, "%s\n", __func__);
+
+	sc->ocr_thread_active = 1;
+	mtx_lock(&sc->sim_lock);
+	for (;;) {
+		/* Sleep for fw_fault_check_delay seconds and check the status */
+		msleep(&sc->ocr_chan, &sc->sim_lock, PRIBIO,
+		    "mrsas_ocr", sc->mrsas_fw_fault_check_delay * hz);
+		if (sc->remove_in_progress ||
+		    sc->adprecovery == MRSAS_HW_CRITICAL_ERROR) {
+			mrsas_dprint(sc, MRSAS_OCR,
+			    "Exit due to %s from %s\n",
+			    sc->remove_in_progress ? "Shutdown" :
+			    "Hardware critical error", __func__);
+			break;
+		}
+		fw_status = mrsas_read_reg(sc,
+		    offsetof(mrsas_reg_set, outbound_scratch_pad));
+		fw_state = fw_status & MFI_STATE_MASK;
+		if (fw_state == MFI_STATE_FAULT || sc->do_timedout_reset ||
+			mrsas_atomic_read(&sc->target_reset_outstanding)) {
+
+			/* First, freeze further IOs to come to the SIM */
+			mrsas_xpt_freeze(sc);
+
+			/* If this is an IO timeout then go for target reset */
+			if (mrsas_atomic_read(&sc->target_reset_outstanding)) {
+				device_printf(sc->mrsas_dev, "Initiating Target RESET "
+				    "because of SCSI IO timeout!\n");
+
+				/* Let the remaining IOs complete */
+				msleep(&sc->ocr_chan, &sc->sim_lock, PRIBIO,
+				      "mrsas_reset_targets", 5 * hz);
+
+				/* Try to reset the target device */
+				if (mrsas_reset_targets(sc) == FAIL)
+					tm_target_reset_failed = 1;
+			}
+
+			/* If this is a DCMD timeout or FW fault,
+			 * then go for controller reset
+			 */
+			if (fw_state == MFI_STATE_FAULT || tm_target_reset_failed ||
+			    (sc->do_timedout_reset == MFI_DCMD_TIMEOUT_OCR)) {
+				if (tm_target_reset_failed)
+					device_printf(sc->mrsas_dev, "Initiating OCR because of "
+					    "TM FAILURE!\n");
+				else
+					device_printf(sc->mrsas_dev, "Initiating OCR "
+					    "because of %s!\n", sc->do_timedout_reset ?
+					    "DCMD IO Timeout" : "FW fault");
+
+				mtx_lock_spin(&sc->ioctl_lock);
+				sc->reset_in_progress = 1;
+				mtx_unlock_spin(&sc->ioctl_lock);
+				sc->reset_count++;
+
+				/*
+				 * Wait for the AEN task to be completed if it is running.
+				 */
+				mtx_unlock(&sc->sim_lock);
+				taskqueue_drain(sc->ev_tq, &sc->ev_task);
+				mtx_lock(&sc->sim_lock);
+
+				taskqueue_block(sc->ev_tq);
+				/* Try to reset the controller */
+				mrsas_reset_ctrl(sc, sc->do_timedout_reset);
+
+				sc->do_timedout_reset = 0;
+				sc->reset_in_progress = 0;
+				tm_target_reset_failed = 0;
+				mrsas_atomic_set(&sc->target_reset_outstanding, 0);
+				memset(sc->target_reset_pool, 0,
+				    sizeof(sc->target_reset_pool));
+				taskqueue_unblock(sc->ev_tq);
+			}
+
+			/* Now allow IOs to come to the SIM */
+			mrsas_xpt_release(sc);
+		}
+	}
+	mtx_unlock(&sc->sim_lock);
+	sc->ocr_thread_active = 0;
+	mrsas_kproc_exit(0);
+}
+
+/*
+ * mrsas_reset_reply_desc:	Reset Reply descriptor as part of OCR.
+ * input:					Adapter Context.
+ *
+ * This function clears the reply descriptors so that, after OCR, the driver
+ * and FW do not carry over stale history.
+ */
+void
+mrsas_reset_reply_desc(struct mrsas_softc *sc)
+{
+	int i, count;
+	pMpi2ReplyDescriptorsUnion_t reply_desc;
+
+	count = sc->msix_vectors > 0 ? sc->msix_vectors : 1;
+	for (i = 0; i < count; i++)
+		sc->last_reply_idx[i] = 0;
+
+	reply_desc = sc->reply_desc_mem;
+	for (i = 0; i < sc->reply_q_depth; i++, reply_desc++) {
+		reply_desc->Words = MRSAS_ULONG_MAX;
+	}
+}
+
+/*
+ * mrsas_reset_ctrl:	Core function to OCR/Kill adapter.
+ * input:				Adapter Context.
+ *
+ * This function runs from thread context so that it can sleep.
+ * 1. Do not handle OCR if FW is in HW critical error.
+ * 2. Wait up to 180 seconds for outstanding commands to complete.
+ * 3. If #2 finds no outstanding commands, the controller is in a working
+ *    state, so skip OCR.  Otherwise, do OCR/kill adapter based on the
+ *    disableOnlineCtrlReset flag.
+ * 4. At the start of the OCR, return all SCSI commands that have a ccb_ptr
+ *    back to the CAM layer.
+ * 5. After the OCR, re-fire management commands and move the controller to
+ *    the operational state.
+ */
+int
+mrsas_reset_ctrl(struct mrsas_softc *sc, u_int8_t reset_reason)
+{
+	int retval = SUCCESS, i, j, retry = 0;
+	u_int32_t host_diag, abs_state, status_reg, reset_adapter;
+	union ccb *ccb;
+	struct mrsas_mfi_cmd *mfi_cmd;
+	struct mrsas_mpt_cmd *mpt_cmd;
+	union mrsas_evt_class_locale class_locale;
+	MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc;
+
+	if (sc->adprecovery == MRSAS_HW_CRITICAL_ERROR) {
+		device_printf(sc->mrsas_dev,
+		    "mrsas: Hardware critical error, returning FAIL.\n");
+		return FAIL;
+	}
+	mrsas_set_bit(MRSAS_FUSION_IN_RESET, &sc->reset_flags);
+	sc->adprecovery = MRSAS_ADPRESET_SM_INFAULT;
+	mrsas_disable_intr(sc);
+	msleep(&sc->ocr_chan, &sc->sim_lock, PRIBIO, "mrsas_ocr",
+	    sc->mrsas_fw_fault_check_delay * hz);
+
+	/* First try waiting for commands to complete */
+	if (mrsas_wait_for_outstanding(sc, reset_reason)) {
+		mrsas_dprint(sc, MRSAS_OCR,
+		    "resetting adapter from %s.\n",
+		    __func__);
+		/* Now return commands back to the CAM layer */
+		mtx_unlock(&sc->sim_lock);
+		for (i = 0; i < sc->max_fw_cmds; i++) {
+			mpt_cmd = sc->mpt_cmd_list[i];
+			if (mpt_cmd->ccb_ptr) {
+				ccb = (union ccb *)(mpt_cmd->ccb_ptr);
+				ccb->ccb_h.status = CAM_SCSI_BUS_RESET;
+				mrsas_cmd_done(sc, mpt_cmd);
+				mrsas_atomic_dec(&sc->fw_outstanding);
+			}
+		}
+		mtx_lock(&sc->sim_lock);
+
+		status_reg = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+		    outbound_scratch_pad));
+		abs_state = status_reg & MFI_STATE_MASK;
+		reset_adapter = status_reg & MFI_RESET_ADAPTER;
+		if (sc->disableOnlineCtrlReset ||
+		    (abs_state == MFI_STATE_FAULT && !reset_adapter)) {
+			/* Reset not supported, kill adapter */
+			mrsas_dprint(sc, MRSAS_OCR, "Reset not supported, killing adapter.\n");
+			mrsas_kill_hba(sc);
+			retval = FAIL;
+			goto out;
+		}
+		/* Now try to reset the chip */
+		for (i = 0; i < MRSAS_FUSION_MAX_RESET_TRIES; i++) {
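+			/*
+			 * Write the MPI2 diagnostic-unlock key sequence so that
+			 * the host diag register will accept the reset request.
+			 */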
+			mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset),
+			    MPI2_WRSEQ_FLUSH_KEY_VALUE);
+			mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset),
+			    MPI2_WRSEQ_1ST_KEY_VALUE);
+			mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset),
+			    MPI2_WRSEQ_2ND_KEY_VALUE);
+			mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset),
+			    MPI2_WRSEQ_3RD_KEY_VALUE);
+			mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset),
+			    MPI2_WRSEQ_4TH_KEY_VALUE);
+			mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset),
+			    MPI2_WRSEQ_5TH_KEY_VALUE);
+			mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset),
+			    MPI2_WRSEQ_6TH_KEY_VALUE);
+
+			/* Check that the diag write enable (DRWE) bit is on */
+			host_diag = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+			    fusion_host_diag));
+			retry = 0;
+			while (!(host_diag & HOST_DIAG_WRITE_ENABLE)) {
+				DELAY(100 * 1000);
+				host_diag = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+				    fusion_host_diag));
+				if (retry++ == 100) {
+					mrsas_dprint(sc, MRSAS_OCR,
+					    "Host diag unlock failed!\n");
+					break;
+				}
+			}
+			if (!(host_diag & HOST_DIAG_WRITE_ENABLE))
+				continue;
+
+			/* Send chip reset command */
+			mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_host_diag),
+			    host_diag | HOST_DIAG_RESET_ADAPTER);
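+			/* Give the controller ~3 seconds to act on the reset request */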
+			DELAY(3000 * 1000);
+
+			/* Make sure reset adapter bit is cleared */
+			host_diag = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+			    fusion_host_diag));
+			retry = 0;
+			while (host_diag & HOST_DIAG_RESET_ADAPTER) {
+				DELAY(100 * 1000);
+				host_diag = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+				    fusion_host_diag));
+				if (retry++ == 1000) {
+					mrsas_dprint(sc, MRSAS_OCR,
+					    "Diag reset adapter never cleared!\n");
+					break;
+				}
+			}
+			if (host_diag & HOST_DIAG_RESET_ADAPTER)
+				continue;
+
+			abs_state = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+			    outbound_scratch_pad)) & MFI_STATE_MASK;
+			retry = 0;
+
+			while ((abs_state <= MFI_STATE_FW_INIT) && (retry++ < 1000)) {
+				DELAY(100 * 1000);
+				abs_state = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+				    outbound_scratch_pad)) & MFI_STATE_MASK;
+			}
+			if (abs_state <= MFI_STATE_FW_INIT) {
+				mrsas_dprint(sc, MRSAS_OCR, "firmware state < MFI_STATE_FW_INIT,"
+				    " state = 0x%x\n", abs_state);
+				continue;
+			}
+			/* Wait for FW to become ready */
+			if (mrsas_transition_to_ready(sc, 1)) {
+				mrsas_dprint(sc, MRSAS_OCR,
+				    "mrsas: Failed to transition controller to ready.\n");
+				continue;
+			}
+			mrsas_reset_reply_desc(sc);
+			if (mrsas_ioc_init(sc)) {
+				mrsas_dprint(sc, MRSAS_OCR, "mrsas_ioc_init() failed!\n");
+				continue;
+			}
+			for (j = 0; j < sc->max_fw_cmds; j++) {
+				mpt_cmd = sc->mpt_cmd_list[j];
+				if (mpt_cmd->sync_cmd_idx != (u_int32_t)MRSAS_ULONG_MAX) {
+					mfi_cmd = sc->mfi_cmd_list[mpt_cmd->sync_cmd_idx];
+					/* If not an IOCTL then release the command else re-fire */
+					if (!mfi_cmd->sync_cmd) {
+						mrsas_release_mfi_cmd(mfi_cmd);
+					} else {
+						req_desc = mrsas_get_request_desc(sc,
+						    mfi_cmd->cmd_id.context.smid - 1);
+						mrsas_dprint(sc, MRSAS_OCR,
+						    "Re-fire command DCMD opcode 0x%x index %d\n ",
+						    mfi_cmd->frame->dcmd.opcode, j);
+						if (!req_desc)
+							device_printf(sc->mrsas_dev, 
+							    "Cannot build MPT cmd.\n");
+						else
+							mrsas_fire_cmd(sc, req_desc->addr.u.low,
+							    req_desc->addr.u.high);
+					}
+				}
+			}
+
+			/* Reset load balance info */
+			memset(sc->load_balance_info, 0,
+			    sizeof(LD_LOAD_BALANCE_INFO) * MAX_LOGICAL_DRIVES_EXT);
+
+			if (mrsas_get_ctrl_info(sc)) {
+				mrsas_kill_hba(sc);
+				retval = FAIL;
+				goto out;
+			}
+			if (!mrsas_get_map_info(sc))
+				mrsas_sync_map_info(sc);
+
+			megasas_setup_jbod_map(sc);
+
+			mrsas_clear_bit(MRSAS_FUSION_IN_RESET, &sc->reset_flags);
+			mrsas_enable_intr(sc);
+			sc->adprecovery = MRSAS_HBA_OPERATIONAL;
+
+			/* Register AEN with FW for last sequence number */
+			class_locale.members.reserved = 0;
+			class_locale.members.locale = MR_EVT_LOCALE_ALL;
+			class_locale.members.class = MR_EVT_CLASS_DEBUG;
+
+			mtx_unlock(&sc->sim_lock);
+			if (mrsas_register_aen(sc, sc->last_seq_num,
+			    class_locale.word)) {
+				device_printf(sc->mrsas_dev,
+				    "ERROR: AEN registration FAILED from OCR !!! "
+				    "Further events from the controller cannot be notified."
+				    "Either there is some problem in the controller"
+				    "or the controller does not support AEN.\n"
+				    "Please contact to the SUPPORT TEAM if the problem persists\n");
+			}
+			mtx_lock(&sc->sim_lock);
+
+			/* Adapter reset completed successfully */
+			device_printf(sc->mrsas_dev, "Reset successful\n");
+			retval = SUCCESS;
+			goto out;
+		}
+		/* Reset failed, kill the adapter */
+		device_printf(sc->mrsas_dev, "Reset failed, killing adapter.\n");
+		mrsas_kill_hba(sc);
+		retval = FAIL;
+	} else {
+		mrsas_clear_bit(MRSAS_FUSION_IN_RESET, &sc->reset_flags);
+		mrsas_enable_intr(sc);
+		sc->adprecovery = MRSAS_HBA_OPERATIONAL;
+	}
+out:
+	mrsas_clear_bit(MRSAS_FUSION_IN_RESET, &sc->reset_flags);
+	mrsas_dprint(sc, MRSAS_OCR,
+	    "Reset Exit with %d.\n", retval);
+	return retval;
+}
+
+/*
+ * mrsas_kill_hba:	Kill HBA when OCR is not supported
+ * input:			Adapter Context.
+ *
+ * This function will kill HBA when OCR is not supported.
+ */
+void
+mrsas_kill_hba(struct mrsas_softc *sc)
+{
+	sc->adprecovery = MRSAS_HW_CRITICAL_ERROR;
+	DELAY(1000 * 1000);
+	mrsas_dprint(sc, MRSAS_OCR, "%s\n", __func__);
+	mrsas_write_reg(sc, offsetof(mrsas_reg_set, doorbell),
+	    MFI_STOP_ADP);
+	/* Flush */
+	mrsas_read_reg(sc, offsetof(mrsas_reg_set, doorbell));
+	mrsas_complete_outstanding_ioctls(sc);
+}
+
+/**
+ * mrsas_complete_outstanding_ioctls:	Complete pending IOCTLs after kill_hba
+ * input:			Controller softc
+ *
+ * Returns void
+ */
+void 
+mrsas_complete_outstanding_ioctls(struct mrsas_softc *sc)
+{
+	int i;
+	struct mrsas_mpt_cmd *cmd_mpt;
+	struct mrsas_mfi_cmd *cmd_mfi;
+	u_int32_t count, MSIxIndex;
+
+	count = sc->msix_vectors > 0 ? sc->msix_vectors : 1;
+	for (i = 0; i < sc->max_fw_cmds; i++) {
+		cmd_mpt = sc->mpt_cmd_list[i];
+
+		if (cmd_mpt->sync_cmd_idx != (u_int32_t)MRSAS_ULONG_MAX) {
+			cmd_mfi = sc->mfi_cmd_list[cmd_mpt->sync_cmd_idx];
+			if (cmd_mfi->sync_cmd && cmd_mfi->frame->hdr.cmd != MFI_CMD_ABORT) {
+				for (MSIxIndex = 0; MSIxIndex < count; MSIxIndex++)
+					mrsas_complete_mptmfi_passthru(sc, cmd_mfi,
+					    cmd_mpt->io_request->RaidContext.status);
+			}
+		}
+	}
+}
+
+/*
+ * mrsas_wait_for_outstanding:	Wait for outstanding commands
+ * input:						Adapter Context.
+ *
+ * This function waits up to 180 seconds for outstanding commands to
+ * complete.
+ */
+int
+mrsas_wait_for_outstanding(struct mrsas_softc *sc, u_int8_t check_reason)
+{
+	int i, outstanding, retval = 0;
+	u_int32_t fw_state, count, MSIxIndex;
+
+
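+	/* Poll once per second, for up to MRSAS_RESET_WAIT_TIME seconds */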
+	for (i = 0; i < MRSAS_RESET_WAIT_TIME; i++) {
+		if (sc->remove_in_progress) {
+			mrsas_dprint(sc, MRSAS_OCR,
+			    "Driver remove or shutdown called.\n");
+			retval = 1;
+			goto out;
+		}
+		/* Check if firmware is in fault state */
+		fw_state = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
+		    outbound_scratch_pad)) & MFI_STATE_MASK;
+		if (fw_state == MFI_STATE_FAULT) {
+			mrsas_dprint(sc, MRSAS_OCR,
+			    "Found FW in FAULT state, will reset adapter.\n");
+			count = sc->msix_vectors > 0 ? sc->msix_vectors : 1;
+			mtx_unlock(&sc->sim_lock);
+			for (MSIxIndex = 0; MSIxIndex < count; MSIxIndex++)
+				mrsas_complete_cmd(sc, MSIxIndex);
+			mtx_lock(&sc->sim_lock);
+			retval = 1;
+			goto out;
+		}
+		if (check_reason == MFI_DCMD_TIMEOUT_OCR) {
+			mrsas_dprint(sc, MRSAS_OCR,
+			    "DCMD IO TIMEOUT detected, will reset adapter.\n");
+			retval = 1;
+			goto out;
+		}
+		outstanding = mrsas_atomic_read(&sc->fw_outstanding);
+		if (!outstanding)
+			goto out;
+
+		if (!(i % MRSAS_RESET_NOTICE_INTERVAL)) {
+			mrsas_dprint(sc, MRSAS_OCR, "[%2d]waiting for %d "
+			    "commands to complete\n", i, outstanding);
+			count = sc->msix_vectors > 0 ? sc->msix_vectors : 1;
+			mtx_unlock(&sc->sim_lock);
+			for (MSIxIndex = 0; MSIxIndex < count; MSIxIndex++)
+				mrsas_complete_cmd(sc, MSIxIndex);
+			mtx_lock(&sc->sim_lock);
+		}
+		DELAY(1000 * 1000);
+	}
+
+	if (mrsas_atomic_read(&sc->fw_outstanding)) {
+		mrsas_dprint(sc, MRSAS_OCR,
+		    " pending commands remain after waiting,"
+		    " will reset adapter.\n");
+		retval = 1;
+	}
+out:
+	return retval;
+}
+
+/*
+ * mrsas_release_mfi_cmd:	Return a cmd to free command pool
+ * input:					Command packet for return to free cmd pool
+ *
+ * This function returns the MFI & MPT command to the command list.
+ */
+void
+mrsas_release_mfi_cmd(struct mrsas_mfi_cmd *cmd_mfi)
+{
+	struct mrsas_softc *sc = cmd_mfi->sc;
+	struct mrsas_mpt_cmd *cmd_mpt;
+
+
+	mtx_lock(&sc->mfi_cmd_pool_lock);
+	/*
+	 * Release the MPT command (if one was allocated and associated
+	 * with this MFI command).
+	 */
+	if (cmd_mfi->cmd_id.context.smid) {
+		mtx_lock(&sc->mpt_cmd_pool_lock);
+		/* Get the mpt cmd from mfi cmd frame's smid value */
+		cmd_mpt = sc->mpt_cmd_list[cmd_mfi->cmd_id.context.smid-1];
+		cmd_mpt->flags = 0;
+		cmd_mpt->sync_cmd_idx = (u_int32_t)MRSAS_ULONG_MAX;
+		TAILQ_INSERT_HEAD(&(sc->mrsas_mpt_cmd_list_head), cmd_mpt, next);
+		mtx_unlock(&sc->mpt_cmd_pool_lock);
+	}
+	/* Release the mfi command */
+	cmd_mfi->ccb_ptr = NULL;
+	cmd_mfi->cmd_id.frame_count = 0;
+	TAILQ_INSERT_HEAD(&(sc->mrsas_mfi_cmd_list_head), cmd_mfi, next);
+	mtx_unlock(&sc->mfi_cmd_pool_lock);
+
+	return;
+}
+
+/*
+ * mrsas_get_ctrl_info:	Returns FW's controller structure
+ * input:						Adapter soft state
+ * 								Controller information structure
+ *
+ * Issues an internal command (DCMD) to get the FW's controller structure. This
+ * information is mainly used to find out the maximum IO transfer per command
+ * supported by the FW.
+ */
+static int
+mrsas_get_ctrl_info(struct mrsas_softc *sc)
+{
+	int retcode = 0;
+	u_int8_t do_ocr = 1;
+	struct mrsas_mfi_cmd *cmd;
+	struct mrsas_dcmd_frame *dcmd;
+
+	cmd = mrsas_get_mfi_cmd(sc);
+
+	if (!cmd) {
+		device_printf(sc->mrsas_dev, "Failed to get a free cmd\n");
+		return -ENOMEM;
+	}
+	dcmd = &cmd->frame->dcmd;
+
+	if (mrsas_alloc_ctlr_info_cmd(sc) != SUCCESS) {
+		device_printf(sc->mrsas_dev, "Cannot allocate get ctlr info cmd\n");
+		mrsas_release_mfi_cmd(cmd);
+		return -ENOMEM;
+	}
+	memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+
+	dcmd->cmd = MFI_CMD_DCMD;
+	dcmd->cmd_status = 0xFF;
+	dcmd->sge_count = 1;
+	dcmd->flags = MFI_FRAME_DIR_READ;
+	dcmd->timeout = 0;
+	dcmd->pad_0 = 0;
+	dcmd->data_xfer_len = sizeof(struct mrsas_ctrl_info);
+	dcmd->opcode = MR_DCMD_CTRL_GET_INFO;
+	dcmd->sgl.sge32[0].phys_addr = sc->ctlr_info_phys_addr;
+	dcmd->sgl.sge32[0].length = sizeof(struct mrsas_ctrl_info);
+
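+	/*
+	 * Use the interrupt-driven blocking path unless interrupts are
+	 * masked, in which case poll for completion.
+	 */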
+	if (!sc->mask_interrupts)
+		retcode = mrsas_issue_blocked_cmd(sc, cmd);
+	else
+		retcode = mrsas_issue_polled(sc, cmd);
+
+	if (retcode == ETIMEDOUT)
+		goto dcmd_timeout;
+	else
+		memcpy(sc->ctrl_info, sc->ctlr_info_mem, sizeof(struct mrsas_ctrl_info));
+
+	do_ocr = 0;
+	mrsas_update_ext_vd_details(sc);
+
+	sc->use_seqnum_jbod_fp =
+	    sc->ctrl_info->adapterOperations3.useSeqNumJbodFP;
+	sc->disableOnlineCtrlReset =
+	    sc->ctrl_info->properties.OnOffProperties.disableOnlineCtrlReset;
+
+dcmd_timeout:
+	mrsas_free_ctlr_info_cmd(sc);
+
+	if (do_ocr)
+		sc->do_timedout_reset = MFI_DCMD_TIMEOUT_OCR;
+
+	if (!sc->mask_interrupts)
+		mrsas_release_mfi_cmd(cmd);
+
+	return (retcode);
+}
+
+/*
+ * mrsas_update_ext_vd_details : Update details w.r.t Extended VD
+ * input:
+ *	sc - Controller's softc
+ */
+static void 
+mrsas_update_ext_vd_details(struct mrsas_softc *sc)
+{
+	sc->max256vdSupport =
+	sc->ctrl_info->adapterOperations3.supportMaxExtLDs;
+	/* Below is additional check to address future FW enhancement */
+	if (sc->ctrl_info->max_lds > 64)
+		sc->max256vdSupport = 1;
+
+	sc->drv_supported_vd_count = MRSAS_MAX_LD_CHANNELS
+	    * MRSAS_MAX_DEV_PER_CHANNEL;
+	sc->drv_supported_pd_count = MRSAS_MAX_PD_CHANNELS
+	    * MRSAS_MAX_DEV_PER_CHANNEL;
+	if (sc->max256vdSupport) {
+		sc->fw_supported_vd_count = MAX_LOGICAL_DRIVES_EXT;
+		sc->fw_supported_pd_count = MAX_PHYSICAL_DEVICES;
+	} else {
+		sc->fw_supported_vd_count = MAX_LOGICAL_DRIVES;
+		sc->fw_supported_pd_count = MAX_PHYSICAL_DEVICES;
+	}
+
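+	/*
+	 * Size the legacy and extended FW RAID maps; current_map_sz selects
+	 * the one that matches the FW's reported VD support.
+	 */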
+	sc->old_map_sz = sizeof(MR_FW_RAID_MAP) +
+	    (sizeof(MR_LD_SPAN_MAP) *
+	    (sc->fw_supported_vd_count - 1));
+	sc->new_map_sz = sizeof(MR_FW_RAID_MAP_EXT);
+	sc->drv_map_sz = sizeof(MR_DRV_RAID_MAP) +
+	    (sizeof(MR_LD_SPAN_MAP) *
+	    (sc->drv_supported_vd_count - 1));
+
+	sc->max_map_sz = max(sc->old_map_sz, sc->new_map_sz);
+
+	if (sc->max256vdSupport)
+		sc->current_map_sz = sc->new_map_sz;
+	else
+		sc->current_map_sz = sc->old_map_sz;
+}
+
+/*
+ * mrsas_alloc_ctlr_info_cmd:	Allocates memory for controller info command
+ * input:						Adapter soft state
+ *
+ * Allocates DMAable memory for the controller info internal command.
+ */
+int
+mrsas_alloc_ctlr_info_cmd(struct mrsas_softc *sc)
+{
+	int ctlr_info_size;
+
+	/* Allocate get controller info command */
+	ctlr_info_size = sizeof(struct mrsas_ctrl_info);
+	if (bus_dma_tag_create(sc->mrsas_parent_tag,
+	    1, 0,
+	    BUS_SPACE_MAXADDR_32BIT,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    ctlr_info_size,
+	    1,
+	    ctlr_info_size,
+	    BUS_DMA_ALLOCNOW,
+	    NULL, NULL,
+	    &sc->ctlr_info_tag)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate ctlr info tag\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamem_alloc(sc->ctlr_info_tag, (void **)&sc->ctlr_info_mem,
+	    BUS_DMA_NOWAIT, &sc->ctlr_info_dmamap)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate ctlr info cmd mem\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamap_load(sc->ctlr_info_tag, sc->ctlr_info_dmamap,
+	    sc->ctlr_info_mem, ctlr_info_size, mrsas_addr_cb,
+	    &sc->ctlr_info_phys_addr, BUS_DMA_NOWAIT)) {
+		device_printf(sc->mrsas_dev, "Cannot load ctlr info cmd mem\n");
+		return (ENOMEM);
+	}
+	memset(sc->ctlr_info_mem, 0, ctlr_info_size);
+	return (0);
+}
+
+/*
+ * mrsas_free_ctlr_info_cmd:	Free memory for controller info command
+ * input:						Adapter soft state
+ *
+ * Deallocates memory of the get controller info cmd.
+ */
+void
+mrsas_free_ctlr_info_cmd(struct mrsas_softc *sc)
+{
+	if (sc->ctlr_info_phys_addr)
+		bus_dmamap_unload(sc->ctlr_info_tag, sc->ctlr_info_dmamap);
+	if (sc->ctlr_info_mem != NULL)
+		bus_dmamem_free(sc->ctlr_info_tag, sc->ctlr_info_mem, sc->ctlr_info_dmamap);
+	if (sc->ctlr_info_tag != NULL)
+		bus_dma_tag_destroy(sc->ctlr_info_tag);
+}
+
+/*
+ * mrsas_issue_polled:	Issues a polling command
+ * inputs:				Adapter soft state
+ * 						Command packet to be issued
+ *
+ * This function posts internal commands to the Firmware.  MFI requires
+ * the cmd_status to be set to 0xFF before posting.  The maximum wait time of
+ * the poll response timer is 180 seconds.
+ */
+int
+mrsas_issue_polled(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd)
+{
+	struct mrsas_header *frame_hdr = &cmd->frame->hdr;
+	u_int8_t max_wait = MRSAS_INTERNAL_CMD_WAIT_TIME;
+	int i, retcode = SUCCESS;
+
+	frame_hdr->cmd_status = 0xFF;
+	frame_hdr->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE;
+
+	/* Issue the frame using inbound queue port */
+	if (mrsas_issue_dcmd(sc, cmd)) {
+		device_printf(sc->mrsas_dev, "Cannot issue DCMD internal command.\n");
+		return (1);
+	}
+	/*
+	 * Poll for the Firmware response.  Each DELAY() call busy-waits the
+	 * CPU, but only for 1 millisecond per iteration.
+	 */
+	if (frame_hdr->cmd_status == 0xFF) {
+		for (i = 0; i < (max_wait * 1000); i++) {
+			if (frame_hdr->cmd_status == 0xFF)
+				DELAY(1000);
+			else
+				break;
+		}
+	}
+	if (frame_hdr->cmd_status == 0xFF) {
+		device_printf(sc->mrsas_dev, "DCMD timed out after %d "
+		    "seconds from %s\n", max_wait, __func__);
+		device_printf(sc->mrsas_dev, "DCMD opcode 0x%X\n",
+		    cmd->frame->dcmd.opcode);
+		retcode = ETIMEDOUT;
+	}
+	return (retcode);
+}
+
+/*
+ * mrsas_issue_dcmd:	Issues a MFI Pass thru cmd
+ * input:				Adapter soft state
+ * 						MFI cmd pointer
+ *
+ * This function is called by mrsas_issue_blocked_cmd() and
+ * mrsas_issue_polled() to build the MPT command and then fire it at the
+ * Firmware.
+ */
+int
+mrsas_issue_dcmd(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd)
+{
+	MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc;
+
+	req_desc = mrsas_build_mpt_cmd(sc, cmd);
+	if (!req_desc) {
+		device_printf(sc->mrsas_dev, "Cannot build MPT cmd.\n");
+		return (1);
+	}
+	mrsas_fire_cmd(sc, req_desc->addr.u.low, req_desc->addr.u.high);
+
+	return (0);
+}
+
+/*
+ * mrsas_build_mpt_cmd:	Calls helper function to build Passthru cmd
+ * input:				Adapter soft state
+ * 						MFI cmd to build
+ *
+ * This function is called by mrsas_issue_dcmd() to build the MPT-MFI passthru
+ * command and prepares the MPT command to send to Firmware.
+ */
+MRSAS_REQUEST_DESCRIPTOR_UNION *
+mrsas_build_mpt_cmd(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd)
+{
+	MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc;
+	u_int16_t index;
+
+	if (mrsas_build_mptmfi_passthru(sc, cmd)) {
+		device_printf(sc->mrsas_dev, "Cannot build MPT-MFI passthru cmd.\n");
+		return NULL;
+	}
+	index = cmd->cmd_id.context.smid;
+
+	req_desc = mrsas_get_request_desc(sc, index - 1);
+	if (!req_desc)
+		return NULL;
+
+	req_desc->addr.Words = 0;
+	req_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
+
+	req_desc->SCSIIO.SMID = index;
+
+	return (req_desc);
+}
+
+/*
+ * mrsas_build_mptmfi_passthru:	Builds a MPT MFI Passthru command
+ * input:						Adapter soft state
+ * 								MFI cmd pointer
+ *
+ * The MPT command and the io_request are setup as a passthru command. The SGE
+ * chain address is set to frame_phys_addr of the MFI command.
+ */
+u_int8_t
+mrsas_build_mptmfi_passthru(struct mrsas_softc *sc, struct mrsas_mfi_cmd *mfi_cmd)
+{
+	MPI25_IEEE_SGE_CHAIN64 *mpi25_ieee_chain;
+	PTR_MRSAS_RAID_SCSI_IO_REQUEST io_req;
+	struct mrsas_mpt_cmd *mpt_cmd;
+	struct mrsas_header *frame_hdr = &mfi_cmd->frame->hdr;
+
+	mpt_cmd = mrsas_get_mpt_cmd(sc);
+	if (!mpt_cmd)
+		return (1);
+
+	/* Save the smid. To be used for returning the cmd */
+	mfi_cmd->cmd_id.context.smid = mpt_cmd->index;
+
+	mpt_cmd->sync_cmd_idx = mfi_cmd->index;
+
+	/*
+	 * For cmds where the flag is set, store the flag and check on
+	 * completion. For cmds with this flag, don't call
+	 * mrsas_complete_cmd.
+	 */
+
+	if (frame_hdr->flags & MFI_FRAME_DONT_POST_IN_REPLY_QUEUE)
+		mpt_cmd->flags = MFI_FRAME_DONT_POST_IN_REPLY_QUEUE;
+
+	io_req = mpt_cmd->io_request;
+
+	if (sc->mrsas_gen3_ctrl) {
+		pMpi25IeeeSgeChain64_t sgl_ptr_end = (pMpi25IeeeSgeChain64_t)&io_req->SGL;
+
+		sgl_ptr_end += sc->max_sge_in_main_msg - 1;
+		sgl_ptr_end->Flags = 0;
+	}
+	mpi25_ieee_chain = (MPI25_IEEE_SGE_CHAIN64 *) & io_req->SGL.IeeeChain;
+
+	io_req->Function = MRSAS_MPI2_FUNCTION_PASSTHRU_IO_REQUEST;
+	io_req->SGLOffset0 = offsetof(MRSAS_RAID_SCSI_IO_REQUEST, SGL) / 4;
+	io_req->ChainOffset = sc->chain_offset_mfi_pthru;
+
+	mpi25_ieee_chain->Address = mfi_cmd->frame_phys_addr;
+
+	mpi25_ieee_chain->Flags = IEEE_SGE_FLAGS_CHAIN_ELEMENT |
+	    MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR;
+
+	mpi25_ieee_chain->Length = sc->max_chain_frame_sz;
+
+	return (0);
+}
+
+/*
+ * mrsas_issue_blocked_cmd:	Synchronous wrapper around regular FW cmds
+ * input:					Adapter soft state
+ * 							Command to be issued
+ *
+ * This function waits on an event for the command to be returned from the ISR.
+ * Max wait time is MRSAS_INTERNAL_CMD_WAIT_TIME secs. Used for issuing
+ * internal and ioctl commands.
+ */
+int
+mrsas_issue_blocked_cmd(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd)
+{
+	u_int8_t max_wait = MRSAS_INTERNAL_CMD_WAIT_TIME;
+	unsigned long total_time = 0;
+	int retcode = SUCCESS;
+
+	/* Initialize cmd_status */
+	cmd->cmd_status = 0xFF;
+
+	/* Build MPT-MFI command for issue to FW */
+	if (mrsas_issue_dcmd(sc, cmd)) {
+		device_printf(sc->mrsas_dev, "Cannot issue DCMD internal command.\n");
+		return (1);
+	}
+	sc->chan = (void *)&cmd;
+
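+	/* Sleep in one-second (hz) slices until the completion path wakes us up */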
+	while (1) {
+		if (cmd->cmd_status == 0xFF) {
+			tsleep((void *)&sc->chan, 0, "mrsas_sleep", hz);
+		} else
+			break;
+
+		if (!cmd->sync_cmd) {	/* cmd->sync will be set for an IOCTL
+					 * command */
+			total_time++;
+			if (total_time >= max_wait) {
+				device_printf(sc->mrsas_dev,
+				    "Internal command timed out after %d seconds.\n", max_wait);
+				retcode = 1;
+				break;
+			}
+		}
+	}
+
+	if (cmd->cmd_status == 0xFF) {
+		device_printf(sc->mrsas_dev, "DCMD timed out after %d "
+		    "seconds from %s\n", max_wait, __func__);
+		device_printf(sc->mrsas_dev, "DCMD opcode 0x%X\n",
+		    cmd->frame->dcmd.opcode);
+		retcode = ETIMEDOUT;
+	}
+	return (retcode);
+}
+
+/*
+ * mrsas_complete_mptmfi_passthru:	Completes a command
+ * input:	@sc:					Adapter soft state
+ * 			@cmd:					Command to be completed
+ * 			@status:				cmd completion status
+ *
+ * This function is called from mrsas_complete_cmd() after an interrupt is
+ * received from Firmware, and io_request->Function is
+ * MRSAS_MPI2_FUNCTION_PASSTHRU_IO_REQUEST.
+ */
+void
+mrsas_complete_mptmfi_passthru(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd,
+    u_int8_t status)
+{
+	struct mrsas_header *hdr = &cmd->frame->hdr;
+	u_int8_t cmd_status = cmd->frame->hdr.cmd_status;
+
+	/* Reset the retry counter for future re-tries */
+	cmd->retry_for_fw_reset = 0;
+
+	if (cmd->ccb_ptr)
+		cmd->ccb_ptr = NULL;
+
+	switch (hdr->cmd) {
+	case MFI_CMD_INVALID:
+		device_printf(sc->mrsas_dev, "MFI_CMD_INVALID command.\n");
+		break;
+	case MFI_CMD_PD_SCSI_IO:
+	case MFI_CMD_LD_SCSI_IO:
+		/*
+		 * MFI_CMD_PD_SCSI_IO and MFI_CMD_LD_SCSI_IO could have been
+		 * issued either through an IO path or an IOCTL path. If it
+		 * was via IOCTL, we will send it to internal completion.
+		 */
+		if (cmd->sync_cmd) {
+			cmd->sync_cmd = 0;
+			mrsas_wakeup(sc, cmd);
+			break;
+		}
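+		/* FALLTHROUGH */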
+	case MFI_CMD_SMP:
+	case MFI_CMD_STP:
+	case MFI_CMD_DCMD:
+		/* Check for LD map update */
+		if ((cmd->frame->dcmd.opcode == MR_DCMD_LD_MAP_GET_INFO) &&
+		    (cmd->frame->dcmd.mbox.b[1] == 1)) {
+			sc->fast_path_io = 0;
+			mtx_lock(&sc->raidmap_lock);
+			sc->map_update_cmd = NULL;
+			if (cmd_status != 0) {
+				if (cmd_status != MFI_STAT_NOT_FOUND)
+					device_printf(sc->mrsas_dev, "map sync failed, status=%x\n", cmd_status);
+				else {
+					mrsas_release_mfi_cmd(cmd);
+					mtx_unlock(&sc->raidmap_lock);
+					break;
+				}
+			} else
+				sc->map_id++;
+			mrsas_release_mfi_cmd(cmd);
+			if (MR_ValidateMapInfo(sc))
+				sc->fast_path_io = 0;
+			else
+				sc->fast_path_io = 1;
+			mrsas_sync_map_info(sc);
+			mtx_unlock(&sc->raidmap_lock);
+			break;
+		}
+		if (cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_GET_INFO ||
+		    cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_GET) {
+			sc->mrsas_aen_triggered = 0;
+		}
+		/* FW has an updated PD sequence */
+		if ((cmd->frame->dcmd.opcode ==
+		    MR_DCMD_SYSTEM_PD_MAP_GET_INFO) &&
+		    (cmd->frame->dcmd.mbox.b[0] == 1)) {
+
+			mtx_lock(&sc->raidmap_lock);
+			sc->jbod_seq_cmd = NULL;
+			mrsas_release_mfi_cmd(cmd);
+
+			if (cmd_status == MFI_STAT_OK) {
+				sc->pd_seq_map_id++;
+				/* Re-register a pd sync seq num cmd */
+				if (megasas_sync_pd_seq_num(sc, true))
+					sc->use_seqnum_jbod_fp = 0;
+			} else {
+				sc->use_seqnum_jbod_fp = 0;
+				device_printf(sc->mrsas_dev,
+				    "Jbod map sync failed, status=%x\n", cmd_status);
+			}
+			mtx_unlock(&sc->raidmap_lock);
+			break;
+		}
+		/* See if got an event notification */
+		if (cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_WAIT)
+			mrsas_complete_aen(sc, cmd);
+		else
+			mrsas_wakeup(sc, cmd);
+		break;
+	case MFI_CMD_ABORT:
+		/* Command issued to abort another cmd return */
+		mrsas_complete_abort(sc, cmd);
+		break;
+	default:
+		device_printf(sc->mrsas_dev, "Unknown command completed! [0x%X]\n", hdr->cmd);
+		break;
+	}
+}
+
+/*
+ * mrsas_wakeup:	Completes an internal command
+ * input:			Adapter soft state
+ * 					Command to be completed
+ *
+ * In mrsas_issue_blocked_cmd(), after a command is issued to Firmware, a wait
+ * timer is started.  This function is called from
+ * mrsas_complete_mptmfi_passthru() as it completes the command, to wake up
+ * from the command wait.
+ */
+void
+mrsas_wakeup(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd)
+{
+	cmd->cmd_status = cmd->frame->io.cmd_status;
+
+	if (cmd->cmd_status == 0xFF)
+		cmd->cmd_status = 0;
+
+	sc->chan = (void *)&cmd;
+	wakeup_one((void *)&sc->chan);
+	return;
+}
+
+/*
+ * mrsas_shutdown_ctlr:	Instructs FW to shut down the controller
+ * input:				Adapter soft state
+ * 						Shutdown/Hibernate opcode
+ *
+ * This function issues a DCMD internal command to Firmware to initiate
+ * shutdown of the controller.
+ */
+static void
+mrsas_shutdown_ctlr(struct mrsas_softc *sc, u_int32_t opcode)
+{
+	struct mrsas_mfi_cmd *cmd;
+	struct mrsas_dcmd_frame *dcmd;
+
+	if (sc->adprecovery == MRSAS_HW_CRITICAL_ERROR)
+		return;
+
+	cmd = mrsas_get_mfi_cmd(sc);
+	if (!cmd) {
+		device_printf(sc->mrsas_dev, "Cannot allocate for shutdown cmd.\n");
+		return;
+	}
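+	/* Abort any outstanding AEN, map-update, and JBOD sync commands first */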
+	if (sc->aen_cmd)
+		mrsas_issue_blocked_abort_cmd(sc, sc->aen_cmd);
+	if (sc->map_update_cmd)
+		mrsas_issue_blocked_abort_cmd(sc, sc->map_update_cmd);
+	if (sc->jbod_seq_cmd)
+		mrsas_issue_blocked_abort_cmd(sc, sc->jbod_seq_cmd);
+
+	dcmd = &cmd->frame->dcmd;
+	memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+
+	dcmd->cmd = MFI_CMD_DCMD;
+	dcmd->cmd_status = 0x0;
+	dcmd->sge_count = 0;
+	dcmd->flags = MFI_FRAME_DIR_NONE;
+	dcmd->timeout = 0;
+	dcmd->pad_0 = 0;
+	dcmd->data_xfer_len = 0;
+	dcmd->opcode = opcode;
+
+	device_printf(sc->mrsas_dev, "Preparing to shut down controller.\n");
+
+	mrsas_issue_blocked_cmd(sc, cmd);
+	mrsas_release_mfi_cmd(cmd);
+
+	return;
+}
+
+/*
+ * mrsas_flush_cache:	Requests FW to flush all its caches
+ * input:				Adapter soft state
+ *
+ * This function issues a DCMD internal command to Firmware to initiate
+ * flushing of all caches.
+ */
+static void
+mrsas_flush_cache(struct mrsas_softc *sc)
+{
+	struct mrsas_mfi_cmd *cmd;
+	struct mrsas_dcmd_frame *dcmd;
+
+	if (sc->adprecovery == MRSAS_HW_CRITICAL_ERROR)
+		return;
+
+	cmd = mrsas_get_mfi_cmd(sc);
+	if (!cmd) {
+		device_printf(sc->mrsas_dev, "Cannot allocate for flush cache cmd.\n");
+		return;
+	}
+	dcmd = &cmd->frame->dcmd;
+	memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+
+	dcmd->cmd = MFI_CMD_DCMD;
+	dcmd->cmd_status = 0x0;
+	dcmd->sge_count = 0;
+	dcmd->flags = MFI_FRAME_DIR_NONE;
+	dcmd->timeout = 0;
+	dcmd->pad_0 = 0;
+	dcmd->data_xfer_len = 0;
+	dcmd->opcode = MR_DCMD_CTRL_CACHE_FLUSH;
+	dcmd->mbox.b[0] = MR_FLUSH_CTRL_CACHE | MR_FLUSH_DISK_CACHE;
+
+	mrsas_issue_blocked_cmd(sc, cmd);
+	mrsas_release_mfi_cmd(cmd);
+
+	return;
+}
+
+int
+megasas_sync_pd_seq_num(struct mrsas_softc *sc, boolean_t pend)
+{
+	int retcode = 0;
+	u_int8_t do_ocr = 1;
+	struct mrsas_mfi_cmd *cmd;
+	struct mrsas_dcmd_frame *dcmd;
+	uint32_t pd_seq_map_sz;
+	struct MR_PD_CFG_SEQ_NUM_SYNC *pd_sync;
+	bus_addr_t pd_seq_h;
+
+	pd_seq_map_sz = sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) +
+	    (sizeof(struct MR_PD_CFG_SEQ) *
+	    (MAX_PHYSICAL_DEVICES - 1));
+
+	cmd = mrsas_get_mfi_cmd(sc);
+	if (!cmd) {
+		device_printf(sc->mrsas_dev,
+		    "Cannot alloc for ld map info cmd.\n");
+		return 1;
+	}
+	dcmd = &cmd->frame->dcmd;
+
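+	/* The JBOD map is double-buffered; select the half indexed by pd_seq_map_id */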
+	pd_sync = (void *)sc->jbodmap_mem[(sc->pd_seq_map_id & 1)];
+	pd_seq_h = sc->jbodmap_phys_addr[(sc->pd_seq_map_id & 1)];
+	if (!pd_sync) {
+		device_printf(sc->mrsas_dev,
+		    "Failed to alloc mem for jbod map info.\n");
+		mrsas_release_mfi_cmd(cmd);
+		return (ENOMEM);
+	}
+	memset(pd_sync, 0, pd_seq_map_sz);
+	memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+	dcmd->cmd = MFI_CMD_DCMD;
+	dcmd->cmd_status = 0xFF;
+	dcmd->sge_count = 1;
+	dcmd->timeout = 0;
+	dcmd->pad_0 = 0;
+	dcmd->data_xfer_len = (pd_seq_map_sz);
+	dcmd->opcode = (MR_DCMD_SYSTEM_PD_MAP_GET_INFO);
+	dcmd->sgl.sge32[0].phys_addr = (pd_seq_h);
+	dcmd->sgl.sge32[0].length = (pd_seq_map_sz);
+
+	if (pend) {
+		dcmd->mbox.b[0] = MRSAS_DCMD_MBOX_PEND_FLAG;
+		dcmd->flags = (MFI_FRAME_DIR_WRITE);
+		sc->jbod_seq_cmd = cmd;
+		if (mrsas_issue_dcmd(sc, cmd)) {
+			device_printf(sc->mrsas_dev,
+			    "Fail to send sync map info command.\n");
+			return 1;
+		} else
+			return 0;
+	} else
+		dcmd->flags = MFI_FRAME_DIR_READ;
+
+	retcode = mrsas_issue_polled(sc, cmd);
+	if (retcode == ETIMEDOUT)
+		goto dcmd_timeout;
+
+	if (pd_sync->count > MAX_PHYSICAL_DEVICES) {
+		device_printf(sc->mrsas_dev,
+		    "driver supports max %d JBOD, but FW reports %d\n",
+		    MAX_PHYSICAL_DEVICES, pd_sync->count);
+		retcode = -EINVAL;
+	}
+	if (!retcode)
+		sc->pd_seq_map_id++;
+	do_ocr = 0;
+
+dcmd_timeout:
+	if (do_ocr)
+		sc->do_timedout_reset = MFI_DCMD_TIMEOUT_OCR;
+
+	return (retcode);
+}
+
+/*
+ * mrsas_get_map_info:	Load and validate RAID map
+ * input:				Adapter instance soft state
+ *
+ * This function calls mrsas_get_ld_map_info() and MR_ValidateMapInfo() to
+ * load and validate the RAID map.  It returns 0 if successful, 1 otherwise.
+ */
+static int
+mrsas_get_map_info(struct mrsas_softc *sc)
+{
+	uint8_t retcode = 0;
+
+	sc->fast_path_io = 0;
+	if (!mrsas_get_ld_map_info(sc)) {
+		retcode = MR_ValidateMapInfo(sc);
+		if (retcode == 0) {
+			sc->fast_path_io = 1;
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * mrsas_get_ld_map_info:	Get FW's ld_map structure
+ * input:					Adapter instance soft state
+ *
+ * Issues an internal command (DCMD) to get the FW's LD map
+ * structure.
+ */
+static int
+mrsas_get_ld_map_info(struct mrsas_softc *sc)
+{
+	int retcode = 0;
+	struct mrsas_mfi_cmd *cmd;
+	struct mrsas_dcmd_frame *dcmd;
+	void *map;
+	bus_addr_t map_phys_addr = 0;
+
+	cmd = mrsas_get_mfi_cmd(sc);
+	if (!cmd) {
+		device_printf(sc->mrsas_dev,
+		    "Cannot alloc for ld map info cmd.\n");
+		return 1;
+	}
+	dcmd = &cmd->frame->dcmd;
+
+	map = (void *)sc->raidmap_mem[(sc->map_id & 1)];
+	map_phys_addr = sc->raidmap_phys_addr[(sc->map_id & 1)];
+	if (!map) {
+		device_printf(sc->mrsas_dev,
+		    "Failed to alloc mem for ld map info.\n");
+		mrsas_release_mfi_cmd(cmd);
+		return (ENOMEM);
+	}
+	memset(map, 0, sc->max_map_sz);
+	memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+
+	dcmd->cmd = MFI_CMD_DCMD;
+	dcmd->cmd_status = 0xFF;
+	dcmd->sge_count = 1;
+	dcmd->flags = MFI_FRAME_DIR_READ;
+	dcmd->timeout = 0;
+	dcmd->pad_0 = 0;
+	dcmd->data_xfer_len = sc->current_map_sz;
+	dcmd->opcode = MR_DCMD_LD_MAP_GET_INFO;
+	dcmd->sgl.sge32[0].phys_addr = map_phys_addr;
+	dcmd->sgl.sge32[0].length = sc->current_map_sz;
+
+	retcode = mrsas_issue_polled(sc, cmd);
+	if (retcode == ETIMEDOUT)
+		sc->do_timedout_reset = MFI_DCMD_TIMEOUT_OCR;
+
+	return (retcode);
+}
+
+/*
+ * mrsas_sync_map_info:	Sync the LD map info with FW
+ * input:				Adapter instance soft state
+ *
+ * Issues an internal command (DCMD) to write the driver's LD target and
+ * sequence-number sync data to the FW.
+ */
+static int
+mrsas_sync_map_info(struct mrsas_softc *sc)
+{
+	int retcode = 0, i;
+	struct mrsas_mfi_cmd *cmd;
+	struct mrsas_dcmd_frame *dcmd;
+	uint32_t size_sync_info, num_lds;
+	MR_LD_TARGET_SYNC *target_map = NULL;
+	MR_DRV_RAID_MAP_ALL *map;
+	MR_LD_RAID *raid;
+	MR_LD_TARGET_SYNC *ld_sync;
+	bus_addr_t map_phys_addr = 0;
+
+	cmd = mrsas_get_mfi_cmd(sc);
+	if (!cmd) {
+		device_printf(sc->mrsas_dev, "Cannot alloc for sync map info cmd\n");
+		return ENOMEM;
+	}
+	map = sc->ld_drv_map[sc->map_id & 1];
+	num_lds = map->raidMap.ldCount;
+
+	dcmd = &cmd->frame->dcmd;
+	size_sync_info = sizeof(MR_LD_TARGET_SYNC) * num_lds;
+	memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+
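+	/* Build the LD target sync data in the inactive half of the raid map buffer */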
+	target_map = (MR_LD_TARGET_SYNC *) sc->raidmap_mem[(sc->map_id - 1) & 1];
+	memset(target_map, 0, sc->max_map_sz);
+
+	map_phys_addr = sc->raidmap_phys_addr[(sc->map_id - 1) & 1];
+
+	ld_sync = (MR_LD_TARGET_SYNC *) target_map;
+
+	for (i = 0; i < num_lds; i++, ld_sync++) {
+		raid = MR_LdRaidGet(i, map);
+		ld_sync->targetId = MR_GetLDTgtId(i, map);
+		ld_sync->seqNum = raid->seqNum;
+	}
+
+	dcmd->cmd = MFI_CMD_DCMD;
+	dcmd->cmd_status = 0xFF;
+	dcmd->sge_count = 1;
+	dcmd->flags = MFI_FRAME_DIR_WRITE;
+	dcmd->timeout = 0;
+	dcmd->pad_0 = 0;
+	dcmd->data_xfer_len = sc->current_map_sz;
+	dcmd->mbox.b[0] = num_lds;
+	dcmd->mbox.b[1] = MRSAS_DCMD_MBOX_PEND_FLAG;
+	dcmd->opcode = MR_DCMD_LD_MAP_GET_INFO;
+	dcmd->sgl.sge32[0].phys_addr = map_phys_addr;
+	dcmd->sgl.sge32[0].length = sc->current_map_sz;
+
+	sc->map_update_cmd = cmd;
+	if (mrsas_issue_dcmd(sc, cmd)) {
+		device_printf(sc->mrsas_dev,
+		    "Fail to send sync map info command.\n");
+		return (1);
+	}
+	return (retcode);
+}
+
+/*
+ * mrsas_get_pd_list:	Returns FW's PD list structure
+ * input:				Adapter soft state
+ *
+ * Issues an internal command (DCMD) to get the FW's controller PD list
+ * structure.  This information is mainly used to find out about the
+ * physical drives exposed to the host by the Firmware.
+ */
+static int
+mrsas_get_pd_list(struct mrsas_softc *sc)
+{
+	int retcode = 0, pd_index = 0, pd_count = 0, pd_list_size;
+	u_int8_t do_ocr = 1;
+	struct mrsas_mfi_cmd *cmd;
+	struct mrsas_dcmd_frame *dcmd;
+	struct MR_PD_LIST *pd_list_mem;
+	struct MR_PD_ADDRESS *pd_addr;
+	bus_addr_t pd_list_phys_addr = 0;
+	struct mrsas_tmp_dcmd *tcmd;
+
+	cmd = mrsas_get_mfi_cmd(sc);
+	if (!cmd) {
+		device_printf(sc->mrsas_dev,
+		    "Cannot alloc for get PD list cmd\n");
+		return 1;
+	}
+	dcmd = &cmd->frame->dcmd;
+
+	tcmd = malloc(sizeof(struct mrsas_tmp_dcmd), M_MRSAS, M_NOWAIT);
+	pd_list_size = MRSAS_MAX_PD * sizeof(struct MR_PD_LIST);
+	if (mrsas_alloc_tmp_dcmd(sc, tcmd, pd_list_size) != SUCCESS) {
+		device_printf(sc->mrsas_dev,
+		    "Cannot alloc dmamap for get PD list cmd\n");
+		mrsas_release_mfi_cmd(cmd);
+		mrsas_free_tmp_dcmd(tcmd);
+		free(tcmd, M_MRSAS);
+		return (ENOMEM);
+	} else {
+		pd_list_mem = tcmd->tmp_dcmd_mem;
+		pd_list_phys_addr = tcmd->tmp_dcmd_phys_addr;
+	}
+	memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+
+	dcmd->mbox.b[0] = MR_PD_QUERY_TYPE_EXPOSED_TO_HOST;
+	dcmd->mbox.b[1] = 0;
+	dcmd->cmd = MFI_CMD_DCMD;
+	dcmd->cmd_status = 0xFF;
+	dcmd->sge_count = 1;
+	dcmd->flags = MFI_FRAME_DIR_READ;
+	dcmd->timeout = 0;
+	dcmd->pad_0 = 0;
+	dcmd->data_xfer_len = MRSAS_MAX_PD * sizeof(struct MR_PD_LIST);
+	dcmd->opcode = MR_DCMD_PD_LIST_QUERY;
+	dcmd->sgl.sge32[0].phys_addr = pd_list_phys_addr;
+	dcmd->sgl.sge32[0].length = MRSAS_MAX_PD * sizeof(struct MR_PD_LIST);
+
+	if (!sc->mask_interrupts)
+		retcode = mrsas_issue_blocked_cmd(sc, cmd);
+	else
+		retcode = mrsas_issue_polled(sc, cmd);
+
+	if (retcode == ETIMEDOUT)
+		goto dcmd_timeout;
+
+	/* Get the instance PD list */
+	pd_count = MRSAS_MAX_PD;
+	pd_addr = pd_list_mem->addr;
+	if (pd_list_mem->count < pd_count) {
+		memset(sc->local_pd_list, 0,
+		    MRSAS_MAX_PD * sizeof(struct mrsas_pd_list));
+		for (pd_index = 0; pd_index < pd_list_mem->count; pd_index++) {
+			sc->local_pd_list[pd_addr->deviceId].tid = pd_addr->deviceId;
+			sc->local_pd_list[pd_addr->deviceId].driveType =
+			    pd_addr->scsiDevType;
+			sc->local_pd_list[pd_addr->deviceId].driveState =
+			    MR_PD_STATE_SYSTEM;
+			pd_addr++;
+		}
+		/*
+		 * Use a mutex/spinlock if the pd_list component size grows
+		 * beyond 32 bits.
+		 */
+		memcpy(sc->pd_list, sc->local_pd_list, sizeof(sc->local_pd_list));
+		do_ocr = 0;
+	}
+dcmd_timeout:
+	mrsas_free_tmp_dcmd(tcmd);
+	free(tcmd, M_MRSAS);
+
+	if (do_ocr)
+		sc->do_timedout_reset = MFI_DCMD_TIMEOUT_OCR;
+
+	if (!sc->mask_interrupts)
+		mrsas_release_mfi_cmd(cmd);
+
+	return (retcode);
+}
+
+/*
+ * mrsas_get_ld_list:	Returns FW's LD list structure
+ * input:				Adapter soft state
+ *
+ * Issues an internal command (DCMD) to get the FW's controller LD list
+ * structure.  This information is mainly used to find out the logical
+ * drives exported by the FW.
+ */
+static int
+mrsas_get_ld_list(struct mrsas_softc *sc)
+{
+	int ld_list_size, retcode = 0, ld_index = 0, ids = 0;
+	u_int8_t do_ocr = 1;
+	struct mrsas_mfi_cmd *cmd;
+	struct mrsas_dcmd_frame *dcmd;
+	struct MR_LD_LIST *ld_list_mem;
+	bus_addr_t ld_list_phys_addr = 0;
+	struct mrsas_tmp_dcmd *tcmd;
+
+	cmd = mrsas_get_mfi_cmd(sc);
+	if (!cmd) {
+		device_printf(sc->mrsas_dev,
+		    "Cannot alloc for get LD list cmd\n");
+		return 1;
+	}
+	dcmd = &cmd->frame->dcmd;
+
+	tcmd = malloc(sizeof(struct mrsas_tmp_dcmd), M_MRSAS, M_NOWAIT);
+	ld_list_size = sizeof(struct MR_LD_LIST);
+	if (mrsas_alloc_tmp_dcmd(sc, tcmd, ld_list_size) != SUCCESS) {
+		device_printf(sc->mrsas_dev,
+		    "Cannot alloc dmamap for get LD list cmd\n");
+		mrsas_release_mfi_cmd(cmd);
+		mrsas_free_tmp_dcmd(tcmd);
+		free(tcmd, M_MRSAS);
+		return (ENOMEM);
+	} else {
+		ld_list_mem = tcmd->tmp_dcmd_mem;
+		ld_list_phys_addr = tcmd->tmp_dcmd_phys_addr;
+	}
+	memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+
+	if (sc->max256vdSupport)
+		dcmd->mbox.b[0] = 1;
+
+	dcmd->cmd = MFI_CMD_DCMD;
+	dcmd->cmd_status = 0xFF;
+	dcmd->sge_count = 1;
+	dcmd->flags = MFI_FRAME_DIR_READ;
+	dcmd->timeout = 0;
+	dcmd->data_xfer_len = sizeof(struct MR_LD_LIST);
+	dcmd->opcode = MR_DCMD_LD_GET_LIST;
+	dcmd->sgl.sge32[0].phys_addr = ld_list_phys_addr;
+	dcmd->sgl.sge32[0].length = sizeof(struct MR_LD_LIST);
+	dcmd->pad_0 = 0;
+
+	if (!sc->mask_interrupts)
+		retcode = mrsas_issue_blocked_cmd(sc, cmd);
+	else
+		retcode = mrsas_issue_polled(sc, cmd);
+
+	if (retcode == ETIMEDOUT)
+		goto dcmd_timeout;
+
+#if VD_EXT_DEBUG
+	printf("Number of LDs %d\n", ld_list_mem->ldCount);
+#endif
+
+	/* Get the instance LD list */
+	if (ld_list_mem->ldCount <= sc->fw_supported_vd_count) {
+		sc->CurLdCount = ld_list_mem->ldCount;
+		memset(sc->ld_ids, 0xff, MAX_LOGICAL_DRIVES_EXT);
+		for (ld_index = 0; ld_index < ld_list_mem->ldCount; ld_index++) {
+			if (ld_list_mem->ldList[ld_index].state != 0) {
+				ids = ld_list_mem->ldList[ld_index].ref.ld_context.targetId;
+				sc->ld_ids[ids] = ld_list_mem->ldList[ld_index].ref.ld_context.targetId;
+			}
+		}
+		do_ocr = 0;
+	}
+dcmd_timeout:
+	mrsas_free_tmp_dcmd(tcmd);
+	free(tcmd, M_MRSAS);
+
+	if (do_ocr)
+		sc->do_timedout_reset = MFI_DCMD_TIMEOUT_OCR;
+	if (!sc->mask_interrupts)
+		mrsas_release_mfi_cmd(cmd);
+
+	return (retcode);
+}
+
+/*
+ * mrsas_alloc_tmp_dcmd:	Allocates memory for temporary command
+ * input:					Adapter soft state
+ * 							Temp command
+ * 							Size of allocation
+ *
+ * Allocates DMAable memory for a temporary internal command. The allocated
+ * memory is initialized to all zeros upon successful loading of the DMA
+ * mapped memory.
+ */
+int
+mrsas_alloc_tmp_dcmd(struct mrsas_softc *sc,
+    struct mrsas_tmp_dcmd *tcmd, int size)
+{
+	if (bus_dma_tag_create(sc->mrsas_parent_tag,
+	    1, 0,
+	    BUS_SPACE_MAXADDR_32BIT,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    size,
+	    1,
+	    size,
+	    BUS_DMA_ALLOCNOW,
+	    NULL, NULL,
+	    &tcmd->tmp_dcmd_tag)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate tmp dcmd tag\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamem_alloc(tcmd->tmp_dcmd_tag, (void **)&tcmd->tmp_dcmd_mem,
+	    BUS_DMA_NOWAIT, &tcmd->tmp_dcmd_dmamap)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate tmp dcmd mem\n");
+		return (ENOMEM);
+	}
+	if (bus_dmamap_load(tcmd->tmp_dcmd_tag, tcmd->tmp_dcmd_dmamap,
+	    tcmd->tmp_dcmd_mem, size, mrsas_addr_cb,
+	    &tcmd->tmp_dcmd_phys_addr, BUS_DMA_NOWAIT)) {
+		device_printf(sc->mrsas_dev, "Cannot load tmp dcmd mem\n");
+		return (ENOMEM);
+	}
+	memset(tcmd->tmp_dcmd_mem, 0, size);
+	return (0);
+}
+
+/*
+ * mrsas_free_tmp_dcmd:	Free memory for temporary command
+ * input:				Temporary dcmd pointer
+ *
+ * Deallocates the memory of the temporary command used in the construction
+ * of the internal DCMD.
+ */
+void
+mrsas_free_tmp_dcmd(struct mrsas_tmp_dcmd *tmp)
+{
+	if (tmp->tmp_dcmd_phys_addr)
+		bus_dmamap_unload(tmp->tmp_dcmd_tag, tmp->tmp_dcmd_dmamap);
+	if (tmp->tmp_dcmd_mem != NULL)
+		bus_dmamem_free(tmp->tmp_dcmd_tag, tmp->tmp_dcmd_mem, tmp->tmp_dcmd_dmamap);
+	if (tmp->tmp_dcmd_tag != NULL)
+		bus_dma_tag_destroy(tmp->tmp_dcmd_tag);
+}
+
+/*
+ * mrsas_issue_blocked_abort_cmd:	Aborts previously issued cmd
+ * input:							Adapter soft state
+ * 									Previously issued cmd to be aborted
+ *
+ * This function is used to abort previously issued commands, such as AEN and
+ * RAID map sync commands.  The abort command is sent as an internal DCMD and
+ * the driver then waits for a return status.  The max wait time is
+ * MRSAS_INTERNAL_CMD_WAIT_TIME seconds.
+ */
+static int
+mrsas_issue_blocked_abort_cmd(struct mrsas_softc *sc,
+    struct mrsas_mfi_cmd *cmd_to_abort)
+{
+	struct mrsas_mfi_cmd *cmd;
+	struct mrsas_abort_frame *abort_fr;
+	u_int8_t retcode = 0;
+	unsigned long total_time = 0;
+	u_int8_t max_wait = MRSAS_INTERNAL_CMD_WAIT_TIME;
+
+	cmd = mrsas_get_mfi_cmd(sc);
+	if (!cmd) {
+		device_printf(sc->mrsas_dev, "Cannot alloc for abort cmd\n");
+		return (1);
+	}
+	abort_fr = &cmd->frame->abort;
+
+	/* Prepare and issue the abort frame */
+	abort_fr->cmd = MFI_CMD_ABORT;
+	abort_fr->cmd_status = 0xFF;
+	abort_fr->flags = 0;
+	abort_fr->abort_context = cmd_to_abort->index;
+	abort_fr->abort_mfi_phys_addr_lo = cmd_to_abort->frame_phys_addr;
+	abort_fr->abort_mfi_phys_addr_hi = 0;
+
+	cmd->sync_cmd = 1;
+	cmd->cmd_status = 0xFF;
+
+	if (mrsas_issue_dcmd(sc, cmd)) {
+		device_printf(sc->mrsas_dev, "Fail to send abort command.\n");
+		return (1);
+	}
+	/* Wait for this cmd to complete */
+	sc->chan = (void *)&cmd;
+	while (1) {
+		if (cmd->cmd_status == 0xFF) {
+			tsleep((void *)&sc->chan, 0, "mrsas_sleep", hz);
+		} else
+			break;
+		total_time++;
+		if (total_time >= max_wait) {
+			device_printf(sc->mrsas_dev, "Abort cmd timed out after %d sec.\n", max_wait);
+			retcode = 1;
+			break;
+		}
+	}
+
+	cmd->sync_cmd = 0;
+	mrsas_release_mfi_cmd(cmd);
+	return (retcode);
+}
+
+/*
+ * mrsas_complete_abort:	Completes aborting a command
+ * input:					Adapter soft state
+ * 							Cmd that was issued to abort another cmd
+ *
+ * The mrsas_issue_blocked_abort_cmd() function waits for the command status
+ * to change after sending the command.  This function is called from
+ * mrsas_complete_mptmfi_passthru() to wake up the associated sleeping thread.
+ */
+void
+mrsas_complete_abort(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd)
+{
+	if (cmd->sync_cmd) {
+		cmd->sync_cmd = 0;
+		cmd->cmd_status = 0;
+		sc->chan = (void *)&cmd;
+		wakeup_one((void *)&sc->chan);
+	}
+	return;
+}
+
+/*
+ * mrsas_aen_handler:	AEN processing callback function from thread context
+ * input:				Adapter soft state
+ *
+ * Asynchronous event handler
+ */
+void
+mrsas_aen_handler(struct mrsas_softc *sc)
+{
+	union mrsas_evt_class_locale class_locale;
+	int doscan = 0;
+	u_int32_t seq_num;
+	int error, fail_aen = 0;
+
+	if (sc == NULL) {
+		printf("invalid instance!\n");
+		return;
+	}
+	if (sc->remove_in_progress || sc->reset_in_progress) {
+		device_printf(sc->mrsas_dev, "Returning from %s, line no %d\n",
+			__func__, __LINE__);
+		return;
+	}
+	if (sc->evt_detail_mem) {
+		switch (sc->evt_detail_mem->code) {
+		case MR_EVT_PD_INSERTED:
+			fail_aen = mrsas_get_pd_list(sc);
+			if (!fail_aen)
+				mrsas_bus_scan_sim(sc, sc->sim_1);
+			else
+				goto skip_register_aen;
+			break;
+		case MR_EVT_PD_REMOVED:
+			fail_aen = mrsas_get_pd_list(sc);
+			if (!fail_aen)
+				mrsas_bus_scan_sim(sc, sc->sim_1);
+			else
+				goto skip_register_aen;
+			break;
+		case MR_EVT_LD_OFFLINE:
+		case MR_EVT_CFG_CLEARED:
+		case MR_EVT_LD_DELETED:
+			mrsas_bus_scan_sim(sc, sc->sim_0);
+			break;
+		case MR_EVT_LD_CREATED:
+			fail_aen = mrsas_get_ld_list(sc);
+			if (!fail_aen)
+				mrsas_bus_scan_sim(sc, sc->sim_0);
+			else
+				goto skip_register_aen;
+			break;
+		case MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED:
+		case MR_EVT_FOREIGN_CFG_IMPORTED:
+		case MR_EVT_LD_STATE_CHANGE:
+			doscan = 1;
+			break;
+		case MR_EVT_CTRL_PROP_CHANGED:
+			fail_aen = mrsas_get_ctrl_info(sc);
+			if (fail_aen)
+				goto skip_register_aen;
+			break;
+		default:
+			break;
+		}
+	} else {
+		device_printf(sc->mrsas_dev, "invalid evt_detail\n");
+		return;
+	}
+	if (doscan) {
+		fail_aen = mrsas_get_pd_list(sc);
+		if (!fail_aen) {
+			mrsas_dprint(sc, MRSAS_AEN, "scanning ...sim 1\n");
+			mrsas_bus_scan_sim(sc, sc->sim_1);
+		} else
+			goto skip_register_aen;
+
+		fail_aen = mrsas_get_ld_list(sc);
+		if (!fail_aen) {
+			mrsas_dprint(sc, MRSAS_AEN, "scanning ...sim 0\n");
+			mrsas_bus_scan_sim(sc, sc->sim_0);
+		} else
+			goto skip_register_aen;
+	}
+	seq_num = sc->evt_detail_mem->seq_num + 1;
+
+	/* Register AEN with FW for latest sequence number plus 1 */
+	class_locale.members.reserved = 0;
+	class_locale.members.locale = MR_EVT_LOCALE_ALL;
+	class_locale.members.class = MR_EVT_CLASS_DEBUG;
+
+	if (sc->aen_cmd != NULL)
+		return;
+
+	mtx_lock(&sc->aen_lock);
+	error = mrsas_register_aen(sc, seq_num,
+	    class_locale.word);
+	mtx_unlock(&sc->aen_lock);
+
+	if (error)
+		device_printf(sc->mrsas_dev, "register aen failed error %x\n", error);
+
+skip_register_aen:
+	return;
+
+}
+
+
+/*
+ * mrsas_complete_aen:	Completes AEN command
+ * input:				Adapter soft state
+ * 						AEN cmd that was completed
+ *
+ * This function will be called from ISR and will continue event processing from
+ * thread context by enqueuing task in ev_tq (callback function
+ * "mrsas_aen_handler").
+ */
+void
+mrsas_complete_aen(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd)
+{
+	/*
+	 * Don't signal app if it is just an aborted previously registered
+	 * aen
+	 */
+	if ((!cmd->abort_aen) && (sc->remove_in_progress == 0)) {
+		sc->mrsas_aen_triggered = 1;
+		mtx_lock(&sc->aen_lock);
+		if (sc->mrsas_poll_waiting) {
+			sc->mrsas_poll_waiting = 0;
+			selwakeup(&sc->mrsas_select);
+		}
+		mtx_unlock(&sc->aen_lock);
+	} else
+		cmd->abort_aen = 0;
+
+	sc->aen_cmd = NULL;
+	mrsas_release_mfi_cmd(cmd);
+
+	taskqueue_enqueue(sc->ev_tq, &sc->ev_task);
+
+	return;
+}
+
+static device_method_t mrsas_methods[] = {
+	DEVMETHOD(device_probe, mrsas_probe),
+	DEVMETHOD(device_attach, mrsas_attach),
+	DEVMETHOD(device_detach, mrsas_detach),
+	DEVMETHOD(device_suspend, mrsas_suspend),
+	DEVMETHOD(device_resume, mrsas_resume),
+	DEVMETHOD(bus_print_child, bus_generic_print_child),
+	DEVMETHOD(bus_driver_added, bus_generic_driver_added),
+	{0, 0}
+};
+
+static driver_t mrsas_driver = {
+	"mrsas",
+	mrsas_methods,
+	sizeof(struct mrsas_softc)
+};
+
+static devclass_t mrsas_devclass;
+
+DRIVER_MODULE(mrsas, pci, mrsas_driver, mrsas_devclass, 0, 0);
+MODULE_DEPEND(mrsas, cam, 1, 1, 1);


Property changes on: trunk/sys/dev/mrsas/mrsas.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/mrsas/mrsas.h
===================================================================
--- trunk/sys/dev/mrsas/mrsas.h	                        (rev 0)
+++ trunk/sys/dev/mrsas/mrsas.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,2950 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2015, AVAGO Tech. All rights reserved. Authors: Marian Choy
+ * Copyright (c) 2014, LSI Corp. All rights reserved. Authors: Marian Choy
+ * Support: freebsdraid at avagotech.com
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer. 2. Redistributions
+ * in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or other
+ * materials provided with the distribution. 3. Neither the name of the
+ * <ORGANIZATION> nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing
+ * official policies,either expressed or implied, of the FreeBSD Project.
+ *
+ * Send feedback to: <megaraidfbsd at avagotech.com> Mail to: AVAGO TECHNOLOGIES, 1621
+ * Barber Lane, Milpitas, CA 95035 ATTN: MegaRaid FreeBSD
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/mrsas/mrsas.h 310264 2016-12-19 13:14:39Z kadesai $");
+
+#ifndef MRSAS_H
+#define	MRSAS_H
+
+#include <sys/param.h>			/* defines used in kernel.h */
+#include <sys/module.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>			/* types used in module initialization */
+#include <sys/conf.h>			/* cdevsw struct */
+#include <sys/uio.h>			/* uio struct */
+#include <sys/malloc.h>
+#include <sys/bus.h>			/* structs, prototypes for pci bus
+					 * stuff */
+#include <sys/rman.h>
+#include <sys/types.h>
+#include <sys/lock.h>
+#include <sys/sema.h>
+#include <sys/sysctl.h>
+#include <sys/stat.h>
+#include <sys/taskqueue.h>
+#include <sys/poll.h>
+#include <sys/selinfo.h>
+
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/atomic.h>
+
+#include <dev/pci/pcivar.h>		/* For pci_get macros! */
+#include <dev/pci/pcireg.h>
+
+
+#define	IOCTL_SEMA_DESCRIPTION	"mrsas semaphore for MFI pool"
+
+/*
+ * Device IDs and PCI
+ */
+#define	MRSAS_TBOLT			0x005b
+#define	MRSAS_INVADER		0x005d
+#define	MRSAS_FURY			0x005f
+#define	MRSAS_INTRUDER		0x00ce
+#define	MRSAS_INTRUDER_24	0x00cf
+#define	MRSAS_CUTLASS_52	0x0052
+#define	MRSAS_CUTLASS_53	0x0053
+#define	MRSAS_PCI_BAR0		0x10
+#define	MRSAS_PCI_BAR1		0x14
+#define	MRSAS_PCI_BAR2		0x1C
+
+/*
+ * Firmware State Defines
+ */
+#define	MRSAS_FWSTATE_MAXCMD_MASK		0x0000FFFF
+#define	MRSAS_FWSTATE_SGE_MASK			0x00FF0000
+#define	MRSAS_FW_STATE_CHNG_INTERRUPT	1
+
+/*
+ * Message Frame Defines
+ */
+#define	MRSAS_SENSE_LEN					96
+#define	MRSAS_FUSION_MAX_RESET_TRIES	3
+
+/*
+ * Miscellaneous Defines
+ */
+#define	BYTE_ALIGNMENT					1
+#define	MRSAS_MAX_NAME_LENGTH			32
+#define	MRSAS_VERSION					"06.712.04.00-fbsd"
+#define	MRSAS_ULONG_MAX					0xFFFFFFFFFFFFFFFF
+#define	MRSAS_DEFAULT_TIMEOUT			0x14	/* Temporarily set */
+#define	DONE							0
+#define	MRSAS_PAGE_SIZE					4096
+#define	MRSAS_RESET_NOTICE_INTERVAL		5
+#define	MRSAS_IO_TIMEOUT				180000	/* 180 second timeout */
+#define	MRSAS_LDIO_QUEUE_DEPTH			70	/* 70 percent as default */
+#define	THRESHOLD_REPLY_COUNT			50
+#define	MAX_MSIX_COUNT					128
+
+/*
+ * Boolean types
+ */
+#if (__FreeBSD_version < 901000)
+typedef enum _boolean {
+	false, true
+}	boolean;
+
+#endif
+enum err {
+	SUCCESS, FAIL
+};
+
+MALLOC_DECLARE(M_MRSAS);
+SYSCTL_DECL(_hw_mrsas);
+
+#define	MRSAS_INFO		(1 << 0)
+#define	MRSAS_TRACE		(1 << 1)
+#define	MRSAS_FAULT		(1 << 2)
+#define	MRSAS_OCR		(1 << 3)
+#define	MRSAS_TOUT		MRSAS_OCR
+#define	MRSAS_AEN		(1 << 4)
+#define	MRSAS_PRL11		(1 << 5)
+
+#define	mrsas_dprint(sc, level, msg, args...)       \
+do {                                                \
+    if (sc->mrsas_debug & level)                    \
+        device_printf(sc->mrsas_dev, msg, ##args);  \
+} while (0)
+
+
+/****************************************************************************
+ * Raid Context structure which describes MegaRAID specific IO Parameters
+ * This resides at offset 0x60 where the SGL normally starts in MPT IO Frames
+ ****************************************************************************/
+
+typedef struct _RAID_CONTEXT {
+	u_int8_t Type:4;
+	u_int8_t nseg:4;
+	u_int8_t resvd0;
+	u_int16_t timeoutValue;
+	u_int8_t regLockFlags;
+	u_int8_t resvd1;
+	u_int16_t VirtualDiskTgtId;
+	u_int64_t regLockRowLBA;
+	u_int32_t regLockLength;
+	u_int16_t nextLMId;
+	u_int8_t exStatus;
+	u_int8_t status;
+	u_int8_t RAIDFlags;
+	u_int8_t numSGE;
+	u_int16_t configSeqNum;
+	u_int8_t spanArm;
+	u_int8_t priority;		/* 0x1D MR_PRIORITY_RANGE */
+	u_int8_t numSGEExt;		/* 0x1E 1M IO support */
+	u_int8_t resvd2;		/* 0x1F */
+}	RAID_CONTEXT;
+
+
+/*************************************************************************
+ * MPI2 Defines
+ ************************************************************************/
+
+#define	MPI2_FUNCTION_IOC_INIT					(0x02)	/* IOC Init */
+#define	MPI2_WHOINIT_HOST_DRIVER				(0x04)
+#define	MPI2_VERSION_MAJOR						(0x02)
+#define	MPI2_VERSION_MINOR						(0x00)
+#define	MPI2_VERSION_MAJOR_MASK					(0xFF00)
+#define	MPI2_VERSION_MAJOR_SHIFT				(8)
+#define	MPI2_VERSION_MINOR_MASK					(0x00FF)
+#define	MPI2_VERSION_MINOR_SHIFT				(0)
+#define	MPI2_VERSION ((MPI2_VERSION_MAJOR << MPI2_VERSION_MAJOR_SHIFT) | \
+                      MPI2_VERSION_MINOR)
+#define	MPI2_HEADER_VERSION_UNIT				(0x10)
+#define	MPI2_HEADER_VERSION_DEV					(0x00)
+#define	MPI2_HEADER_VERSION_UNIT_MASK			(0xFF00)
+#define	MPI2_HEADER_VERSION_UNIT_SHIFT			(8)
+#define	MPI2_HEADER_VERSION_DEV_MASK			(0x00FF)
+#define	MPI2_HEADER_VERSION_DEV_SHIFT			(0)
+#define	MPI2_HEADER_VERSION ((MPI2_HEADER_VERSION_UNIT << 8) | MPI2_HEADER_VERSION_DEV)
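+/*
+ * Worked values for the two composites above:
+ *   MPI2_VERSION        = (0x02 << 8) | 0x00 = 0x0200
+ *   MPI2_HEADER_VERSION = (0x10 << 8) | 0x00 = 0x1000
+ */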
+#define	MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR		(0x03)
+#define	MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG	(0x8000)
+#define	MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG		(0x0400)
+#define	MPI2_SCSIIO_EEDPFLAGS_CHECK_REMOVE_OP	(0x0003)
+#define	MPI2_SCSIIO_EEDPFLAGS_CHECK_APPTAG		(0x0200)
+#define	MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD		(0x0100)
+#define	MPI2_SCSIIO_EEDPFLAGS_INSERT_OP			(0x0004)
+#define	MPI2_FUNCTION_SCSI_IO_REQUEST			(0x00)	/* SCSI IO */
+#define	MPI2_FUNCTION_SCSI_TASK_MGMT			(0x01)
+#define	MPI2_REQ_DESCRIPT_FLAGS_HIGH_PRIORITY	(0x03)
+#define	MPI2_REQ_DESCRIPT_FLAGS_FP_IO			(0x06)
+#define	MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO			(0x00)
+#define	MPI2_SGE_FLAGS_64_BIT_ADDRESSING		(0x02)
+#define	MPI2_SCSIIO_CONTROL_WRITE				(0x01000000)
+#define	MPI2_SCSIIO_CONTROL_READ				(0x02000000)
+#define	MPI2_REQ_DESCRIPT_FLAGS_TYPE_MASK		(0x0E)
+#define	MPI2_RPY_DESCRIPT_FLAGS_UNUSED			(0x0F)
+#define	MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS	(0x00)
+#define	MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK		(0x0F)
+#define	MPI2_WRSEQ_FLUSH_KEY_VALUE				(0x0)
+#define	MPI2_WRITE_SEQUENCE_OFFSET				(0x00000004)
+#define	MPI2_WRSEQ_1ST_KEY_VALUE				(0xF)
+#define	MPI2_WRSEQ_2ND_KEY_VALUE				(0x4)
+#define	MPI2_WRSEQ_3RD_KEY_VALUE				(0xB)
+#define	MPI2_WRSEQ_4TH_KEY_VALUE				(0x2)
+#define	MPI2_WRSEQ_5TH_KEY_VALUE				(0x7)
+#define	MPI2_WRSEQ_6TH_KEY_VALUE				(0xD)
+
+#ifndef MPI2_POINTER
+#define	MPI2_POINTER	*
+#endif
+
+
+/***************************************
+ * MPI2 Structures
+ ***************************************/
+
+typedef struct _MPI25_IEEE_SGE_CHAIN64 {
+	u_int64_t Address;
+	u_int32_t Length;
+	u_int16_t Reserved1;
+	u_int8_t NextChainOffset;
+	u_int8_t Flags;
+}	MPI25_IEEE_SGE_CHAIN64, MPI2_POINTER PTR_MPI25_IEEE_SGE_CHAIN64,
+Mpi25IeeeSgeChain64_t, MPI2_POINTER pMpi25IeeeSgeChain64_t;
+
+typedef struct _MPI2_SGE_SIMPLE_UNION {
+	u_int32_t FlagsLength;
+	union {
+		u_int32_t Address32;
+		u_int64_t Address64;
+	}	u;
+}	MPI2_SGE_SIMPLE_UNION, MPI2_POINTER PTR_MPI2_SGE_SIMPLE_UNION,
+Mpi2SGESimpleUnion_t, MPI2_POINTER pMpi2SGESimpleUnion_t;
+
+typedef struct {
+	u_int8_t CDB[20];		/* 0x00 */
+	u_int32_t PrimaryReferenceTag;	/* 0x14 */
+	u_int16_t PrimaryApplicationTag;/* 0x18 */
+	u_int16_t PrimaryApplicationTagMask;	/* 0x1A */
+	u_int32_t TransferLength;	/* 0x1C */
+}	MPI2_SCSI_IO_CDB_EEDP32, MPI2_POINTER PTR_MPI2_SCSI_IO_CDB_EEDP32,
+Mpi2ScsiIoCdbEedp32_t, MPI2_POINTER pMpi2ScsiIoCdbEedp32_t;
+
+typedef struct _MPI2_SGE_CHAIN_UNION {
+	u_int16_t Length;
+	u_int8_t NextChainOffset;
+	u_int8_t Flags;
+	union {
+		u_int32_t Address32;
+		u_int64_t Address64;
+	}	u;
+}	MPI2_SGE_CHAIN_UNION, MPI2_POINTER PTR_MPI2_SGE_CHAIN_UNION,
+Mpi2SGEChainUnion_t, MPI2_POINTER pMpi2SGEChainUnion_t;
+
+typedef struct _MPI2_IEEE_SGE_SIMPLE32 {
+	u_int32_t Address;
+	u_int32_t FlagsLength;
+}	MPI2_IEEE_SGE_SIMPLE32, MPI2_POINTER PTR_MPI2_IEEE_SGE_SIMPLE32,
+Mpi2IeeeSgeSimple32_t, MPI2_POINTER pMpi2IeeeSgeSimple32_t;
+typedef struct _MPI2_IEEE_SGE_SIMPLE64 {
+	u_int64_t Address;
+	u_int32_t Length;
+	u_int16_t Reserved1;
+	u_int8_t Reserved2;
+	u_int8_t Flags;
+}	MPI2_IEEE_SGE_SIMPLE64, MPI2_POINTER PTR_MPI2_IEEE_SGE_SIMPLE64,
+Mpi2IeeeSgeSimple64_t, MPI2_POINTER pMpi2IeeeSgeSimple64_t;
+
+typedef union _MPI2_IEEE_SGE_SIMPLE_UNION {
+	MPI2_IEEE_SGE_SIMPLE32 Simple32;
+	MPI2_IEEE_SGE_SIMPLE64 Simple64;
+}	MPI2_IEEE_SGE_SIMPLE_UNION, MPI2_POINTER PTR_MPI2_IEEE_SGE_SIMPLE_UNION,
+Mpi2IeeeSgeSimpleUnion_t, MPI2_POINTER pMpi2IeeeSgeSimpleUnion_t;
+
+typedef MPI2_IEEE_SGE_SIMPLE32 MPI2_IEEE_SGE_CHAIN32;
+typedef MPI2_IEEE_SGE_SIMPLE64 MPI2_IEEE_SGE_CHAIN64;
+
+typedef union _MPI2_IEEE_SGE_CHAIN_UNION {
+	MPI2_IEEE_SGE_CHAIN32 Chain32;
+	MPI2_IEEE_SGE_CHAIN64 Chain64;
+}	MPI2_IEEE_SGE_CHAIN_UNION, MPI2_POINTER PTR_MPI2_IEEE_SGE_CHAIN_UNION,
+Mpi2IeeeSgeChainUnion_t, MPI2_POINTER pMpi2IeeeSgeChainUnion_t;
+
+typedef union _MPI2_SGE_IO_UNION {
+	MPI2_SGE_SIMPLE_UNION MpiSimple;
+	MPI2_SGE_CHAIN_UNION MpiChain;
+	MPI2_IEEE_SGE_SIMPLE_UNION IeeeSimple;
+	MPI2_IEEE_SGE_CHAIN_UNION IeeeChain;
+}	MPI2_SGE_IO_UNION, MPI2_POINTER PTR_MPI2_SGE_IO_UNION,
+Mpi2SGEIOUnion_t, MPI2_POINTER pMpi2SGEIOUnion_t;
+
+typedef union {
+	u_int8_t CDB32[32];
+	MPI2_SCSI_IO_CDB_EEDP32 EEDP32;
+	MPI2_SGE_SIMPLE_UNION SGE;
+}	MPI2_SCSI_IO_CDB_UNION, MPI2_POINTER PTR_MPI2_SCSI_IO_CDB_UNION,
+Mpi2ScsiIoCdb_t, MPI2_POINTER pMpi2ScsiIoCdb_t;
+
+/****************************************************************************
+ * SCSI Task Management messages
+ ****************************************************************************/
+
+/*SCSI Task Management Request Message */
+typedef struct _MPI2_SCSI_TASK_MANAGE_REQUEST {
+	u_int16_t DevHandle;        /*0x00 */
+	u_int8_t ChainOffset;       /*0x02 */
+	u_int8_t Function;      /*0x03 */
+	u_int8_t Reserved1;     /*0x04 */
+	u_int8_t TaskType;      /*0x05 */
+	u_int8_t Reserved2;     /*0x06 */
+	u_int8_t MsgFlags;      /*0x07 */
+	u_int8_t VP_ID;     /*0x08 */
+	u_int8_t VF_ID;     /*0x09 */
+	u_int16_t Reserved3;        /*0x0A */
+	u_int8_t LUN[8];        /*0x0C */
+	u_int32_t Reserved4[7]; /*0x14 */
+	u_int16_t TaskMID;      /*0x30 */
+	u_int16_t Reserved5;        /*0x32 */
+} MPI2_SCSI_TASK_MANAGE_REQUEST;
+
+/*SCSI Task Management Reply Message */
+typedef struct _MPI2_SCSI_TASK_MANAGE_REPLY {
+	u_int16_t DevHandle;        /*0x00 */
+	u_int8_t MsgLength;     /*0x02 */
+	u_int8_t Function;      /*0x03 */
+	u_int8_t ResponseCode;  /*0x04 */
+	u_int8_t TaskType;      /*0x05 */
+	u_int8_t Reserved1;     /*0x06 */
+	u_int8_t MsgFlags;      /*0x07 */
+	u_int8_t VP_ID;     /*0x08 */
+	u_int8_t VF_ID;     /*0x09 */
+	u_int16_t Reserved2;        /*0x0A */
+	u_int16_t Reserved3;        /*0x0C */
+	u_int16_t IOCStatus;        /*0x0E */
+	u_int32_t IOCLogInfo;       /*0x10 */
+	u_int32_t TerminationCount; /*0x14 */
+	u_int32_t ResponseInfo; /*0x18 */
+} MPI2_SCSI_TASK_MANAGE_REPLY;
+
+typedef struct _MR_TM_REQUEST {
+	char request[128];
+} MR_TM_REQUEST;
+
+typedef struct _MR_TM_REPLY {
+	char reply[128];
+} MR_TM_REPLY;
+
+/* SCSI Task Management Request Message */
+typedef struct _MR_TASK_MANAGE_REQUEST {
+	/*To be type casted to struct MPI2_SCSI_TASK_MANAGE_REQUEST */
+	MR_TM_REQUEST        TmRequest;
+	union {
+		struct {
+			u_int32_t isTMForLD:1;
+			u_int32_t isTMForPD:1;
+			u_int32_t reserved1:30;
+			u_int32_t reserved2;
+		} tmReqFlags;
+		MR_TM_REPLY   TMReply;
+	} uTmReqReply;
+} MR_TASK_MANAGE_REQUEST;
+
+/* TaskType values */
+#define MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK           (0x01)
+#define MPI2_SCSITASKMGMT_TASKTYPE_ABRT_TASK_SET        (0x02)
+#define MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET         (0x03)
+#define MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET   (0x05)
+#define MPI2_SCSITASKMGMT_TASKTYPE_CLEAR_TASK_SET       (0x06)
+#define MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK           (0x07)
+#define MPI2_SCSITASKMGMT_TASKTYPE_CLR_ACA              (0x08)
+#define MPI2_SCSITASKMGMT_TASKTYPE_QRY_TASK_SET         (0x09)
+#define MPI2_SCSITASKMGMT_TASKTYPE_QRY_ASYNC_EVENT      (0x0A)
+
+/* ResponseCode values */
+#define MPI2_SCSITASKMGMT_RSP_TM_COMPLETE               (0x00)
+#define MPI2_SCSITASKMGMT_RSP_INVALID_FRAME             (0x02)
+#define MPI2_SCSITASKMGMT_RSP_TM_NOT_SUPPORTED          (0x04)
+#define MPI2_SCSITASKMGMT_RSP_TM_FAILED                 (0x05)
+#define MPI2_SCSITASKMGMT_RSP_TM_SUCCEEDED              (0x08)
+#define MPI2_SCSITASKMGMT_RSP_TM_INVALID_LUN            (0x09)
+#define MPI2_SCSITASKMGMT_RSP_TM_OVERLAPPED_TAG         (0x0A)
+#define MPI2_SCSITASKMGMT_RSP_IO_QUEUED_ON_IOC          (0x80)
+
+/*
+ * RAID SCSI IO Request Message.  The total SGE count will be one less than
+ * in _MPI2_SCSI_IO_REQUEST.
+ */
+typedef struct _MPI2_RAID_SCSI_IO_REQUEST {
+	u_int16_t DevHandle;		/* 0x00 */
+	u_int8_t ChainOffset;		/* 0x02 */
+	u_int8_t Function;		/* 0x03 */
+	u_int16_t Reserved1;		/* 0x04 */
+	u_int8_t Reserved2;		/* 0x06 */
+	u_int8_t MsgFlags;		/* 0x07 */
+	u_int8_t VP_ID;			/* 0x08 */
+	u_int8_t VF_ID;			/* 0x09 */
+	u_int16_t Reserved3;		/* 0x0A */
+	u_int32_t SenseBufferLowAddress;/* 0x0C */
+	u_int16_t SGLFlags;		/* 0x10 */
+	u_int8_t SenseBufferLength;	/* 0x12 */
+	u_int8_t Reserved4;		/* 0x13 */
+	u_int8_t SGLOffset0;		/* 0x14 */
+	u_int8_t SGLOffset1;		/* 0x15 */
+	u_int8_t SGLOffset2;		/* 0x16 */
+	u_int8_t SGLOffset3;		/* 0x17 */
+	u_int32_t SkipCount;		/* 0x18 */
+	u_int32_t DataLength;		/* 0x1C */
+	u_int32_t BidirectionalDataLength;	/* 0x20 */
+	u_int16_t IoFlags;		/* 0x24 */
+	u_int16_t EEDPFlags;		/* 0x26 */
+	u_int32_t EEDPBlockSize;	/* 0x28 */
+	u_int32_t SecondaryReferenceTag;/* 0x2C */
+	u_int16_t SecondaryApplicationTag;	/* 0x30 */
+	u_int16_t ApplicationTagTranslationMask;	/* 0x32 */
+	u_int8_t LUN[8];		/* 0x34 */
+	u_int32_t Control;		/* 0x3C */
+	MPI2_SCSI_IO_CDB_UNION CDB;	/* 0x40 */
+	RAID_CONTEXT RaidContext;	/* 0x60 */
+	MPI2_SGE_IO_UNION SGL;		/* 0x80 */
+}	MRSAS_RAID_SCSI_IO_REQUEST, MPI2_POINTER PTR_MRSAS_RAID_SCSI_IO_REQUEST,
+MRSASRaidSCSIIORequest_t, MPI2_POINTER pMRSASRaidSCSIIORequest_t;
+
+/*
+ * MPT RAID MFA IO Descriptor.
+ */
+typedef struct _MRSAS_RAID_MFA_IO_DESCRIPTOR {
+	u_int32_t RequestFlags:8;
+	u_int32_t MessageAddress1:24;	/* bits 31:8 */
+	u_int32_t MessageAddress2;	/* bits 61:32 */
+}	MRSAS_RAID_MFA_IO_REQUEST_DESCRIPTOR, *PMRSAS_RAID_MFA_IO_REQUEST_DESCRIPTOR;
+
+/* Default Request Descriptor */
+typedef struct _MPI2_DEFAULT_REQUEST_DESCRIPTOR {
+	u_int8_t RequestFlags;		/* 0x00 */
+	u_int8_t MSIxIndex;		/* 0x01 */
+	u_int16_t SMID;			/* 0x02 */
+	u_int16_t LMID;			/* 0x04 */
+	u_int16_t DescriptorTypeDependent;	/* 0x06 */
+}	MPI2_DEFAULT_REQUEST_DESCRIPTOR,
+
+	MPI2_POINTER PTR_MPI2_DEFAULT_REQUEST_DESCRIPTOR,
+Mpi2DefaultRequestDescriptor_t, MPI2_POINTER pMpi2DefaultRequestDescriptor_t;
+
+/* High Priority Request Descriptor */
+typedef struct _MPI2_HIGH_PRIORITY_REQUEST_DESCRIPTOR {
+	u_int8_t RequestFlags;		/* 0x00 */
+	u_int8_t MSIxIndex;		/* 0x01 */
+	u_int16_t SMID;			/* 0x02 */
+	u_int16_t LMID;			/* 0x04 */
+	u_int16_t Reserved1;		/* 0x06 */
+}	MPI2_HIGH_PRIORITY_REQUEST_DESCRIPTOR,
+
+	MPI2_POINTER PTR_MPI2_HIGH_PRIORITY_REQUEST_DESCRIPTOR,
+Mpi2HighPriorityRequestDescriptor_t, MPI2_POINTER pMpi2HighPriorityRequestDescriptor_t;
+
+/* SCSI IO Request Descriptor */
+typedef struct _MPI2_SCSI_IO_REQUEST_DESCRIPTOR {
+	u_int8_t RequestFlags;		/* 0x00 */
+	u_int8_t MSIxIndex;		/* 0x01 */
+	u_int16_t SMID;			/* 0x02 */
+	u_int16_t LMID;			/* 0x04 */
+	u_int16_t DevHandle;		/* 0x06 */
+}	MPI2_SCSI_IO_REQUEST_DESCRIPTOR,
+
+	MPI2_POINTER PTR_MPI2_SCSI_IO_REQUEST_DESCRIPTOR,
+Mpi2SCSIIORequestDescriptor_t, MPI2_POINTER pMpi2SCSIIORequestDescriptor_t;
+
+/* SCSI Target Request Descriptor */
+typedef struct _MPI2_SCSI_TARGET_REQUEST_DESCRIPTOR {
+	u_int8_t RequestFlags;		/* 0x00 */
+	u_int8_t MSIxIndex;		/* 0x01 */
+	u_int16_t SMID;			/* 0x02 */
+	u_int16_t LMID;			/* 0x04 */
+	u_int16_t IoIndex;		/* 0x06 */
+}	MPI2_SCSI_TARGET_REQUEST_DESCRIPTOR,
+
+	MPI2_POINTER PTR_MPI2_SCSI_TARGET_REQUEST_DESCRIPTOR,
+Mpi2SCSITargetRequestDescriptor_t, MPI2_POINTER pMpi2SCSITargetRequestDescriptor_t;
+
+/* RAID Accelerator Request Descriptor */
+typedef struct _MPI2_RAID_ACCEL_REQUEST_DESCRIPTOR {
+	u_int8_t RequestFlags;		/* 0x00 */
+	u_int8_t MSIxIndex;		/* 0x01 */
+	u_int16_t SMID;			/* 0x02 */
+	u_int16_t LMID;			/* 0x04 */
+	u_int16_t Reserved;		/* 0x06 */
+}	MPI2_RAID_ACCEL_REQUEST_DESCRIPTOR,
+
+	MPI2_POINTER PTR_MPI2_RAID_ACCEL_REQUEST_DESCRIPTOR,
+Mpi2RAIDAcceleratorRequestDescriptor_t, MPI2_POINTER pMpi2RAIDAcceleratorRequestDescriptor_t;
+
+/* union of Request Descriptors */
+typedef union _MRSAS_REQUEST_DESCRIPTOR_UNION {
+	MPI2_DEFAULT_REQUEST_DESCRIPTOR Default;
+	MPI2_HIGH_PRIORITY_REQUEST_DESCRIPTOR HighPriority;
+	MPI2_SCSI_IO_REQUEST_DESCRIPTOR SCSIIO;
+	MPI2_SCSI_TARGET_REQUEST_DESCRIPTOR SCSITarget;
+	MPI2_RAID_ACCEL_REQUEST_DESCRIPTOR RAIDAccelerator;
+	MRSAS_RAID_MFA_IO_REQUEST_DESCRIPTOR MFAIo;
+	union {
+		struct {
+			u_int32_t low;
+			u_int32_t high;
+		}	u;
+		u_int64_t Words;
+	}	addr;
+}	MRSAS_REQUEST_DESCRIPTOR_UNION;
+
+/* Default Reply Descriptor */
+typedef struct _MPI2_DEFAULT_REPLY_DESCRIPTOR {
+	u_int8_t ReplyFlags;		/* 0x00 */
+	u_int8_t MSIxIndex;		/* 0x01 */
+	u_int16_t DescriptorTypeDependent1;	/* 0x02 */
+	u_int32_t DescriptorTypeDependent2;	/* 0x04 */
+}	MPI2_DEFAULT_REPLY_DESCRIPTOR, MPI2_POINTER PTR_MPI2_DEFAULT_REPLY_DESCRIPTOR,
+Mpi2DefaultReplyDescriptor_t, MPI2_POINTER pMpi2DefaultReplyDescriptor_t;
+
+/* Address Reply Descriptor */
+typedef struct _MPI2_ADDRESS_REPLY_DESCRIPTOR {
+	u_int8_t ReplyFlags;		/* 0x00 */
+	u_int8_t MSIxIndex;		/* 0x01 */
+	u_int16_t SMID;			/* 0x02 */
+	u_int32_t ReplyFrameAddress;	/* 0x04 */
+}	MPI2_ADDRESS_REPLY_DESCRIPTOR, MPI2_POINTER PTR_MPI2_ADDRESS_REPLY_DESCRIPTOR,
+Mpi2AddressReplyDescriptor_t, MPI2_POINTER pMpi2AddressReplyDescriptor_t;
+
+/* SCSI IO Success Reply Descriptor */
+typedef struct _MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR {
+	u_int8_t ReplyFlags;		/* 0x00 */
+	u_int8_t MSIxIndex;		/* 0x01 */
+	u_int16_t SMID;			/* 0x02 */
+	u_int16_t TaskTag;		/* 0x04 */
+	u_int16_t Reserved1;		/* 0x06 */
+}	MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR,
+
+	MPI2_POINTER PTR_MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR,
+Mpi2SCSIIOSuccessReplyDescriptor_t, MPI2_POINTER pMpi2SCSIIOSuccessReplyDescriptor_t;
+
+/* TargetAssist Success Reply Descriptor */
+typedef struct _MPI2_TARGETASSIST_SUCCESS_REPLY_DESCRIPTOR {
+	u_int8_t ReplyFlags;		/* 0x00 */
+	u_int8_t MSIxIndex;		/* 0x01 */
+	u_int16_t SMID;			/* 0x02 */
+	u_int8_t SequenceNumber;	/* 0x04 */
+	u_int8_t Reserved1;		/* 0x05 */
+	u_int16_t IoIndex;		/* 0x06 */
+}	MPI2_TARGETASSIST_SUCCESS_REPLY_DESCRIPTOR,
+
+	MPI2_POINTER PTR_MPI2_TARGETASSIST_SUCCESS_REPLY_DESCRIPTOR,
+Mpi2TargetAssistSuccessReplyDescriptor_t, MPI2_POINTER pMpi2TargetAssistSuccessReplyDescriptor_t;
+
+/* Target Command Buffer Reply Descriptor */
+typedef struct _MPI2_TARGET_COMMAND_BUFFER_REPLY_DESCRIPTOR {
+	u_int8_t ReplyFlags;		/* 0x00 */
+	u_int8_t MSIxIndex;		/* 0x01 */
+	u_int8_t VP_ID;			/* 0x02 */
+	u_int8_t Flags;			/* 0x03 */
+	u_int16_t InitiatorDevHandle;	/* 0x04 */
+	u_int16_t IoIndex;		/* 0x06 */
+}	MPI2_TARGET_COMMAND_BUFFER_REPLY_DESCRIPTOR,
+
+	MPI2_POINTER PTR_MPI2_TARGET_COMMAND_BUFFER_REPLY_DESCRIPTOR,
+Mpi2TargetCommandBufferReplyDescriptor_t, MPI2_POINTER pMpi2TargetCommandBufferReplyDescriptor_t;
+
+/* RAID Accelerator Success Reply Descriptor */
+typedef struct _MPI2_RAID_ACCELERATOR_SUCCESS_REPLY_DESCRIPTOR {
+	u_int8_t ReplyFlags;		/* 0x00 */
+	u_int8_t MSIxIndex;		/* 0x01 */
+	u_int16_t SMID;			/* 0x02 */
+	u_int32_t Reserved;		/* 0x04 */
+}	MPI2_RAID_ACCELERATOR_SUCCESS_REPLY_DESCRIPTOR,
+
+	MPI2_POINTER PTR_MPI2_RAID_ACCELERATOR_SUCCESS_REPLY_DESCRIPTOR,
+Mpi2RAIDAcceleratorSuccessReplyDescriptor_t, MPI2_POINTER pMpi2RAIDAcceleratorSuccessReplyDescriptor_t;
+
+/* union of Reply Descriptors */
+typedef union _MPI2_REPLY_DESCRIPTORS_UNION {
+	MPI2_DEFAULT_REPLY_DESCRIPTOR Default;
+	MPI2_ADDRESS_REPLY_DESCRIPTOR AddressReply;
+	MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR SCSIIOSuccess;
+	MPI2_TARGETASSIST_SUCCESS_REPLY_DESCRIPTOR TargetAssistSuccess;
+	MPI2_TARGET_COMMAND_BUFFER_REPLY_DESCRIPTOR TargetCommandBuffer;
+	MPI2_RAID_ACCELERATOR_SUCCESS_REPLY_DESCRIPTOR RAIDAcceleratorSuccess;
+	u_int64_t Words;
+}	MPI2_REPLY_DESCRIPTORS_UNION, MPI2_POINTER PTR_MPI2_REPLY_DESCRIPTORS_UNION,
+Mpi2ReplyDescriptorsUnion_t, MPI2_POINTER pMpi2ReplyDescriptorsUnion_t;
+
+typedef union {
+	volatile unsigned int val;
+	unsigned int val_rdonly;
+} mrsas_atomic_t;
+
+#define	mrsas_atomic_read(v)	atomic_load_acq_int(&(v)->val)
+#define	mrsas_atomic_set(v,i)	atomic_store_rel_int(&(v)->val, i)
+#define	mrsas_atomic_dec(v)	atomic_fetchadd_int(&(v)->val, -1)
+#define	mrsas_atomic_inc(v)	atomic_fetchadd_int(&(v)->val, 1)
+
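+/*
+ * Illustrative sketch (not from the upstream header): an outstanding-command
+ * counter built on the atomic wrappers above.  The names used here are
+ * hypothetical.
+ */
+#if 0	/* example only */
+static mrsas_atomic_t example_fw_outstanding;
+
+static void
+example_track_cmd(void)
+{
+	mrsas_atomic_inc(&example_fw_outstanding);	/* command issued */
+	/* ... hand the command to firmware, wait for completion ... */
+	mrsas_atomic_dec(&example_fw_outstanding);	/* command completed */
+}
+#endif
+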
+/* IOCInit Request message */
+typedef struct _MPI2_IOC_INIT_REQUEST {
+	u_int8_t WhoInit;		/* 0x00 */
+	u_int8_t Reserved1;		/* 0x01 */
+	u_int8_t ChainOffset;		/* 0x02 */
+	u_int8_t Function;		/* 0x03 */
+	u_int16_t Reserved2;		/* 0x04 */
+	u_int8_t Reserved3;		/* 0x06 */
+	u_int8_t MsgFlags;		/* 0x07 */
+	u_int8_t VP_ID;			/* 0x08 */
+	u_int8_t VF_ID;			/* 0x09 */
+	u_int16_t Reserved4;		/* 0x0A */
+	u_int16_t MsgVersion;		/* 0x0C */
+	u_int16_t HeaderVersion;	/* 0x0E */
+	u_int32_t Reserved5;		/* 0x10 */
+	u_int16_t Reserved6;		/* 0x14 */
+	u_int8_t Reserved7;		/* 0x16 */
+	u_int8_t HostMSIxVectors;	/* 0x17 */
+	u_int16_t Reserved8;		/* 0x18 */
+	u_int16_t SystemRequestFrameSize;	/* 0x1A */
+	u_int16_t ReplyDescriptorPostQueueDepth;	/* 0x1C */
+	u_int16_t ReplyFreeQueueDepth;	/* 0x1E */
+	u_int32_t SenseBufferAddressHigh;	/* 0x20 */
+	u_int32_t SystemReplyAddressHigh;	/* 0x24 */
+	u_int64_t SystemRequestFrameBaseAddress;	/* 0x28 */
+	u_int64_t ReplyDescriptorPostQueueAddress;	/* 0x30 */
+	u_int64_t ReplyFreeQueueAddress;/* 0x38 */
+	u_int64_t TimeStamp;		/* 0x40 */
+}	MPI2_IOC_INIT_REQUEST, MPI2_POINTER PTR_MPI2_IOC_INIT_REQUEST,
+Mpi2IOCInitRequest_t, MPI2_POINTER pMpi2IOCInitRequest_t;
+
+/*
+ * MR private defines
+ */
+#define	MR_PD_INVALID			0xFFFF
+#define	MAX_SPAN_DEPTH			8
+#define	MAX_QUAD_DEPTH			MAX_SPAN_DEPTH
+#define	MAX_RAIDMAP_SPAN_DEPTH	(MAX_SPAN_DEPTH)
+#define	MAX_ROW_SIZE			32
+#define	MAX_RAIDMAP_ROW_SIZE	(MAX_ROW_SIZE)
+#define	MAX_LOGICAL_DRIVES		64
+#define	MAX_LOGICAL_DRIVES_EXT	256
+
+#define	MAX_RAIDMAP_LOGICAL_DRIVES	(MAX_LOGICAL_DRIVES)
+#define	MAX_RAIDMAP_VIEWS			(MAX_LOGICAL_DRIVES)
+
+#define	MAX_ARRAYS				128
+#define	MAX_RAIDMAP_ARRAYS		(MAX_ARRAYS)
+
+#define	MAX_ARRAYS_EXT			256
+#define	MAX_API_ARRAYS_EXT		MAX_ARRAYS_EXT
+
+#define	MAX_PHYSICAL_DEVICES	256
+#define	MAX_RAIDMAP_PHYSICAL_DEVICES	(MAX_PHYSICAL_DEVICES)
+#define	MR_DCMD_LD_MAP_GET_INFO	0x0300e101
+#define	MR_DCMD_SYSTEM_PD_MAP_GET_INFO	0x0200e102
+#define MR_DCMD_PD_MFI_TASK_MGMT	0x0200e100
+
+#define	MRSAS_MAX_PD_CHANNELS		1
+#define	MRSAS_MAX_LD_CHANNELS		1
+#define	MRSAS_MAX_DEV_PER_CHANNEL	256
+#define	MRSAS_DEFAULT_INIT_ID		-1
+#define	MRSAS_MAX_LUN				8
+#define	MRSAS_DEFAULT_CMD_PER_LUN	256
+#define	MRSAS_MAX_PD				(MRSAS_MAX_PD_CHANNELS * \
+			MRSAS_MAX_DEV_PER_CHANNEL)
+#define	MRSAS_MAX_LD_IDS			(MRSAS_MAX_LD_CHANNELS * \
+			MRSAS_MAX_DEV_PER_CHANNEL)
+
+
+#define	VD_EXT_DEBUG	0
+#define TM_DEBUG		1
+
+/*******************************************************************
+ * RAID map related structures
+ ********************************************************************/
+#pragma pack(1)
+typedef struct _MR_DEV_HANDLE_INFO {
+	u_int16_t curDevHdl;
+	u_int8_t validHandles;
+	u_int8_t reserved;
+	u_int16_t devHandle[2];
+}	MR_DEV_HANDLE_INFO;
+
+#pragma pack()
+
+typedef struct _MR_ARRAY_INFO {
+	u_int16_t pd[MAX_RAIDMAP_ROW_SIZE];
+}	MR_ARRAY_INFO;
+
+typedef struct _MR_QUAD_ELEMENT {
+	u_int64_t logStart;
+	u_int64_t logEnd;
+	u_int64_t offsetInSpan;
+	u_int32_t diff;
+	u_int32_t reserved1;
+}	MR_QUAD_ELEMENT;
+
+typedef struct _MR_SPAN_INFO {
+	u_int32_t noElements;
+	u_int32_t reserved1;
+	MR_QUAD_ELEMENT quad[MAX_RAIDMAP_SPAN_DEPTH];
+}	MR_SPAN_INFO;
+
+typedef struct _MR_LD_SPAN_ {
+	u_int64_t startBlk;
+	u_int64_t numBlks;
+	u_int16_t arrayRef;
+	u_int8_t spanRowSize;
+	u_int8_t spanRowDataSize;
+	u_int8_t reserved[4];
+}	MR_LD_SPAN;
+
+typedef struct _MR_SPAN_BLOCK_INFO {
+	u_int64_t num_rows;
+	MR_LD_SPAN span;
+	MR_SPAN_INFO block_span_info;
+}	MR_SPAN_BLOCK_INFO;
+
+typedef struct _MR_LD_RAID {
+	struct {
+		u_int32_t fpCapable:1;
+		u_int32_t reserved5:3;
+		u_int32_t ldPiMode:4;
+		u_int32_t pdPiMode:4;
+		u_int32_t encryptionType:8;
+		u_int32_t fpWriteCapable:1;
+		u_int32_t fpReadCapable:1;
+		u_int32_t fpWriteAcrossStripe:1;
+		u_int32_t fpReadAcrossStripe:1;
+		u_int32_t fpNonRWCapable:1;
+		u_int32_t tmCapable:1;
+		u_int32_t reserved4:6;
+	}	capability;
+	u_int32_t reserved6;
+	u_int64_t size;
+
+	u_int8_t spanDepth;
+	u_int8_t level;
+	u_int8_t stripeShift;
+	u_int8_t rowSize;
+
+	u_int8_t rowDataSize;
+	u_int8_t writeMode;
+	u_int8_t PRL;
+	u_int8_t SRL;
+
+	u_int16_t targetId;
+	u_int8_t ldState;
+	u_int8_t regTypeReqOnWrite;
+	u_int8_t modFactor;
+	u_int8_t regTypeReqOnRead;
+	u_int16_t seqNum;
+
+	struct {
+		u_int32_t ldSyncRequired:1;
+		u_int32_t regTypeReqOnReadLsValid:1;
+		u_int32_t reserved:30;
+	}	flags;
+
+	u_int8_t LUN[8];
+	u_int8_t fpIoTimeoutForLd;
+	u_int8_t reserved2[3];
+	u_int32_t logicalBlockLength;
+	struct {
+		u_int32_t LdPiExp:4;
+		u_int32_t LdLogicalBlockExp:4;
+		u_int32_t reserved1:24;
+	}	exponent;
+	u_int8_t reserved3[0x80 - 0x38];
+}	MR_LD_RAID;
+
+typedef struct _MR_LD_SPAN_MAP {
+	MR_LD_RAID ldRaid;
+	u_int8_t dataArmMap[MAX_RAIDMAP_ROW_SIZE];
+	MR_SPAN_BLOCK_INFO spanBlock[MAX_RAIDMAP_SPAN_DEPTH];
+}	MR_LD_SPAN_MAP;
+
+typedef struct _MR_FW_RAID_MAP {
+	u_int32_t totalSize;
+	union {
+		struct {
+			u_int32_t maxLd;
+			u_int32_t maxSpanDepth;
+			u_int32_t maxRowSize;
+			u_int32_t maxPdCount;
+			u_int32_t maxArrays;
+		}	validationInfo;
+		u_int32_t version[5];
+		u_int32_t reserved1[5];
+	}	raid_desc;
+	u_int32_t ldCount;
+	u_int32_t Reserved1;
+
+	/*
+	 * This does not map FW Ld Tgt Id to LD one-to-one; unused entries
+	 * are padded out.  For example, if the tgt Id is 4 and the FW LD is
+	 * 2, and there is only one LD, FW will populate the array like this:
+	 * [0xFF, 0xFF, 0xFF, 0xFF, 0x0, .....].  This helps reduce the
+	 * overall structure size when there are few LDs or the driver only
+	 * needs info for one LD (see the lookup sketch after this structure).
+	 */
+	u_int8_t ldTgtIdToLd[MAX_RAIDMAP_LOGICAL_DRIVES + MAX_RAIDMAP_VIEWS];
+	u_int8_t fpPdIoTimeoutSec;
+	u_int8_t reserved2[7];
+	MR_ARRAY_INFO arMapInfo[MAX_RAIDMAP_ARRAYS];
+	MR_DEV_HANDLE_INFO devHndlInfo[MAX_RAIDMAP_PHYSICAL_DEVICES];
+	MR_LD_SPAN_MAP ldSpanMap[1];
+}	MR_FW_RAID_MAP;
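+
+/*
+ * Illustrative sketch (not from the upstream header): translating a target
+ * id to a firmware LD index through the sparse ldTgtIdToLd table described
+ * above.  The helper name is hypothetical.
+ */
+#if 0	/* example only */
+static u_int8_t
+example_tgt_to_ld(MR_FW_RAID_MAP *map, u_int16_t tgtId)
+{
+	/* 0xFF marks an unused slot; any other value is the FW LD index. */
+	return (map->ldTgtIdToLd[tgtId]);
+}
+#endif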
+
+
+typedef struct _MR_FW_RAID_MAP_EXT {
+	/* Not used in new map */
+	u_int32_t reserved;
+
+	union {
+		struct {
+			u_int32_t maxLd;
+			u_int32_t maxSpanDepth;
+			u_int32_t maxRowSize;
+			u_int32_t maxPdCount;
+			u_int32_t maxArrays;
+		}	validationInfo;
+		u_int32_t version[5];
+		u_int32_t reserved1[5];
+	}	fw_raid_desc;
+
+	u_int8_t fpPdIoTimeoutSec;
+	u_int8_t reserved2[7];
+
+	u_int16_t ldCount;
+	u_int16_t arCount;
+	u_int16_t spanCount;
+	u_int16_t reserve3;
+
+	MR_DEV_HANDLE_INFO devHndlInfo[MAX_RAIDMAP_PHYSICAL_DEVICES];
+	u_int8_t ldTgtIdToLd[MAX_LOGICAL_DRIVES_EXT];
+	MR_ARRAY_INFO arMapInfo[MAX_API_ARRAYS_EXT];
+	MR_LD_SPAN_MAP ldSpanMap[MAX_LOGICAL_DRIVES_EXT];
+}	MR_FW_RAID_MAP_EXT;
+
+
+typedef struct _MR_DRV_RAID_MAP {
+	/*
+	 * Total size of this structure, including this field.  This field is
+	 * manipulated by the driver for the extended raid map; otherwise the
+	 * value is taken from the firmware raid map.
+	 */
+	u_int32_t totalSize;
+
+	union {
+		struct {
+			u_int32_t maxLd;
+			u_int32_t maxSpanDepth;
+			u_int32_t maxRowSize;
+			u_int32_t maxPdCount;
+			u_int32_t maxArrays;
+		}	validationInfo;
+		u_int32_t version[5];
+		u_int32_t reserved1[5];
+	}	drv_raid_desc;
+
+	/* timeout value used by driver in FP IOs */
+	u_int8_t fpPdIoTimeoutSec;
+	u_int8_t reserved2[7];
+
+	u_int16_t ldCount;
+	u_int16_t arCount;
+	u_int16_t spanCount;
+	u_int16_t reserve3;
+
+	MR_DEV_HANDLE_INFO devHndlInfo[MAX_RAIDMAP_PHYSICAL_DEVICES];
+	u_int8_t ldTgtIdToLd[MAX_LOGICAL_DRIVES_EXT];
+	MR_ARRAY_INFO arMapInfo[MAX_API_ARRAYS_EXT];
+	MR_LD_SPAN_MAP ldSpanMap[1];
+
+}	MR_DRV_RAID_MAP;
+
+/*
+ * The driver raid map size is the same as the extended raid map.
+ * MR_DRV_RAID_MAP_ALL is created to stay in sync with the old raid map and
+ * exists mainly for code reuse.
+ */
+
+#pragma pack(1)
+typedef struct _MR_DRV_RAID_MAP_ALL {
+
+	MR_DRV_RAID_MAP raidMap;
+	MR_LD_SPAN_MAP ldSpanMap[MAX_LOGICAL_DRIVES_EXT - 1];
+}	MR_DRV_RAID_MAP_ALL;
+
+#pragma pack()
+
+typedef struct _LD_LOAD_BALANCE_INFO {
+	u_int8_t loadBalanceFlag;
+	u_int8_t reserved1;
+	mrsas_atomic_t scsi_pending_cmds[MAX_PHYSICAL_DEVICES];
+	u_int64_t last_accessed_block[MAX_PHYSICAL_DEVICES];
+}	LD_LOAD_BALANCE_INFO, *PLD_LOAD_BALANCE_INFO;
+
+/* SPAN_SET is information calculated per LD from the span info in the RAID map */
+typedef struct _LD_SPAN_SET {
+	u_int64_t log_start_lba;
+	u_int64_t log_end_lba;
+	u_int64_t span_row_start;
+	u_int64_t span_row_end;
+	u_int64_t data_strip_start;
+	u_int64_t data_strip_end;
+	u_int64_t data_row_start;
+	u_int64_t data_row_end;
+	u_int8_t strip_offset[MAX_SPAN_DEPTH];
+	u_int32_t span_row_data_width;
+	u_int32_t diff;
+	u_int32_t reserved[2];
+}	LD_SPAN_SET, *PLD_SPAN_SET;
+
+typedef struct LOG_BLOCK_SPAN_INFO {
+	LD_SPAN_SET span_set[MAX_SPAN_DEPTH];
+}	LD_SPAN_INFO, *PLD_SPAN_INFO;
+
+#pragma pack(1)
+typedef struct _MR_FW_RAID_MAP_ALL {
+	MR_FW_RAID_MAP raidMap;
+	MR_LD_SPAN_MAP ldSpanMap[MAX_LOGICAL_DRIVES - 1];
+}	MR_FW_RAID_MAP_ALL;
+
+#pragma pack()
+
+struct IO_REQUEST_INFO {
+	u_int64_t ldStartBlock;
+	u_int32_t numBlocks;
+	u_int16_t ldTgtId;
+	u_int8_t isRead;
+	u_int16_t devHandle;
+	u_int64_t pdBlock;
+	u_int8_t fpOkForIo;
+	u_int8_t IoforUnevenSpan;
+	u_int8_t start_span;
+	u_int8_t reserved;
+	u_int64_t start_row;
+	/* span[7:5], arm[4:0] */
+	u_int8_t span_arm;
+	u_int8_t pd_after_lb;
+};
+
+/*
+ * define MR_PD_CFG_SEQ structure for system PDs
+ */
+struct MR_PD_CFG_SEQ {
+	u_int16_t seqNum;
+	u_int16_t devHandle;
+	struct {
+		u_int8_t tmCapable:1;
+		u_int8_t reserved:7;
+	} capability;
+	u_int8_t reserved[3];
+} __packed;
+
+struct MR_PD_CFG_SEQ_NUM_SYNC {
+	u_int32_t size;
+	u_int32_t count;
+	struct MR_PD_CFG_SEQ seq[1];
+} __packed;
+
+
+typedef struct _MR_LD_TARGET_SYNC {
+	u_int8_t targetId;
+	u_int8_t reserved;
+	u_int16_t seqNum;
+}	MR_LD_TARGET_SYNC;
+
+#define	IEEE_SGE_FLAGS_ADDR_MASK		(0x03)
+#define	IEEE_SGE_FLAGS_SYSTEM_ADDR		(0x00)
+#define	IEEE_SGE_FLAGS_IOCDDR_ADDR		(0x01)
+#define	IEEE_SGE_FLAGS_IOCPLB_ADDR		(0x02)
+#define	IEEE_SGE_FLAGS_IOCPLBNTA_ADDR	(0x03)
+#define	IEEE_SGE_FLAGS_CHAIN_ELEMENT	(0x80)
+#define	IEEE_SGE_FLAGS_END_OF_LIST		(0x40)
+
+union desc_value {
+	u_int64_t word;
+	struct {
+		u_int32_t low;
+		u_int32_t high;
+	}	u;
+};
+
+/*******************************************************************
+ * Temporary command
+ ********************************************************************/
+struct mrsas_tmp_dcmd {
+	bus_dma_tag_t tmp_dcmd_tag;
+	bus_dmamap_t tmp_dcmd_dmamap;
+	void   *tmp_dcmd_mem;
+	bus_addr_t tmp_dcmd_phys_addr;
+};
+
+/*******************************************************************
+ * Register set, including legacy controllers 1068 and 1078;
+ * the structure is extended for the 1078 registers.
+ *******************************************************************/
+#pragma pack(1)
+typedef struct _mrsas_register_set {
+	u_int32_t doorbell;		/* 0000h */
+	u_int32_t fusion_seq_offset;	/* 0004h */
+	u_int32_t fusion_host_diag;	/* 0008h */
+	u_int32_t reserved_01;		/* 000Ch */
+
+	u_int32_t inbound_msg_0;	/* 0010h */
+	u_int32_t inbound_msg_1;	/* 0014h */
+	u_int32_t outbound_msg_0;	/* 0018h */
+	u_int32_t outbound_msg_1;	/* 001Ch */
+
+	u_int32_t inbound_doorbell;	/* 0020h */
+	u_int32_t inbound_intr_status;	/* 0024h */
+	u_int32_t inbound_intr_mask;	/* 0028h */
+
+	u_int32_t outbound_doorbell;	/* 002Ch */
+	u_int32_t outbound_intr_status;	/* 0030h */
+	u_int32_t outbound_intr_mask;	/* 0034h */
+
+	u_int32_t reserved_1[2];	/* 0038h */
+
+	u_int32_t inbound_queue_port;	/* 0040h */
+	u_int32_t outbound_queue_port;	/* 0044h */
+
+	u_int32_t reserved_2[9];	/* 0048h */
+	u_int32_t reply_post_host_index;/* 006Ch */
+	u_int32_t reserved_2_2[12];	/* 0070h */
+
+	u_int32_t outbound_doorbell_clear;	/* 00A0h */
+
+	u_int32_t reserved_3[3];	/* 00A4h */
+
+	u_int32_t outbound_scratch_pad;	/* 00B0h */
+	u_int32_t outbound_scratch_pad_2;	/* 00B4h */
+
+	u_int32_t reserved_4[2];	/* 00B8h */
+
+	u_int32_t inbound_low_queue_port;	/* 00C0h */
+
+	u_int32_t inbound_high_queue_port;	/* 00C4h */
+
+	u_int32_t reserved_5;		/* 00C8h */
+	u_int32_t res_6[11];		/* CCh */
+	u_int32_t host_diag;
+	u_int32_t seq_offset;
+	u_int32_t index_registers[807];	/* 00CCh */
+}	mrsas_reg_set;
+
+#pragma pack()
+
+/*******************************************************************
+ * Firmware Interface Defines
+ *******************************************************************
+ * MFI stands for MegaRAID SAS FW Interface. This is just a moniker
+ * for the protocol between the software and the firmware. Commands
+ * are issued using "message frames".
+ ******************************************************************/
+/*
+ * FW posts its state in upper 4 bits of outbound_msg_0 register
+ */
+#define	MFI_STATE_MASK					0xF0000000
+#define	MFI_STATE_UNDEFINED				0x00000000
+#define	MFI_STATE_BB_INIT				0x10000000
+#define	MFI_STATE_FW_INIT				0x40000000
+#define	MFI_STATE_WAIT_HANDSHAKE		0x60000000
+#define	MFI_STATE_FW_INIT_2				0x70000000
+#define	MFI_STATE_DEVICE_SCAN			0x80000000
+#define	MFI_STATE_BOOT_MESSAGE_PENDING	0x90000000
+#define	MFI_STATE_FLUSH_CACHE			0xA0000000
+#define	MFI_STATE_READY					0xB0000000
+#define	MFI_STATE_OPERATIONAL			0xC0000000
+#define	MFI_STATE_FAULT					0xF0000000
+#define	MFI_RESET_REQUIRED				0x00000001
+#define	MFI_RESET_ADAPTER				0x00000002
+#define	MEGAMFI_FRAME_SIZE				64
+#define	MRSAS_MFI_FRAME_SIZE			1024
+#define	MRSAS_MFI_SENSE_SIZE			128
+
+/*
+ * During FW init, clear pending cmds & reset state using inbound_msg_0
+ *
+ * ABORT        : Abort all pending cmds
+ * READY        : Move from OPERATIONAL to READY state; discard queue info
+ * MFIMODE      : Discard (possible) low MFA posted in 64-bit mode (??)
+ * CLR_HANDSHAKE: FW is waiting for HANDSHAKE from BIOS or Driver
+ * HOTPLUG      : Resume from Hotplug
+ * MFI_STOP_ADP : Send signal to FW to stop processing
+ */
+
+#define	WRITE_SEQUENCE_OFFSET		(0x000000FC)
+#define	HOST_DIAGNOSTIC_OFFSET		(0x000000F8)
+#define	DIAG_WRITE_ENABLE			(0x00000080)
+#define	DIAG_RESET_ADAPTER			(0x00000004)
+
+#define	MFI_ADP_RESET				0x00000040
+#define	MFI_INIT_ABORT				0x00000001
+#define	MFI_INIT_READY				0x00000002
+#define	MFI_INIT_MFIMODE			0x00000004
+#define	MFI_INIT_CLEAR_HANDSHAKE	0x00000008
+#define	MFI_INIT_HOTPLUG			0x00000010
+#define	MFI_STOP_ADP				0x00000020
+#define	MFI_RESET_FLAGS				(MFI_INIT_READY |	\
+									MFI_INIT_MFIMODE |	\
+									MFI_INIT_ABORT)
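+
+/*
+ * Worked value: MFI_RESET_FLAGS expands to
+ *   MFI_INIT_READY | MFI_INIT_MFIMODE | MFI_INIT_ABORT
+ *   = 0x00000002   | 0x00000004       | 0x00000001 = 0x00000007,
+ * the combination used when clearing pending commands and resetting state
+ * through inbound_msg_0 at FW init (see the comment above).
+ */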
+
+/*
+ * MFI frame flags
+ */
+#define	MFI_FRAME_POST_IN_REPLY_QUEUE			0x0000
+#define	MFI_FRAME_DONT_POST_IN_REPLY_QUEUE		0x0001
+#define	MFI_FRAME_SGL32							0x0000
+#define	MFI_FRAME_SGL64							0x0002
+#define	MFI_FRAME_SENSE32						0x0000
+#define	MFI_FRAME_SENSE64						0x0004
+#define	MFI_FRAME_DIR_NONE						0x0000
+#define	MFI_FRAME_DIR_WRITE						0x0008
+#define	MFI_FRAME_DIR_READ						0x0010
+#define	MFI_FRAME_DIR_BOTH						0x0018
+#define	MFI_FRAME_IEEE							0x0020
+
+/*
+ * Definition for cmd_status
+ */
+#define	MFI_CMD_STATUS_POLL_MODE				0xFF
+
+/*
+ * MFI command opcodes
+ */
+#define	MFI_CMD_INIT							0x00
+#define	MFI_CMD_LD_READ							0x01
+#define	MFI_CMD_LD_WRITE						0x02
+#define	MFI_CMD_LD_SCSI_IO						0x03
+#define	MFI_CMD_PD_SCSI_IO						0x04
+#define	MFI_CMD_DCMD							0x05
+#define	MFI_CMD_ABORT							0x06
+#define	MFI_CMD_SMP								0x07
+#define	MFI_CMD_STP								0x08
+#define	MFI_CMD_INVALID							0xff
+
+#define	MR_DCMD_CTRL_GET_INFO					0x01010000
+#define	MR_DCMD_LD_GET_LIST						0x03010000
+#define	MR_DCMD_CTRL_CACHE_FLUSH				0x01101000
+#define	MR_FLUSH_CTRL_CACHE						0x01
+#define	MR_FLUSH_DISK_CACHE						0x02
+
+#define	MR_DCMD_CTRL_SHUTDOWN					0x01050000
+#define	MR_DCMD_HIBERNATE_SHUTDOWN				0x01060000
+#define	MR_ENABLE_DRIVE_SPINDOWN				0x01
+
+#define	MR_DCMD_CTRL_EVENT_GET_INFO				0x01040100
+#define	MR_DCMD_CTRL_EVENT_GET					0x01040300
+#define	MR_DCMD_CTRL_EVENT_WAIT					0x01040500
+#define	MR_DCMD_LD_GET_PROPERTIES				0x03030000
+
+#define	MR_DCMD_CLUSTER							0x08000000
+#define	MR_DCMD_CLUSTER_RESET_ALL				0x08010100
+#define	MR_DCMD_CLUSTER_RESET_LD				0x08010200
+#define	MR_DCMD_PD_LIST_QUERY					0x02010100
+
+#define	MR_DCMD_CTRL_MISC_CPX					0x0100e200
+#define	MR_DCMD_CTRL_MISC_CPX_INIT_DATA_GET		0x0100e201
+#define	MR_DCMD_CTRL_MISC_CPX_QUEUE_DATA		0x0100e202
+#define	MR_DCMD_CTRL_MISC_CPX_UNREGISTER		0x0100e203
+#define	MAX_MR_ROW_SIZE							32
+#define	MR_CPX_DIR_WRITE						1
+#define	MR_CPX_DIR_READ							0
+#define	MR_CPX_VERSION							1
+
+#define	MR_DCMD_CTRL_IO_METRICS_GET				0x01170200
+
+#define	MR_EVT_CFG_CLEARED						0x0004
+
+#define	MR_EVT_LD_STATE_CHANGE					0x0051
+#define	MR_EVT_PD_INSERTED						0x005b
+#define	MR_EVT_PD_REMOVED						0x0070
+#define	MR_EVT_LD_CREATED						0x008a
+#define	MR_EVT_LD_DELETED						0x008b
+#define	MR_EVT_FOREIGN_CFG_IMPORTED				0x00db
+#define	MR_EVT_LD_OFFLINE						0x00fc
+#define	MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED		0x0152
+#define	MR_EVT_CTRL_PERF_COLLECTION				0x017e
+
+/*
+ * MFI command completion codes
+ */
+enum MFI_STAT {
+	MFI_STAT_OK = 0x00,
+	MFI_STAT_INVALID_CMD = 0x01,
+	MFI_STAT_INVALID_DCMD = 0x02,
+	MFI_STAT_INVALID_PARAMETER = 0x03,
+	MFI_STAT_INVALID_SEQUENCE_NUMBER = 0x04,
+	MFI_STAT_ABORT_NOT_POSSIBLE = 0x05,
+	MFI_STAT_APP_HOST_CODE_NOT_FOUND = 0x06,
+	MFI_STAT_APP_IN_USE = 0x07,
+	MFI_STAT_APP_NOT_INITIALIZED = 0x08,
+	MFI_STAT_ARRAY_INDEX_INVALID = 0x09,
+	MFI_STAT_ARRAY_ROW_NOT_EMPTY = 0x0a,
+	MFI_STAT_CONFIG_RESOURCE_CONFLICT = 0x0b,
+	MFI_STAT_DEVICE_NOT_FOUND = 0x0c,
+	MFI_STAT_DRIVE_TOO_SMALL = 0x0d,
+	MFI_STAT_FLASH_ALLOC_FAIL = 0x0e,
+	MFI_STAT_FLASH_BUSY = 0x0f,
+	MFI_STAT_FLASH_ERROR = 0x10,
+	MFI_STAT_FLASH_IMAGE_BAD = 0x11,
+	MFI_STAT_FLASH_IMAGE_INCOMPLETE = 0x12,
+	MFI_STAT_FLASH_NOT_OPEN = 0x13,
+	MFI_STAT_FLASH_NOT_STARTED = 0x14,
+	MFI_STAT_FLUSH_FAILED = 0x15,
+	MFI_STAT_HOST_CODE_NOT_FOUNT = 0x16,
+	MFI_STAT_LD_CC_IN_PROGRESS = 0x17,
+	MFI_STAT_LD_INIT_IN_PROGRESS = 0x18,
+	MFI_STAT_LD_LBA_OUT_OF_RANGE = 0x19,
+	MFI_STAT_LD_MAX_CONFIGURED = 0x1a,
+	MFI_STAT_LD_NOT_OPTIMAL = 0x1b,
+	MFI_STAT_LD_RBLD_IN_PROGRESS = 0x1c,
+	MFI_STAT_LD_RECON_IN_PROGRESS = 0x1d,
+	MFI_STAT_LD_WRONG_RAID_LEVEL = 0x1e,
+	MFI_STAT_MAX_SPARES_EXCEEDED = 0x1f,
+	MFI_STAT_MEMORY_NOT_AVAILABLE = 0x20,
+	MFI_STAT_MFC_HW_ERROR = 0x21,
+	MFI_STAT_NO_HW_PRESENT = 0x22,
+	MFI_STAT_NOT_FOUND = 0x23,
+	MFI_STAT_NOT_IN_ENCL = 0x24,
+	MFI_STAT_PD_CLEAR_IN_PROGRESS = 0x25,
+	MFI_STAT_PD_TYPE_WRONG = 0x26,
+	MFI_STAT_PR_DISABLED = 0x27,
+	MFI_STAT_ROW_INDEX_INVALID = 0x28,
+	MFI_STAT_SAS_CONFIG_INVALID_ACTION = 0x29,
+	MFI_STAT_SAS_CONFIG_INVALID_DATA = 0x2a,
+	MFI_STAT_SAS_CONFIG_INVALID_PAGE = 0x2b,
+	MFI_STAT_SAS_CONFIG_INVALID_TYPE = 0x2c,
+	MFI_STAT_SCSI_DONE_WITH_ERROR = 0x2d,
+	MFI_STAT_SCSI_IO_FAILED = 0x2e,
+	MFI_STAT_SCSI_RESERVATION_CONFLICT = 0x2f,
+	MFI_STAT_SHUTDOWN_FAILED = 0x30,
+	MFI_STAT_TIME_NOT_SET = 0x31,
+	MFI_STAT_WRONG_STATE = 0x32,
+	MFI_STAT_LD_OFFLINE = 0x33,
+	MFI_STAT_PEER_NOTIFICATION_REJECTED = 0x34,
+	MFI_STAT_PEER_NOTIFICATION_FAILED = 0x35,
+	MFI_STAT_RESERVATION_IN_PROGRESS = 0x36,
+	MFI_STAT_I2C_ERRORS_DETECTED = 0x37,
+	MFI_STAT_PCI_ERRORS_DETECTED = 0x38,
+	MFI_STAT_CONFIG_SEQ_MISMATCH = 0x67,
+
+	MFI_STAT_INVALID_STATUS = 0xFF
+};
+
+/*
+ * Number of mailbox bytes in DCMD message frame
+ */
+#define	MFI_MBOX_SIZE	12
+
+enum MR_EVT_CLASS {
+
+	MR_EVT_CLASS_DEBUG = -2,
+	MR_EVT_CLASS_PROGRESS = -1,
+	MR_EVT_CLASS_INFO = 0,
+	MR_EVT_CLASS_WARNING = 1,
+	MR_EVT_CLASS_CRITICAL = 2,
+	MR_EVT_CLASS_FATAL = 3,
+	MR_EVT_CLASS_DEAD = 4,
+
+};
+
+enum MR_EVT_LOCALE {
+
+	MR_EVT_LOCALE_LD = 0x0001,
+	MR_EVT_LOCALE_PD = 0x0002,
+	MR_EVT_LOCALE_ENCL = 0x0004,
+	MR_EVT_LOCALE_BBU = 0x0008,
+	MR_EVT_LOCALE_SAS = 0x0010,
+	MR_EVT_LOCALE_CTRL = 0x0020,
+	MR_EVT_LOCALE_CONFIG = 0x0040,
+	MR_EVT_LOCALE_CLUSTER = 0x0080,
+	MR_EVT_LOCALE_ALL = 0xffff,
+
+};
+
+enum MR_EVT_ARGS {
+
+	MR_EVT_ARGS_NONE,
+	MR_EVT_ARGS_CDB_SENSE,
+	MR_EVT_ARGS_LD,
+	MR_EVT_ARGS_LD_COUNT,
+	MR_EVT_ARGS_LD_LBA,
+	MR_EVT_ARGS_LD_OWNER,
+	MR_EVT_ARGS_LD_LBA_PD_LBA,
+	MR_EVT_ARGS_LD_PROG,
+	MR_EVT_ARGS_LD_STATE,
+	MR_EVT_ARGS_LD_STRIP,
+	MR_EVT_ARGS_PD,
+	MR_EVT_ARGS_PD_ERR,
+	MR_EVT_ARGS_PD_LBA,
+	MR_EVT_ARGS_PD_LBA_LD,
+	MR_EVT_ARGS_PD_PROG,
+	MR_EVT_ARGS_PD_STATE,
+	MR_EVT_ARGS_PCI,
+	MR_EVT_ARGS_RATE,
+	MR_EVT_ARGS_STR,
+	MR_EVT_ARGS_TIME,
+	MR_EVT_ARGS_ECC,
+	MR_EVT_ARGS_LD_PROP,
+	MR_EVT_ARGS_PD_SPARE,
+	MR_EVT_ARGS_PD_INDEX,
+	MR_EVT_ARGS_DIAG_PASS,
+	MR_EVT_ARGS_DIAG_FAIL,
+	MR_EVT_ARGS_PD_LBA_LBA,
+	MR_EVT_ARGS_PORT_PHY,
+	MR_EVT_ARGS_PD_MISSING,
+	MR_EVT_ARGS_PD_ADDRESS,
+	MR_EVT_ARGS_BITMAP,
+	MR_EVT_ARGS_CONNECTOR,
+	MR_EVT_ARGS_PD_PD,
+	MR_EVT_ARGS_PD_FRU,
+	MR_EVT_ARGS_PD_PATHINFO,
+	MR_EVT_ARGS_PD_POWER_STATE,
+	MR_EVT_ARGS_GENERIC,
+};
+
+/*
+ * Thunderbolt (and later) Defines
+ */
+#define	MEGASAS_CHAIN_FRAME_SZ_MIN					1024
+#define	MFI_FUSION_ENABLE_INTERRUPT_MASK			(0x00000009)
+#define	MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE		256
+#define	MRSAS_MPI2_FUNCTION_PASSTHRU_IO_REQUEST		0xF0
+#define	MRSAS_MPI2_FUNCTION_LD_IO_REQUEST			0xF1
+#define	MRSAS_LOAD_BALANCE_FLAG						0x1
+#define	MRSAS_DCMD_MBOX_PEND_FLAG					0x1
+#define	HOST_DIAG_WRITE_ENABLE						0x80
+#define	HOST_DIAG_RESET_ADAPTER						0x4
+#define	MRSAS_TBOLT_MAX_RESET_TRIES					3
+#define MRSAS_MAX_MFI_CMDS                          16
+#define MRSAS_MAX_IOCTL_CMDS                        3
+
+/*
+ * Invader Defines
+ */
+#define	MPI2_TYPE_CUDA								0x2
+#define	MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH	0x4000
+#define	MR_RL_FLAGS_GRANT_DESTINATION_CPU0			0x00
+#define	MR_RL_FLAGS_GRANT_DESTINATION_CPU1			0x10
+#define	MR_RL_FLAGS_GRANT_DESTINATION_CUDA			0x80
+#define	MR_RL_FLAGS_SEQ_NUM_ENABLE					0x8
+
+/*
+ * T10 PI defines
+ */
+#define	MR_PROT_INFO_TYPE_CONTROLLER				0x8
+#define	MRSAS_SCSI_VARIABLE_LENGTH_CMD				0x7f
+#define	MRSAS_SCSI_SERVICE_ACTION_READ32			0x9
+#define	MRSAS_SCSI_SERVICE_ACTION_WRITE32			0xB
+#define	MRSAS_SCSI_ADDL_CDB_LEN						0x18
+#define	MRSAS_RD_WR_PROTECT_CHECK_ALL				0x20
+#define	MRSAS_RD_WR_PROTECT_CHECK_NONE				0x60
+#define	MRSAS_SCSIBLOCKSIZE							512
+
+/*
+ * Raid context flags
+ */
+#define	MR_RAID_CTX_RAID_FLAGS_IO_SUB_TYPE_SHIFT	0x4
+#define	MR_RAID_CTX_RAID_FLAGS_IO_SUB_TYPE_MASK		0x30
+typedef enum MR_RAID_FLAGS_IO_SUB_TYPE {
+	MR_RAID_FLAGS_IO_SUB_TYPE_NONE = 0,
+	MR_RAID_FLAGS_IO_SUB_TYPE_SYSTEM_PD = 1,
+}	MR_RAID_FLAGS_IO_SUB_TYPE;
+
+/*
+ * Request descriptor types
+ */
+#define	MRSAS_REQ_DESCRIPT_FLAGS_LD_IO		0x7
+#define	MRSAS_REQ_DESCRIPT_FLAGS_MFA		0x1
+#define	MRSAS_REQ_DESCRIPT_FLAGS_NO_LOCK	0x2
+#define	MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT	1
+#define	MRSAS_FP_CMD_LEN					16
+#define	MRSAS_FUSION_IN_RESET				0
+
+#define	RAID_CTX_SPANARM_ARM_SHIFT			(0)
+#define	RAID_CTX_SPANARM_ARM_MASK			(0x1f)
+#define	RAID_CTX_SPANARM_SPAN_SHIFT			(5)
+#define	RAID_CTX_SPANARM_SPAN_MASK			(0xE0)
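+
+/*
+ * Illustrative sketch (not from the upstream header): decoding the packed
+ * span/arm byte (span in bits 7:5, arm in bits 4:0) with the masks above.
+ */
+#if 0	/* example only */
+static void
+example_decode_span_arm(u_int8_t span_arm, u_int8_t *span, u_int8_t *arm)
+{
+	*arm = (span_arm & RAID_CTX_SPANARM_ARM_MASK) >>
+	    RAID_CTX_SPANARM_ARM_SHIFT;
+	*span = (span_arm & RAID_CTX_SPANARM_SPAN_MASK) >>
+	    RAID_CTX_SPANARM_SPAN_SHIFT;
+}
+#endif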
+
+/*
+ * Define region lock types
+ */
+typedef enum _REGION_TYPE {
+	REGION_TYPE_UNUSED = 0,
+	REGION_TYPE_SHARED_READ = 1,
+	REGION_TYPE_SHARED_WRITE = 2,
+	REGION_TYPE_EXCLUSIVE = 3,
+}	REGION_TYPE;
+
+
+/*
+ * SCSI-CAM Related Defines
+ */
+#define	MRSAS_SCSI_MAX_LUNS				0
+#define	MRSAS_SCSI_INITIATOR_ID			255
+#define	MRSAS_SCSI_MAX_CMDS				8
+#define	MRSAS_SCSI_MAX_CDB_LEN			16
+#define	MRSAS_SCSI_SENSE_BUFFERSIZE		96
+#define	MRSAS_INTERNAL_CMDS				32
+
+#define	MEGASAS_MAX_CHAIN_SIZE_UNITS_MASK	0x400000
+#define	MEGASAS_MAX_CHAIN_SIZE_MASK		0x3E0
+#define	MEGASAS_256K_IO					128
+#define	MEGASAS_1MB_IO					(MEGASAS_256K_IO * 4)
+
+/* Request types */
+#define	MRSAS_REQ_TYPE_INTERNAL_CMD		0x0
+#define	MRSAS_REQ_TYPE_AEN_FETCH		0x1
+#define	MRSAS_REQ_TYPE_PASSTHRU			0x2
+#define	MRSAS_REQ_TYPE_GETSET_PARAM		0x3
+#define	MRSAS_REQ_TYPE_SCSI_IO			0x4
+
+/* Request states */
+#define	MRSAS_REQ_STATE_FREE			0
+#define	MRSAS_REQ_STATE_BUSY			1
+#define	MRSAS_REQ_STATE_TRAN			2
+#define	MRSAS_REQ_STATE_COMPLETE		3
+
+typedef enum _MR_SCSI_CMD_TYPE {
+	READ_WRITE_LDIO = 0,
+	NON_READ_WRITE_LDIO = 1,
+	READ_WRITE_SYSPDIO = 2,
+	NON_READ_WRITE_SYSPDIO = 3,
+}	MR_SCSI_CMD_TYPE;
+
+enum mrsas_req_flags {
+	MRSAS_DIR_UNKNOWN = 0x1,
+	MRSAS_DIR_IN = 0x2,
+	MRSAS_DIR_OUT = 0x4,
+	MRSAS_DIR_NONE = 0x8,
+};
+
+/*
+ * Adapter Reset States
+ */
+enum {
+	MRSAS_HBA_OPERATIONAL = 0,
+	MRSAS_ADPRESET_SM_INFAULT = 1,
+	MRSAS_ADPRESET_SM_FW_RESET_SUCCESS = 2,
+	MRSAS_ADPRESET_SM_OPERATIONAL = 3,
+	MRSAS_HW_CRITICAL_ERROR = 4,
+	MRSAS_ADPRESET_INPROG_SIGN = 0xDEADDEAD,
+};
+
+/*
+ * MPT Command Structure
+ */
+struct mrsas_mpt_cmd {
+	MRSAS_RAID_SCSI_IO_REQUEST *io_request;
+	bus_addr_t io_request_phys_addr;
+	MPI2_SGE_IO_UNION *chain_frame;
+	bus_addr_t chain_frame_phys_addr;
+	u_int32_t sge_count;
+	u_int8_t *sense;
+	bus_addr_t sense_phys_addr;
+	u_int8_t retry_for_fw_reset;
+	MRSAS_REQUEST_DESCRIPTOR_UNION *request_desc;
+	u_int32_t sync_cmd_idx;
+	u_int32_t index;
+	u_int8_t flags;
+	u_int8_t pd_r1_lb;
+	u_int8_t load_balance;
+	bus_size_t length;
+	u_int32_t error_code;
+	bus_dmamap_t data_dmamap;
+	void   *data;
+	union ccb *ccb_ptr;
+	struct callout cm_callout;
+	struct mrsas_softc *sc;
+	boolean_t tmCapable;
+	TAILQ_ENTRY(mrsas_mpt_cmd) next;
+};
+
+/*
+ * MFI Command Structure
+ */
+struct mrsas_mfi_cmd {
+	union mrsas_frame *frame;
+	bus_dmamap_t frame_dmamap;
+	void   *frame_mem;
+	bus_addr_t frame_phys_addr;
+	u_int8_t *sense;
+	bus_dmamap_t sense_dmamap;
+	void   *sense_mem;
+	bus_addr_t sense_phys_addr;
+	u_int32_t index;
+	u_int8_t sync_cmd;
+	u_int8_t cmd_status;
+	u_int8_t abort_aen;
+	u_int8_t retry_for_fw_reset;
+	struct mrsas_softc *sc;
+	union ccb *ccb_ptr;
+	union {
+		struct {
+			u_int16_t smid;
+			u_int16_t resvd;
+		}	context;
+		u_int32_t frame_count;
+	}	cmd_id;
+	TAILQ_ENTRY(mrsas_mfi_cmd) next;
+};
+
+
+/*
+ * define constants for device list query options
+ */
+enum MR_PD_QUERY_TYPE {
+	MR_PD_QUERY_TYPE_ALL = 0,
+	MR_PD_QUERY_TYPE_STATE = 1,
+	MR_PD_QUERY_TYPE_POWER_STATE = 2,
+	MR_PD_QUERY_TYPE_MEDIA_TYPE = 3,
+	MR_PD_QUERY_TYPE_SPEED = 4,
+	MR_PD_QUERY_TYPE_EXPOSED_TO_HOST = 5,
+};
+
+#define	MR_EVT_CFG_CLEARED						0x0004
+#define	MR_EVT_LD_STATE_CHANGE					0x0051
+#define	MR_EVT_PD_INSERTED						0x005b
+#define	MR_EVT_PD_REMOVED						0x0070
+#define	MR_EVT_LD_CREATED						0x008a
+#define	MR_EVT_LD_DELETED						0x008b
+#define	MR_EVT_FOREIGN_CFG_IMPORTED				0x00db
+#define	MR_EVT_LD_OFFLINE						0x00fc
+#define	MR_EVT_CTRL_PROP_CHANGED				0x012f
+#define	MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED		0x0152
+
+enum MR_PD_STATE {
+	MR_PD_STATE_UNCONFIGURED_GOOD = 0x00,
+	MR_PD_STATE_UNCONFIGURED_BAD = 0x01,
+	MR_PD_STATE_HOT_SPARE = 0x02,
+	MR_PD_STATE_OFFLINE = 0x10,
+	MR_PD_STATE_FAILED = 0x11,
+	MR_PD_STATE_REBUILD = 0x14,
+	MR_PD_STATE_ONLINE = 0x18,
+	MR_PD_STATE_COPYBACK = 0x20,
+	MR_PD_STATE_SYSTEM = 0x40,
+};
+
+/*
+ * defines the physical drive address structure
+ */
+#pragma pack(1)
+struct MR_PD_ADDRESS {
+	u_int16_t deviceId;
+	u_int16_t enclDeviceId;
+
+	union {
+		struct {
+			u_int8_t enclIndex;
+			u_int8_t slotNumber;
+		}	mrPdAddress;
+		struct {
+			u_int8_t enclPosition;
+			u_int8_t enclConnectorIndex;
+		}	mrEnclAddress;
+	}	u1;
+	u_int8_t scsiDevType;
+	union {
+		u_int8_t connectedPortBitmap;
+		u_int8_t connectedPortNumbers;
+	}	u2;
+	u_int64_t sasAddr[2];
+};
+
+#pragma pack()
+
+/*
+ * defines the physical drive list structure
+ */
+#pragma pack(1)
+struct MR_PD_LIST {
+	u_int32_t size;
+	u_int32_t count;
+	struct MR_PD_ADDRESS addr[1];
+};
+
+#pragma pack()
+
+#pragma pack(1)
+struct mrsas_pd_list {
+	u_int16_t tid;
+	u_int8_t driveType;
+	u_int8_t driveState;
+};
+
+#pragma pack()
+
+/*
+ * defines the logical drive reference structure
+ */
+typedef union _MR_LD_REF {
+	struct {
+		u_int8_t targetId;
+		u_int8_t reserved;
+		u_int16_t seqNum;
+	}	ld_context;
+	u_int32_t ref;
+}	MR_LD_REF;
+
+
+/*
+ * defines the logical drive list structure
+ */
+#pragma pack(1)
+struct MR_LD_LIST {
+	u_int32_t ldCount;
+	u_int32_t reserved;
+	struct {
+		MR_LD_REF ref;
+		u_int8_t state;
+		u_int8_t reserved[3];
+		u_int64_t size;
+	}	ldList[MAX_LOGICAL_DRIVES_EXT];
+};
+
+#pragma pack()
+
+/*
+ * SAS controller properties
+ */
+#pragma pack(1)
+struct mrsas_ctrl_prop {
+	u_int16_t seq_num;
+	u_int16_t pred_fail_poll_interval;
+	u_int16_t intr_throttle_count;
+	u_int16_t intr_throttle_timeouts;
+	u_int8_t rebuild_rate;
+	u_int8_t patrol_read_rate;
+	u_int8_t bgi_rate;
+	u_int8_t cc_rate;
+	u_int8_t recon_rate;
+	u_int8_t cache_flush_interval;
+	u_int8_t spinup_drv_count;
+	u_int8_t spinup_delay;
+	u_int8_t cluster_enable;
+	u_int8_t coercion_mode;
+	u_int8_t alarm_enable;
+	u_int8_t disable_auto_rebuild;
+	u_int8_t disable_battery_warn;
+	u_int8_t ecc_bucket_size;
+	u_int16_t ecc_bucket_leak_rate;
+	u_int8_t restore_hotspare_on_insertion;
+	u_int8_t expose_encl_devices;
+	u_int8_t maintainPdFailHistory;
+	u_int8_t disallowHostRequestReordering;
+	u_int8_t abortCCOnError;
+	u_int8_t loadBalanceMode;
+	u_int8_t disableAutoDetectBackplane;
+	u_int8_t snapVDSpace;
+	/*
+	 * Add properties that can be controlled by a bit in the following
+	 * structure.
+	 */
+	struct {
+		u_int32_t copyBackDisabled:1;
+		u_int32_t SMARTerEnabled:1;
+		u_int32_t prCorrectUnconfiguredAreas:1;
+		u_int32_t useFdeOnly:1;
+		u_int32_t disableNCQ:1;
+		u_int32_t SSDSMARTerEnabled:1;
+		u_int32_t SSDPatrolReadEnabled:1;
+		u_int32_t enableSpinDownUnconfigured:1;
+		u_int32_t autoEnhancedImport:1;
+		u_int32_t enableSecretKeyControl:1;
+		u_int32_t disableOnlineCtrlReset:1;
+		u_int32_t allowBootWithPinnedCache:1;
+		u_int32_t disableSpinDownHS:1;
+		u_int32_t enableJBOD:1;
+		u_int32_t disableCacheBypass:1;
+		u_int32_t useDiskActivityForLocate:1;
+		u_int32_t enablePI:1;
+		u_int32_t preventPIImport:1;
+		u_int32_t useGlobalSparesForEmergency:1;
+		u_int32_t useUnconfGoodForEmergency:1;
+		u_int32_t useEmergencySparesforSMARTer:1;
+		u_int32_t forceSGPIOForQuadOnly:1;
+		u_int32_t enableConfigAutoBalance:1;
+		u_int32_t enableVirtualCache:1;
+		u_int32_t enableAutoLockRecovery:1;
+		u_int32_t disableImmediateIO:1;
+		u_int32_t disableT10RebuildAssist:1;
+		u_int32_t ignore64ldRestriction:1;
+		u_int32_t enableSwZone:1;
+		u_int32_t limitMaxRateSATA3G:1;
+		u_int32_t reserved:2;
+	}	OnOffProperties;
+	u_int8_t autoSnapVDSpace;
+	u_int8_t viewSpace;
+	u_int16_t spinDownTime;
+	u_int8_t reserved[24];
+
+};
+
+#pragma pack()
+
+
+/*
+ * SAS controller information
+ */
+struct mrsas_ctrl_info {
+	/*
+	 * PCI device information
+	 */
+	struct {
+		u_int16_t vendor_id;
+		u_int16_t device_id;
+		u_int16_t sub_vendor_id;
+		u_int16_t sub_device_id;
+		u_int8_t reserved[24];
+	} __packed pci;
+	/*
+	 * Host interface information
+	 */
+	struct {
+		u_int8_t PCIX:1;
+		u_int8_t PCIE:1;
+		u_int8_t iSCSI:1;
+		u_int8_t SAS_3G:1;
+		u_int8_t reserved_0:4;
+		u_int8_t reserved_1[6];
+		u_int8_t port_count;
+		u_int64_t port_addr[8];
+	} __packed host_interface;
+	/*
+	 * Device (backend) interface information
+	 */
+	struct {
+		u_int8_t SPI:1;
+		u_int8_t SAS_3G:1;
+		u_int8_t SATA_1_5G:1;
+		u_int8_t SATA_3G:1;
+		u_int8_t reserved_0:4;
+		u_int8_t reserved_1[6];
+		u_int8_t port_count;
+		u_int64_t port_addr[8];
+	} __packed device_interface;
+
+	u_int32_t image_check_word;
+	u_int32_t image_component_count;
+
+	struct {
+		char	name[8];
+		char	version[32];
+		char	build_date[16];
+		char	built_time[16];
+	} __packed image_component[8];
+
+	u_int32_t pending_image_component_count;
+
+	struct {
+		char	name[8];
+		char	version[32];
+		char	build_date[16];
+		char	build_time[16];
+	} __packed pending_image_component[8];
+
+	u_int8_t max_arms;
+	u_int8_t max_spans;
+	u_int8_t max_arrays;
+	u_int8_t max_lds;
+	char	product_name[80];
+	char	serial_no[32];
+
+	/*
+	 * Other physical/controller/operation information. Indicates the
+	 * presence of the hardware
+	 */
+	struct {
+		u_int32_t bbu:1;
+		u_int32_t alarm:1;
+		u_int32_t nvram:1;
+		u_int32_t uart:1;
+		u_int32_t reserved:28;
+	} __packed hw_present;
+
+	u_int32_t current_fw_time;
+
+	/*
+	 * Maximum data transfer sizes
+	 */
+	u_int16_t max_concurrent_cmds;
+	u_int16_t max_sge_count;
+	u_int32_t max_request_size;
+
+	/*
+	 * Logical and physical device counts
+	 */
+	u_int16_t ld_present_count;
+	u_int16_t ld_degraded_count;
+	u_int16_t ld_offline_count;
+
+	u_int16_t pd_present_count;
+	u_int16_t pd_disk_present_count;
+	u_int16_t pd_disk_pred_failure_count;
+	u_int16_t pd_disk_failed_count;
+
+	/*
+	 * Memory size information
+	 */
+	u_int16_t nvram_size;
+	u_int16_t memory_size;
+	u_int16_t flash_size;
+
+	/*
+	 * Error counters
+	 */
+	u_int16_t mem_correctable_error_count;
+	u_int16_t mem_uncorrectable_error_count;
+
+	/*
+	 * Cluster information
+	 */
+	u_int8_t cluster_permitted;
+	u_int8_t cluster_active;
+
+	/*
+	 * Additional max data transfer sizes
+	 */
+	u_int16_t max_strips_per_io;
+
+	/*
+	 * Controller capabilities structures
+	 */
+	struct {
+		u_int32_t raid_level_0:1;
+		u_int32_t raid_level_1:1;
+		u_int32_t raid_level_5:1;
+		u_int32_t raid_level_1E:1;
+		u_int32_t raid_level_6:1;
+		u_int32_t reserved:27;
+	} __packed raid_levels;
+
+	struct {
+		u_int32_t rbld_rate:1;
+		u_int32_t cc_rate:1;
+		u_int32_t bgi_rate:1;
+		u_int32_t recon_rate:1;
+		u_int32_t patrol_rate:1;
+		u_int32_t alarm_control:1;
+		u_int32_t cluster_supported:1;
+		u_int32_t bbu:1;
+		u_int32_t spanning_allowed:1;
+		u_int32_t dedicated_hotspares:1;
+		u_int32_t revertible_hotspares:1;
+		u_int32_t foreign_config_import:1;
+		u_int32_t self_diagnostic:1;
+		u_int32_t mixed_redundancy_arr:1;
+		u_int32_t global_hot_spares:1;
+		u_int32_t reserved:17;
+	} __packed adapter_operations;
+
+	struct {
+		u_int32_t read_policy:1;
+		u_int32_t write_policy:1;
+		u_int32_t io_policy:1;
+		u_int32_t access_policy:1;
+		u_int32_t disk_cache_policy:1;
+		u_int32_t reserved:27;
+	} __packed ld_operations;
+
+	struct {
+		u_int8_t min;
+		u_int8_t max;
+		u_int8_t reserved[2];
+	} __packed stripe_sz_ops;
+
+	struct {
+		u_int32_t force_online:1;
+		u_int32_t force_offline:1;
+		u_int32_t force_rebuild:1;
+		u_int32_t reserved:29;
+	} __packed pd_operations;
+
+	struct {
+		u_int32_t ctrl_supports_sas:1;
+		u_int32_t ctrl_supports_sata:1;
+		u_int32_t allow_mix_in_encl:1;
+		u_int32_t allow_mix_in_ld:1;
+		u_int32_t allow_sata_in_cluster:1;
+		u_int32_t reserved:27;
+	} __packed pd_mix_support;
+
+	/*
+	 * Define ECC single-bit-error bucket information
+	 */
+	u_int8_t ecc_bucket_count;
+	u_int8_t reserved_2[11];
+
+	/*
+	 * Include the controller properties (changeable items)
+	 */
+	struct mrsas_ctrl_prop properties;
+
+	/*
+	 * FW package version (set in environment variables on an OEM basis)
+	 */
+	char	package_version[0x60];
+
+	u_int64_t deviceInterfacePortAddr2[8];
+	u_int8_t reserved3[128];
+
+	struct {
+		u_int16_t minPdRaidLevel_0:4;
+		u_int16_t maxPdRaidLevel_0:12;
+
+		u_int16_t minPdRaidLevel_1:4;
+		u_int16_t maxPdRaidLevel_1:12;
+
+		u_int16_t minPdRaidLevel_5:4;
+		u_int16_t maxPdRaidLevel_5:12;
+
+		u_int16_t minPdRaidLevel_1E:4;
+		u_int16_t maxPdRaidLevel_1E:12;
+
+		u_int16_t minPdRaidLevel_6:4;
+		u_int16_t maxPdRaidLevel_6:12;
+
+		u_int16_t minPdRaidLevel_10:4;
+		u_int16_t maxPdRaidLevel_10:12;
+
+		u_int16_t minPdRaidLevel_50:4;
+		u_int16_t maxPdRaidLevel_50:12;
+
+		u_int16_t minPdRaidLevel_60:4;
+		u_int16_t maxPdRaidLevel_60:12;
+
+		u_int16_t minPdRaidLevel_1E_RLQ0:4;
+		u_int16_t maxPdRaidLevel_1E_RLQ0:12;
+
+		u_int16_t minPdRaidLevel_1E0_RLQ0:4;
+		u_int16_t maxPdRaidLevel_1E0_RLQ0:12;
+
+		u_int16_t reserved[6];
+	}	pdsForRaidLevels;
+
+	u_int16_t maxPds;		/* 0x780 */
+	u_int16_t maxDedHSPs;		/* 0x782 */
+	u_int16_t maxGlobalHSPs;	/* 0x784 */
+	u_int16_t ddfSize;		/* 0x786 */
+	u_int8_t maxLdsPerArray;	/* 0x788 */
+	u_int8_t partitionsInDDF;	/* 0x789 */
+	u_int8_t lockKeyBinding;	/* 0x78a */
+	u_int8_t maxPITsPerLd;		/* 0x78b */
+	u_int8_t maxViewsPerLd;		/* 0x78c */
+	u_int8_t maxTargetId;		/* 0x78d */
+	u_int16_t maxBvlVdSize;		/* 0x78e */
+
+	u_int16_t maxConfigurableSSCSize;	/* 0x790 */
+	u_int16_t currentSSCsize;	/* 0x792 */
+
+	char	expanderFwVersion[12];	/* 0x794 */
+
+	u_int16_t PFKTrialTimeRemaining;/* 0x7A0 */
+
+	u_int16_t cacheMemorySize;	/* 0x7A2 */
+
+	struct {			/* 0x7A4 */
+		u_int32_t supportPIcontroller:1;
+		u_int32_t supportLdPIType1:1;
+		u_int32_t supportLdPIType2:1;
+		u_int32_t supportLdPIType3:1;
+		u_int32_t supportLdBBMInfo:1;
+		u_int32_t supportShieldState:1;
+		u_int32_t blockSSDWriteCacheChange:1;
+		u_int32_t supportSuspendResumeBGops:1;
+		u_int32_t supportEmergencySpares:1;
+		u_int32_t supportSetLinkSpeed:1;
+		u_int32_t supportBootTimePFKChange:1;
+		u_int32_t supportJBOD:1;
+		u_int32_t disableOnlinePFKChange:1;
+		u_int32_t supportPerfTuning:1;
+		u_int32_t supportSSDPatrolRead:1;
+		u_int32_t realTimeScheduler:1;
+
+		u_int32_t supportResetNow:1;
+		u_int32_t supportEmulatedDrives:1;
+		u_int32_t headlessMode:1;
+		u_int32_t dedicatedHotSparesLimited:1;
+
+
+		u_int32_t supportUnevenSpans:1;
+		u_int32_t reserved:11;
+	}	adapterOperations2;
+
+	u_int8_t driverVersion[32];	/* 0x7A8 */
+	u_int8_t maxDAPdCountSpinup60;	/* 0x7C8 */
+	u_int8_t temperatureROC;	/* 0x7C9 */
+	u_int8_t temperatureCtrl;	/* 0x7CA */
+	u_int8_t reserved4;		/* 0x7CB */
+	u_int16_t maxConfigurablePds;	/* 0x7CC */
+
+
+	u_int8_t reserved5[2];		/* 0x7CD reserved */
+
+	struct {
+		u_int32_t peerIsPresent:1;
+		u_int32_t peerIsIncompatible:1;
+
+		u_int32_t hwIncompatible:1;
+		u_int32_t fwVersionMismatch:1;
+		u_int32_t ctrlPropIncompatible:1;
+		u_int32_t premiumFeatureMismatch:1;
+		u_int32_t reserved:26;
+	}	cluster;
+
+	char	clusterId[16];		/* 0x7D4 */
+
+	char	reserved6[4];		/* 0x7E4 RESERVED FOR IOV */
+
+	struct {			/* 0x7E8 */
+		u_int32_t supportPersonalityChange:2;
+		u_int32_t supportThermalPollInterval:1;
+		u_int32_t supportDisableImmediateIO:1;
+		u_int32_t supportT10RebuildAssist:1;
+		u_int32_t supportMaxExtLDs:1;
+		u_int32_t supportCrashDump:1;
+		u_int32_t supportSwZone:1;
+		u_int32_t supportDebugQueue:1;
+		u_int32_t supportNVCacheErase:1;
+		u_int32_t supportForceTo512e:1;
+		u_int32_t supportHOQRebuild:1;
+		u_int32_t supportAllowedOpsforDrvRemoval:1;
+		u_int32_t supportDrvActivityLEDSetting:1;
+		u_int32_t supportNVDRAM:1;
+		u_int32_t supportForceFlash:1;
+		u_int32_t supportDisableSESMonitoring:1;
+		u_int32_t supportCacheBypassModes:1;
+		u_int32_t supportSecurityonJBOD:1;
+		u_int32_t discardCacheDuringLDDelete:1;
+		u_int32_t supportTTYLogCompression:1;
+		u_int32_t supportCPLDUpdate:1;
+		u_int32_t supportDiskCacheSettingForSysPDs:1;
+		u_int32_t supportExtendedSSCSize:1;
+		u_int32_t useSeqNumJbodFP:1;
+		u_int32_t reserved:7;
+	}	adapterOperations3;
+
+	u_int8_t pad[0x800 - 0x7EC];	/* 0x7EC */
+} __packed;
+
+/*
+ * When the SCSI mid-layer calls the driver's reset routine, the driver waits
+ * for MRSAS_RESET_WAIT_TIME seconds for all outstanding IO to complete. Note
+ * that the driver cannot _actually_ abort or reset pending commands. While
+ * it is waiting for the commands to complete, it prints a diagnostic message
+ * every MRSAS_RESET_NOTICE_INTERVAL seconds.
+ */
+#define	MRSAS_RESET_WAIT_TIME			180
+#define	MRSAS_INTERNAL_CMD_WAIT_TIME	180
+#define	MRSAS_IOC_INIT_WAIT_TIME		60
+#define	MRSAS_RESET_NOTICE_INTERVAL		5
+#define	MRSAS_IOCTL_CMD					0
+#define	MRSAS_DEFAULT_CMD_TIMEOUT		90
+#define	MRSAS_THROTTLE_QUEUE_DEPTH		16
+
+/*
+ * MSI-X register offset defines
+ */
+#define	MPI2_SUP_REPLY_POST_HOST_INDEX_OFFSET	(0x0000030C)
+#define	MPI2_REPLY_POST_HOST_INDEX_OFFSET		(0x0000006C)
+#define	MR_MAX_REPLY_QUEUES_OFFSET				(0x0000001F)
+#define	MR_MAX_REPLY_QUEUES_EXT_OFFSET			(0x003FC000)
+#define	MR_MAX_REPLY_QUEUES_EXT_OFFSET_SHIFT	14
+#define	MR_MAX_MSIX_REG_ARRAY					16
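+
+/*
+ * Illustrative sketch (not from the upstream header): extracting the
+ * supported reply-queue (MSI-X) count from a scratch-pad register value with
+ * the extended mask above.  The argument name is hypothetical.
+ */
+#if 0	/* example only */
+static u_int32_t
+example_msix_count(u_int32_t scratch_pad)
+{
+	return ((scratch_pad & MR_MAX_REPLY_QUEUES_EXT_OFFSET) >>
+	    MR_MAX_REPLY_QUEUES_EXT_OFFSET_SHIFT);
+}
+#endif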
+
+/*
+ * SYNC CACHE offset define
+ */
+#define MR_CAN_HANDLE_SYNC_CACHE_OFFSET     0x01000000
+
+/*
+ * FW reports the maximum number of commands that it can accept (the maximum
+ * number of commands that can be outstanding) at any time. The driver must
+ * report a lower number to the mid layer because it issues a few internal
+ * commands itself (e.g., AEN, abort cmd, IOCTLs, etc.). The numbers of
+ * commands it needs are shown below.
+ */
+#define	MRSAS_INT_CMDS			32
+#define	MRSAS_SKINNY_INT_CMDS	5
+#define	MRSAS_MAX_MSIX_QUEUES	128
+
+/*
+ * FW can accept both 32- and 64-bit SGLs. We want to allocate 32- or 64-bit
+ * SGLs based on the size of bus_addr_t.
+ */
+#define	IS_DMA64							(sizeof(bus_addr_t) == 8)
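+
+/*
+ * Illustrative sketch (not from the upstream header): choosing the MFI SGL
+ * flag from the test above.  The helper name is hypothetical.
+ */
+#if 0	/* example only */
+static u_int16_t
+example_sgl_flag(void)
+{
+	return (IS_DMA64 ? MFI_FRAME_SGL64 : MFI_FRAME_SGL32);
+}
+#endif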
+
+#define	MFI_XSCALE_OMR0_CHANGE_INTERRUPT	0x00000001
+#define	MFI_INTR_FLAG_REPLY_MESSAGE			0x00000001
+#define	MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE	0x00000002
+#define	MFI_G2_OUTBOUND_DOORBELL_CHANGE_INTERRUPT	0x00000004
+
+#define	MFI_OB_INTR_STATUS_MASK				0x00000002
+#define	MFI_POLL_TIMEOUT_SECS				60
+
+#define	MFI_REPLY_1078_MESSAGE_INTERRUPT	0x80000000
+#define	MFI_REPLY_GEN2_MESSAGE_INTERRUPT	0x00000001
+#define	MFI_GEN2_ENABLE_INTERRUPT_MASK		0x00000001
+#define	MFI_REPLY_SKINNY_MESSAGE_INTERRUPT	0x40000000
+#define	MFI_SKINNY_ENABLE_INTERRUPT_MASK	(0x00000001)
+#define	MFI_1068_PCSR_OFFSET				0x84
+#define	MFI_1068_FW_HANDSHAKE_OFFSET		0x64
+#define	MFI_1068_FW_READY					0xDDDD0000
+
+typedef union _MFI_CAPABILITIES {
+	struct {
+		u_int32_t support_fp_remote_lun:1;
+		u_int32_t support_additional_msix:1;
+		u_int32_t support_fastpath_wb:1;
+		u_int32_t support_max_255lds:1;
+		u_int32_t support_ndrive_r1_lb:1;
+		u_int32_t support_core_affinity:1;
+		u_int32_t security_protocol_cmds_fw:1;
+		u_int32_t support_ext_queue_depth:1;
+		u_int32_t support_ext_io_size:1;
+		u_int32_t reserved:23;
+	}	mfi_capabilities;
+	u_int32_t reg;
+}	MFI_CAPABILITIES;
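[Editor's illustration] The union above lets the driver set capability bits by name and then hand firmware the packed 32-bit value through the reg member. A small sketch; the chosen bits are arbitrary:

	MFI_CAPABILITIES cap;

	cap.reg = 0;					/* start with no capabilities */
	cap.mfi_capabilities.support_max_255lds = 1;	/* advertise selected features */
	cap.mfi_capabilities.support_ndrive_r1_lb = 1;
	/* cap.reg now carries the packed value to be placed in an init frame */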
+
+#pragma pack(1)
+struct mrsas_sge32 {
+	u_int32_t phys_addr;
+	u_int32_t length;
+};
+
+#pragma pack()
+
+#pragma pack(1)
+struct mrsas_sge64 {
+	u_int64_t phys_addr;
+	u_int32_t length;
+};
+
+#pragma pack()
+
+#pragma pack(1)
+union mrsas_sgl {
+	struct mrsas_sge32 sge32[1];
+	struct mrsas_sge64 sge64[1];
+};
+
+#pragma pack()
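[Editor's illustration] The IS_DMA64 macro defined earlier is what typically selects between these two SGE layouts when a frame's SGL is filled. A minimal sketch, where sgl, paddr and len are hypothetical locals:

	if (IS_DMA64) {
		sgl->sge64[0].phys_addr = paddr;	/* full 64-bit bus address */
		sgl->sge64[0].length = len;
	} else {
		sgl->sge32[0].phys_addr = (u_int32_t)paddr;
		sgl->sge32[0].length = len;
	}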
+
+#pragma pack(1)
+struct mrsas_header {
+	u_int8_t cmd;			/* 00h */
+	u_int8_t sense_len;		/* 01h */
+	u_int8_t cmd_status;		/* 02h */
+	u_int8_t scsi_status;		/* 03h */
+
+	u_int8_t target_id;		/* 04h */
+	u_int8_t lun;			/* 05h */
+	u_int8_t cdb_len;		/* 06h */
+	u_int8_t sge_count;		/* 07h */
+
+	u_int32_t context;		/* 08h */
+	u_int32_t pad_0;		/* 0Ch */
+
+	u_int16_t flags;		/* 10h */
+	u_int16_t timeout;		/* 12h */
+	u_int32_t data_xferlen;		/* 14h */
+};
+
+#pragma pack()
+
+#pragma pack(1)
+struct mrsas_init_frame {
+	u_int8_t cmd;			/* 00h */
+	u_int8_t reserved_0;		/* 01h */
+	u_int8_t cmd_status;		/* 02h */
+
+	u_int8_t reserved_1;		/* 03h */
+	MFI_CAPABILITIES driver_operations;	/* 04h */
+	u_int32_t context;		/* 08h */
+	u_int32_t pad_0;		/* 0Ch */
+
+	u_int16_t flags;		/* 10h */
+	u_int16_t reserved_3;		/* 12h */
+	u_int32_t data_xfer_len;	/* 14h */
+
+	u_int32_t queue_info_new_phys_addr_lo;	/* 18h */
+	u_int32_t queue_info_new_phys_addr_hi;	/* 1Ch */
+	u_int32_t queue_info_old_phys_addr_lo;	/* 20h */
+	u_int32_t queue_info_old_phys_addr_hi;	/* 24h */
+	u_int32_t driver_ver_lo;	/* 28h */
+	u_int32_t driver_ver_hi;	/* 2Ch */
+	u_int32_t reserved_4[4];	/* 30h */
+};
+
+#pragma pack()
+
+#pragma pack(1)
+struct mrsas_io_frame {
+	u_int8_t cmd;			/* 00h */
+	u_int8_t sense_len;		/* 01h */
+	u_int8_t cmd_status;		/* 02h */
+	u_int8_t scsi_status;		/* 03h */
+
+	u_int8_t target_id;		/* 04h */
+	u_int8_t access_byte;		/* 05h */
+	u_int8_t reserved_0;		/* 06h */
+	u_int8_t sge_count;		/* 07h */
+
+	u_int32_t context;		/* 08h */
+	u_int32_t pad_0;		/* 0Ch */
+
+	u_int16_t flags;		/* 10h */
+	u_int16_t timeout;		/* 12h */
+	u_int32_t lba_count;		/* 14h */
+
+	u_int32_t sense_buf_phys_addr_lo;	/* 18h */
+	u_int32_t sense_buf_phys_addr_hi;	/* 1Ch */
+
+	u_int32_t start_lba_lo;		/* 20h */
+	u_int32_t start_lba_hi;		/* 24h */
+
+	union mrsas_sgl sgl;		/* 28h */
+};
+
+#pragma pack()
+
+#pragma pack(1)
+struct mrsas_pthru_frame {
+	u_int8_t cmd;			/* 00h */
+	u_int8_t sense_len;		/* 01h */
+	u_int8_t cmd_status;		/* 02h */
+	u_int8_t scsi_status;		/* 03h */
+
+	u_int8_t target_id;		/* 04h */
+	u_int8_t lun;			/* 05h */
+	u_int8_t cdb_len;		/* 06h */
+	u_int8_t sge_count;		/* 07h */
+
+	u_int32_t context;		/* 08h */
+	u_int32_t pad_0;		/* 0Ch */
+
+	u_int16_t flags;		/* 10h */
+	u_int16_t timeout;		/* 12h */
+	u_int32_t data_xfer_len;	/* 14h */
+
+	u_int32_t sense_buf_phys_addr_lo;	/* 18h */
+	u_int32_t sense_buf_phys_addr_hi;	/* 1Ch */
+
+	u_int8_t cdb[16];		/* 20h */
+	union mrsas_sgl sgl;		/* 30h */
+};
+
+#pragma pack()
+
+#pragma pack(1)
+struct mrsas_dcmd_frame {
+	u_int8_t cmd;			/* 00h */
+	u_int8_t reserved_0;		/* 01h */
+	u_int8_t cmd_status;		/* 02h */
+	u_int8_t reserved_1[4];		/* 03h */
+	u_int8_t sge_count;		/* 07h */
+
+	u_int32_t context;		/* 08h */
+	u_int32_t pad_0;		/* 0Ch */
+
+	u_int16_t flags;		/* 10h */
+	u_int16_t timeout;		/* 12h */
+
+	u_int32_t data_xfer_len;	/* 14h */
+	u_int32_t opcode;		/* 18h */
+
+	union {				/* 1Ch */
+		u_int8_t b[12];
+		u_int16_t s[6];
+		u_int32_t w[3];
+	}	mbox;
+
+	union mrsas_sgl sgl;		/* 28h */
+};
+
+#pragma pack()
+
+#pragma pack(1)
+struct mrsas_abort_frame {
+	u_int8_t cmd;			/* 00h */
+	u_int8_t reserved_0;		/* 01h */
+	u_int8_t cmd_status;		/* 02h */
+
+	u_int8_t reserved_1;		/* 03h */
+	MFI_CAPABILITIES driver_operations;	/* 04h */
+	u_int32_t context;		/* 08h */
+	u_int32_t pad_0;		/* 0Ch */
+
+	u_int16_t flags;		/* 10h */
+	u_int16_t reserved_3;		/* 12h */
+	u_int32_t reserved_4;		/* 14h */
+
+	u_int32_t abort_context;	/* 18h */
+	u_int32_t pad_1;		/* 1Ch */
+
+	u_int32_t abort_mfi_phys_addr_lo;	/* 20h */
+	u_int32_t abort_mfi_phys_addr_hi;	/* 24h */
+
+	u_int32_t reserved_5[6];	/* 28h */
+};
+
+#pragma pack()
+
+#pragma pack(1)
+struct mrsas_smp_frame {
+	u_int8_t cmd;			/* 00h */
+	u_int8_t reserved_1;		/* 01h */
+	u_int8_t cmd_status;		/* 02h */
+	u_int8_t connection_status;	/* 03h */
+
+	u_int8_t reserved_2[3];		/* 04h */
+	u_int8_t sge_count;		/* 07h */
+
+	u_int32_t context;		/* 08h */
+	u_int32_t pad_0;		/* 0Ch */
+
+	u_int16_t flags;		/* 10h */
+	u_int16_t timeout;		/* 12h */
+
+	u_int32_t data_xfer_len;	/* 14h */
+	u_int64_t sas_addr;		/* 18h */
+
+	union {
+		struct mrsas_sge32 sge32[2];	/* [0]: resp [1]: req */
+		struct mrsas_sge64 sge64[2];	/* [0]: resp [1]: req */
+	}	sgl;
+};
+
+#pragma pack()
+
+
+#pragma pack(1)
+struct mrsas_stp_frame {
+	u_int8_t cmd;			/* 00h */
+	u_int8_t reserved_1;		/* 01h */
+	u_int8_t cmd_status;		/* 02h */
+	u_int8_t reserved_2;		/* 03h */
+
+	u_int8_t target_id;		/* 04h */
+	u_int8_t reserved_3[2];		/* 05h */
+	u_int8_t sge_count;		/* 07h */
+
+	u_int32_t context;		/* 08h */
+	u_int32_t pad_0;		/* 0Ch */
+
+	u_int16_t flags;		/* 10h */
+	u_int16_t timeout;		/* 12h */
+
+	u_int32_t data_xfer_len;	/* 14h */
+
+	u_int16_t fis[10];		/* 18h */
+	u_int32_t stp_flags;
+
+	union {
+		struct mrsas_sge32 sge32[2];	/* [0]: resp [1]: data */
+		struct mrsas_sge64 sge64[2];	/* [0]: resp [1]: data */
+	}	sgl;
+};
+
+#pragma pack()
+
+union mrsas_frame {
+	struct mrsas_header hdr;
+	struct mrsas_init_frame init;
+	struct mrsas_io_frame io;
+	struct mrsas_pthru_frame pthru;
+	struct mrsas_dcmd_frame dcmd;
+	struct mrsas_abort_frame abort;
+	struct mrsas_smp_frame smp;
+	struct mrsas_stp_frame stp;
+	u_int8_t raw_bytes[64];
+};
+
+#pragma pack(1)
+union mrsas_evt_class_locale {
+
+	struct {
+		u_int16_t locale;
+		u_int8_t reserved;
+		int8_t	class;
+	} __packed members;
+
+	u_int32_t word;
+
+} __packed;
+
+#pragma pack()
+
+
+#pragma pack(1)
+struct mrsas_evt_log_info {
+	u_int32_t newest_seq_num;
+	u_int32_t oldest_seq_num;
+	u_int32_t clear_seq_num;
+	u_int32_t shutdown_seq_num;
+	u_int32_t boot_seq_num;
+
+} __packed;
+
+#pragma pack()
+
+struct mrsas_progress {
+
+	u_int16_t progress;
+	u_int16_t elapsed_seconds;
+
+} __packed;
+
+struct mrsas_evtarg_ld {
+
+	u_int16_t target_id;
+	u_int8_t ld_index;
+	u_int8_t reserved;
+
+} __packed;
+
+struct mrsas_evtarg_pd {
+	u_int16_t device_id;
+	u_int8_t encl_index;
+	u_int8_t slot_number;
+
+} __packed;
+
+struct mrsas_evt_detail {
+
+	u_int32_t seq_num;
+	u_int32_t time_stamp;
+	u_int32_t code;
+	union mrsas_evt_class_locale cl;
+	u_int8_t arg_type;
+	u_int8_t reserved1[15];
+
+	union {
+		struct {
+			struct mrsas_evtarg_pd pd;
+			u_int8_t cdb_length;
+			u_int8_t sense_length;
+			u_int8_t reserved[2];
+			u_int8_t cdb[16];
+			u_int8_t sense[64];
+		} __packed cdbSense;
+
+		struct mrsas_evtarg_ld ld;
+
+		struct {
+			struct mrsas_evtarg_ld ld;
+			u_int64_t count;
+		} __packed ld_count;
+
+		struct {
+			u_int64_t lba;
+			struct mrsas_evtarg_ld ld;
+		} __packed ld_lba;
+
+		struct {
+			struct mrsas_evtarg_ld ld;
+			u_int32_t prevOwner;
+			u_int32_t newOwner;
+		} __packed ld_owner;
+
+		struct {
+			u_int64_t ld_lba;
+			u_int64_t pd_lba;
+			struct mrsas_evtarg_ld ld;
+			struct mrsas_evtarg_pd pd;
+		} __packed ld_lba_pd_lba;
+
+		struct {
+			struct mrsas_evtarg_ld ld;
+			struct mrsas_progress prog;
+		} __packed ld_prog;
+
+		struct {
+			struct mrsas_evtarg_ld ld;
+			u_int32_t prev_state;
+			u_int32_t new_state;
+		} __packed ld_state;
+
+		struct {
+			u_int64_t strip;
+			struct mrsas_evtarg_ld ld;
+		} __packed ld_strip;
+
+		struct mrsas_evtarg_pd pd;
+
+		struct {
+			struct mrsas_evtarg_pd pd;
+			u_int32_t err;
+		} __packed pd_err;
+
+		struct {
+			u_int64_t lba;
+			struct mrsas_evtarg_pd pd;
+		} __packed pd_lba;
+
+		struct {
+			u_int64_t lba;
+			struct mrsas_evtarg_pd pd;
+			struct mrsas_evtarg_ld ld;
+		} __packed pd_lba_ld;
+
+		struct {
+			struct mrsas_evtarg_pd pd;
+			struct mrsas_progress prog;
+		} __packed pd_prog;
+
+		struct {
+			struct mrsas_evtarg_pd pd;
+			u_int32_t prevState;
+			u_int32_t newState;
+		} __packed pd_state;
+
+		struct {
+			u_int16_t vendorId;
+			u_int16_t deviceId;
+			u_int16_t subVendorId;
+			u_int16_t subDeviceId;
+		} __packed pci;
+
+		u_int32_t rate;
+		char	str[96];
+
+		struct {
+			u_int32_t rtc;
+			u_int32_t elapsedSeconds;
+		} __packed time;
+
+		struct {
+			u_int32_t ecar;
+			u_int32_t elog;
+			char	str[64];
+		} __packed ecc;
+
+		u_int8_t b[96];
+		u_int16_t s[48];
+		u_int32_t w[24];
+		u_int64_t d[12];
+	}	args;
+
+	char	description[128];
+
+} __packed;
+
+struct mrsas_irq_context {
+	struct mrsas_softc *sc;
+	uint32_t MSIxIndex;
+};
+
+enum MEGASAS_OCR_REASON {
+	FW_FAULT_OCR = 0,
+	MFI_DCMD_TIMEOUT_OCR = 1,
+};
+
+/* Controller management info added to support Linux Emulator */
+#define	MAX_MGMT_ADAPTERS               1024
+
+struct mrsas_mgmt_info {
+	u_int16_t count;
+	struct mrsas_softc *sc_ptr[MAX_MGMT_ADAPTERS];
+	int	max_index;
+};
+
+#define	PCI_TYPE0_ADDRESSES             6
+#define	PCI_TYPE1_ADDRESSES             2
+#define	PCI_TYPE2_ADDRESSES             5
+
+typedef struct _MRSAS_DRV_PCI_COMMON_HEADER {
+	u_int16_t vendorID;
+	      //(ro)
+	u_int16_t deviceID;
+	      //(ro)
+	u_int16_t command;
+	      //Device control
+	u_int16_t status;
+	u_int8_t revisionID;
+	      //(ro)
+	u_int8_t progIf;
+	      //(ro)
+	u_int8_t subClass;
+	      //(ro)
+	u_int8_t baseClass;
+	      //(ro)
+	u_int8_t cacheLineSize;
+	      //(ro +)
+	u_int8_t latencyTimer;
+	      //(ro +)
+	u_int8_t headerType;
+	      //(ro)
+	u_int8_t bist;
+	      //Built in self test
+
+	union {
+		struct _MRSAS_DRV_PCI_HEADER_TYPE_0 {
+			u_int32_t baseAddresses[PCI_TYPE0_ADDRESSES];
+			u_int32_t cis;
+			u_int16_t subVendorID;
+			u_int16_t subSystemID;
+			u_int32_t romBaseAddress;
+			u_int8_t capabilitiesPtr;
+			u_int8_t reserved1[3];
+			u_int32_t reserved2;
+			u_int8_t interruptLine;
+			u_int8_t interruptPin;
+			      //(ro)
+			u_int8_t minimumGrant;
+			      //(ro)
+			u_int8_t maximumLatency;
+			      //(ro)
+		}	type0;
+
+		/*
+	         * PCI to PCI Bridge
+	         */
+
+		struct _MRSAS_DRV_PCI_HEADER_TYPE_1 {
+			u_int32_t baseAddresses[PCI_TYPE1_ADDRESSES];
+			u_int8_t primaryBus;
+			u_int8_t secondaryBus;
+			u_int8_t subordinateBus;
+			u_int8_t secondaryLatency;
+			u_int8_t ioBase;
+			u_int8_t ioLimit;
+			u_int16_t secondaryStatus;
+			u_int16_t memoryBase;
+			u_int16_t memoryLimit;
+			u_int16_t prefetchBase;
+			u_int16_t prefetchLimit;
+			u_int32_t prefetchBaseUpper32;
+			u_int32_t prefetchLimitUpper32;
+			u_int16_t ioBaseUpper16;
+			u_int16_t ioLimitUpper16;
+			u_int8_t capabilitiesPtr;
+			u_int8_t reserved1[3];
+			u_int32_t romBaseAddress;
+			u_int8_t interruptLine;
+			u_int8_t interruptPin;
+			u_int16_t bridgeControl;
+		}	type1;
+
+		/*
+	         * PCI to CARDBUS Bridge
+	         */
+
+		struct _MRSAS_DRV_PCI_HEADER_TYPE_2 {
+			u_int32_t socketRegistersBaseAddress;
+			u_int8_t capabilitiesPtr;
+			u_int8_t reserved;
+			u_int16_t secondaryStatus;
+			u_int8_t primaryBus;
+			u_int8_t secondaryBus;
+			u_int8_t subordinateBus;
+			u_int8_t secondaryLatency;
+			struct {
+				u_int32_t base;
+				u_int32_t limit;
+			}	range [PCI_TYPE2_ADDRESSES - 1];
+			u_int8_t interruptLine;
+			u_int8_t interruptPin;
+			u_int16_t bridgeControl;
+		}	type2;
+	}	u;
+
+}	MRSAS_DRV_PCI_COMMON_HEADER, *PMRSAS_DRV_PCI_COMMON_HEADER;
+
+#define	MRSAS_DRV_PCI_COMMON_HEADER_SIZE sizeof(MRSAS_DRV_PCI_COMMON_HEADER)   //64 bytes
+
+typedef struct _MRSAS_DRV_PCI_LINK_CAPABILITY {
+	union {
+		struct {
+			u_int32_t linkSpeed:4;
+			u_int32_t linkWidth:6;
+			u_int32_t aspmSupport:2;
+			u_int32_t losExitLatency:3;
+			u_int32_t l1ExitLatency:3;
+			u_int32_t rsvdp:6;
+			u_int32_t portNumber:8;
+		}	bits;
+
+		u_int32_t asUlong;
+	}	u;
+}	MRSAS_DRV_PCI_LINK_CAPABILITY, *PMRSAS_DRV_PCI_LINK_CAPABILITY;
+
+#define	MRSAS_DRV_PCI_LINK_CAPABILITY_SIZE sizeof(MRSAS_DRV_PCI_LINK_CAPABILITY)
+
+typedef struct _MRSAS_DRV_PCI_LINK_STATUS_CAPABILITY {
+	union {
+		struct {
+			u_int16_t linkSpeed:4;
+			u_int16_t negotiatedLinkWidth:6;
+			u_int16_t linkTrainingError:1;
+			u_int16_t linkTraning:1;
+			u_int16_t slotClockConfig:1;
+			u_int16_t rsvdZ:3;
+		}	bits;
+
+		u_int16_t asUshort;
+	}	u;
+	u_int16_t reserved;
+}	MRSAS_DRV_PCI_LINK_STATUS_CAPABILITY, *PMRSAS_DRV_PCI_LINK_STATUS_CAPABILITY;
+
+#define	MRSAS_DRV_PCI_LINK_STATUS_CAPABILITY_SIZE sizeof(MRSAS_DRV_PCI_LINK_STATUS_CAPABILITY)
+
+
+typedef struct _MRSAS_DRV_PCI_CAPABILITIES {
+	MRSAS_DRV_PCI_LINK_CAPABILITY linkCapability;
+	MRSAS_DRV_PCI_LINK_STATUS_CAPABILITY linkStatusCapability;
+}	MRSAS_DRV_PCI_CAPABILITIES, *PMRSAS_DRV_PCI_CAPABILITIES;
+
+#define	MRSAS_DRV_PCI_CAPABILITIES_SIZE sizeof(MRSAS_DRV_PCI_CAPABILITIES)
+
+/* PCI information */
+typedef struct _MRSAS_DRV_PCI_INFORMATION {
+	u_int32_t busNumber;
+	u_int8_t deviceNumber;
+	u_int8_t functionNumber;
+	u_int8_t interruptVector;
+	u_int8_t reserved1;
+	MRSAS_DRV_PCI_COMMON_HEADER pciHeaderInfo;
+	MRSAS_DRV_PCI_CAPABILITIES capability;
+	u_int32_t domainID;
+	u_int8_t reserved2[28];
+}	MRSAS_DRV_PCI_INFORMATION, *PMRSAS_DRV_PCI_INFORMATION;
+
+/*******************************************************************
+ * per-instance data
+ ********************************************************************/
+struct mrsas_softc {
+	device_t mrsas_dev;
+	struct cdev *mrsas_cdev;
+	struct intr_config_hook mrsas_ich;
+	struct cdev *mrsas_linux_emulator_cdev;
+	uint16_t device_id;
+	struct resource *reg_res;
+	int	reg_res_id;
+	bus_space_tag_t bus_tag;
+	bus_space_handle_t bus_handle;
+	bus_dma_tag_t mrsas_parent_tag;
+	bus_dma_tag_t verbuf_tag;
+	bus_dmamap_t verbuf_dmamap;
+	void   *verbuf_mem;
+	bus_addr_t verbuf_phys_addr;
+	bus_dma_tag_t sense_tag;
+	bus_dmamap_t sense_dmamap;
+	void   *sense_mem;
+	bus_addr_t sense_phys_addr;
+	bus_dma_tag_t io_request_tag;
+	bus_dmamap_t io_request_dmamap;
+	void   *io_request_mem;
+	bus_addr_t io_request_phys_addr;
+	bus_dma_tag_t chain_frame_tag;
+	bus_dmamap_t chain_frame_dmamap;
+	void   *chain_frame_mem;
+	bus_addr_t chain_frame_phys_addr;
+	bus_dma_tag_t reply_desc_tag;
+	bus_dmamap_t reply_desc_dmamap;
+	void   *reply_desc_mem;
+	bus_addr_t reply_desc_phys_addr;
+	bus_dma_tag_t ioc_init_tag;
+	bus_dmamap_t ioc_init_dmamap;
+	void   *ioc_init_mem;
+	bus_addr_t ioc_init_phys_mem;
+	bus_dma_tag_t data_tag;
+	struct cam_sim *sim_0;
+	struct cam_sim *sim_1;
+	struct cam_path *path_0;
+	struct cam_path *path_1;
+	struct mtx sim_lock;
+	struct mtx pci_lock;
+	struct mtx io_lock;
+	struct mtx ioctl_lock;
+	struct mtx mpt_cmd_pool_lock;
+	struct mtx mfi_cmd_pool_lock;
+	struct mtx raidmap_lock;
+	struct mtx aen_lock;
+	struct selinfo mrsas_select;
+	uint32_t mrsas_aen_triggered;
+	uint32_t mrsas_poll_waiting;
+
+	struct sema ioctl_count_sema;
+	uint32_t max_fw_cmds;
+	uint32_t max_num_sge;
+	struct resource *mrsas_irq[MAX_MSIX_COUNT];
+	void   *intr_handle[MAX_MSIX_COUNT];
+	int	irq_id[MAX_MSIX_COUNT];
+	struct mrsas_irq_context irq_context[MAX_MSIX_COUNT];
+	int	msix_vectors;
+	int	msix_enable;
+	uint32_t msix_reg_offset[16];
+	uint8_t	mask_interrupts;
+	uint16_t max_chain_frame_sz;
+	struct mrsas_mpt_cmd **mpt_cmd_list;
+	struct mrsas_mfi_cmd **mfi_cmd_list;
+	TAILQ_HEAD(, mrsas_mpt_cmd) mrsas_mpt_cmd_list_head;
+	TAILQ_HEAD(, mrsas_mfi_cmd) mrsas_mfi_cmd_list_head;
+	bus_addr_t req_frames_desc_phys;
+	u_int8_t *req_frames_desc;
+	u_int8_t *req_desc;
+	bus_addr_t io_request_frames_phys;
+	u_int8_t *io_request_frames;
+	bus_addr_t reply_frames_desc_phys;
+	u_int16_t last_reply_idx[MAX_MSIX_COUNT];
+	u_int32_t reply_q_depth;
+	u_int32_t request_alloc_sz;
+	u_int32_t reply_alloc_sz;
+	u_int32_t io_frames_alloc_sz;
+	u_int32_t chain_frames_alloc_sz;
+	u_int16_t max_sge_in_main_msg;
+	u_int16_t max_sge_in_chain;
+	u_int8_t chain_offset_io_request;
+	u_int8_t chain_offset_mfi_pthru;
+	u_int32_t map_sz;
+	u_int64_t map_id;
+	u_int64_t pd_seq_map_id;
+	struct mrsas_mfi_cmd *map_update_cmd;
+	struct mrsas_mfi_cmd *jbod_seq_cmd;
+	struct mrsas_mfi_cmd *aen_cmd;
+	u_int8_t fast_path_io;
+	void   *chan;
+	void   *ocr_chan;
+	u_int8_t adprecovery;
+	u_int8_t remove_in_progress;
+	u_int8_t ocr_thread_active;
+	u_int8_t do_timedout_reset;
+	u_int32_t reset_in_progress;
+	u_int32_t reset_count;
+	u_int32_t block_sync_cache;
+	u_int8_t fw_sync_cache_support;
+	mrsas_atomic_t target_reset_outstanding;
+#define	MRSAS_MAX_TM_TARGETS	(MRSAS_MAX_PD + MRSAS_MAX_LD_IDS)
+	struct mrsas_mpt_cmd *target_reset_pool[MRSAS_MAX_TM_TARGETS];
+
+	bus_dma_tag_t jbodmap_tag[2];
+	bus_dmamap_t jbodmap_dmamap[2];
+	void   *jbodmap_mem[2];
+	bus_addr_t jbodmap_phys_addr[2];
+
+	bus_dma_tag_t raidmap_tag[2];
+	bus_dmamap_t raidmap_dmamap[2];
+	void   *raidmap_mem[2];
+	bus_addr_t raidmap_phys_addr[2];
+	bus_dma_tag_t mficmd_frame_tag;
+	bus_dma_tag_t mficmd_sense_tag;
+	bus_dma_tag_t evt_detail_tag;
+	bus_dmamap_t evt_detail_dmamap;
+	struct mrsas_evt_detail *evt_detail_mem;
+	bus_addr_t evt_detail_phys_addr;
+	struct mrsas_ctrl_info *ctrl_info;
+	bus_dma_tag_t ctlr_info_tag;
+	bus_dmamap_t ctlr_info_dmamap;
+	void   *ctlr_info_mem;
+	bus_addr_t ctlr_info_phys_addr;
+	u_int32_t max_sectors_per_req;
+	u_int32_t disableOnlineCtrlReset;
+	mrsas_atomic_t fw_outstanding;
+	u_int32_t mrsas_debug;
+	u_int32_t mrsas_io_timeout;
+	u_int32_t mrsas_fw_fault_check_delay;
+	u_int32_t io_cmds_highwater;
+	u_int8_t UnevenSpanSupport;
+	struct sysctl_ctx_list sysctl_ctx;
+	struct sysctl_oid *sysctl_tree;
+	struct proc *ocr_thread;
+	u_int32_t last_seq_num;
+	bus_dma_tag_t el_info_tag;
+	bus_dmamap_t el_info_dmamap;
+	void   *el_info_mem;
+	bus_addr_t el_info_phys_addr;
+	struct mrsas_pd_list pd_list[MRSAS_MAX_PD];
+	struct mrsas_pd_list local_pd_list[MRSAS_MAX_PD];
+	u_int8_t ld_ids[MRSAS_MAX_LD_IDS];
+	struct taskqueue *ev_tq;
+	struct task ev_task;
+	u_int32_t CurLdCount;
+	u_int64_t reset_flags;
+	int	lb_pending_cmds;
+	LD_LOAD_BALANCE_INFO load_balance_info[MAX_LOGICAL_DRIVES_EXT];
+	LD_SPAN_INFO log_to_span[MAX_LOGICAL_DRIVES_EXT];
+
+	u_int8_t mrsas_gen3_ctrl;
+	u_int8_t secure_jbod_support;
+	u_int8_t use_seqnum_jbod_fp;
+	u_int8_t max256vdSupport;
+	u_int16_t fw_supported_vd_count;
+	u_int16_t fw_supported_pd_count;
+
+	u_int16_t drv_supported_vd_count;
+	u_int16_t drv_supported_pd_count;
+
+	u_int32_t max_map_sz;
+	u_int32_t current_map_sz;
+	u_int32_t old_map_sz;
+	u_int32_t new_map_sz;
+	u_int32_t drv_map_sz;
+
+	/* Non dma-able memory. Driver local copy. */
+	MR_DRV_RAID_MAP_ALL *ld_drv_map[2];
+};
+
+/* Compatibility shims for different OS versions */
+#if __FreeBSD_version >= 800001
+#define	mrsas_kproc_create(func, farg, proc_ptr, flags, stackpgs, fmtstr, arg) \
+    kproc_create(func, farg, proc_ptr, flags, stackpgs, fmtstr, arg)
+#define	mrsas_kproc_exit(arg)   kproc_exit(arg)
+#else
+#define	mrsas_kproc_create(func, farg, proc_ptr, flags, stackpgs, fmtstr, arg) \
+    kthread_create(func, farg, proc_ptr, flags, stackpgs, fmtstr, arg)
+#define	mrsas_kproc_exit(arg)   kthread_exit(arg)
+#endif
+
+static __inline void
+mrsas_clear_bit(int b, volatile void *p)
+{
+	atomic_clear_int(((volatile int *)p) + (b >> 5), 1 << (b & 0x1f));
+}
+
+static __inline void
+mrsas_set_bit(int b, volatile void *p)
+{
+	atomic_set_int(((volatile int *)p) + (b >> 5), 1 << (b & 0x1f));
+}
+
+static __inline int
+mrsas_test_bit(int b, volatile void *p)
+{
+	return ((volatile int *)p)[b >> 5] & (1 << (b & 0x1f));
+}
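[Editor's illustration] These helpers treat any buffer as an array of 32-bit words and address bit b as word b >> 5, bit b & 0x1f. A minimal usage sketch with a hypothetical 128-bit bitmap:

	volatile u_int32_t busy_map[4] = { 0 };

	mrsas_set_bit(37, busy_map);		/* word 1, bit 5 */
	if (mrsas_test_bit(37, busy_map))
		mrsas_clear_bit(37, busy_map);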
+
+#endif					/* MRSAS_H */


Property changes on: trunk/sys/dev/mrsas/mrsas.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/mrsas/mrsas_cam.c
===================================================================
--- trunk/sys/dev/mrsas/mrsas_cam.c	                        (rev 0)
+++ trunk/sys/dev/mrsas/mrsas_cam.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,1687 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2015, AVAGO Tech. All rights reserved. Author: Marian Choy
+ * Copyright (c) 2014, LSI Corp. All rights reserved. Author: Marian Choy
+ * Support: freebsdraid at avagotech.com
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer. 2. Redistributions
+ * in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or other
+ * materials provided with the distribution. 3. Neither the name of the
+ * <ORGANIZATION> nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/mrsas/mrsas_cam.c 315813 2017-03-23 06:41:13Z mav $");
+
+#include "dev/mrsas/mrsas.h"
+
+#include <cam/cam.h>
+#include <cam/cam_ccb.h>
+#include <cam/cam_sim.h>
+#include <cam/cam_xpt_sim.h>
+#include <cam/cam_debug.h>
+#include <cam/cam_periph.h>
+#include <cam/cam_xpt_periph.h>
+
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_message.h>
+#include <sys/taskqueue.h>
+#include <sys/kernel.h>
+
+
+#include <sys/time.h>			/* XXX for pcpu.h */
+#include <sys/pcpu.h>			/* XXX for PCPU_GET */
+
+#define	smp_processor_id()  PCPU_GET(cpuid)
+
+/*
+ * Function prototypes
+ */
+int	mrsas_cam_attach(struct mrsas_softc *sc);
+int	mrsas_find_io_type(struct cam_sim *sim, union ccb *ccb);
+int	mrsas_bus_scan(struct mrsas_softc *sc);
+int	mrsas_bus_scan_sim(struct mrsas_softc *sc, struct cam_sim *sim);
+int 
+mrsas_map_request(struct mrsas_softc *sc,
+    struct mrsas_mpt_cmd *cmd, union ccb *ccb);
+int
+mrsas_build_ldio_rw(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd,
+    union ccb *ccb);
+int
+mrsas_build_ldio_nonrw(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd,
+    union ccb *ccb);
+int
+mrsas_build_syspdio(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd,
+    union ccb *ccb, struct cam_sim *sim, u_int8_t fp_possible);
+int
+mrsas_setup_io(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd,
+    union ccb *ccb, u_int32_t device_id,
+    MRSAS_RAID_SCSI_IO_REQUEST * io_request);
+void	mrsas_xpt_freeze(struct mrsas_softc *sc);
+void	mrsas_xpt_release(struct mrsas_softc *sc);
+void	mrsas_cam_detach(struct mrsas_softc *sc);
+void	mrsas_release_mpt_cmd(struct mrsas_mpt_cmd *cmd);
+void	mrsas_unmap_request(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd);
+void	mrsas_cmd_done(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd);
+void
+mrsas_fire_cmd(struct mrsas_softc *sc, u_int32_t req_desc_lo,
+    u_int32_t req_desc_hi);
+void
+mrsas_set_pd_lba(MRSAS_RAID_SCSI_IO_REQUEST * io_request,
+    u_int8_t cdb_len, struct IO_REQUEST_INFO *io_info, union ccb *ccb,
+    MR_DRV_RAID_MAP_ALL * local_map_ptr, u_int32_t ref_tag,
+    u_int32_t ld_block_size);
+static void mrsas_freeze_simq(struct mrsas_mpt_cmd *cmd, struct cam_sim *sim);
+static void mrsas_cam_poll(struct cam_sim *sim);
+static void mrsas_action(struct cam_sim *sim, union ccb *ccb);
+static void mrsas_scsiio_timeout(void *data);
+static int mrsas_track_scsiio(struct mrsas_softc *sc, target_id_t id, u_int32_t bus_id);
+static void mrsas_tm_response_code(struct mrsas_softc *sc,
+    MPI2_SCSI_TASK_MANAGE_REPLY *mpi_reply);
+static int mrsas_issue_tm(struct mrsas_softc *sc,
+    MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc);
+static void
+mrsas_data_load_cb(void *arg, bus_dma_segment_t *segs,
+    int nseg, int error);
+static int32_t
+mrsas_startio(struct mrsas_softc *sc, struct cam_sim *sim,
+    union ccb *ccb);
+struct mrsas_mpt_cmd *mrsas_get_mpt_cmd(struct mrsas_softc *sc);
+MRSAS_REQUEST_DESCRIPTOR_UNION *
+	mrsas_get_request_desc(struct mrsas_softc *sc, u_int16_t index);
+
+extern void
+mrsas_map_mpt_cmd_status(struct mrsas_mpt_cmd *cmd, u_int8_t status,
+    u_int8_t extStatus);
+extern int mrsas_reset_targets(struct mrsas_softc *sc);
+extern u_int16_t MR_TargetIdToLdGet(u_int32_t ldTgtId, MR_DRV_RAID_MAP_ALL * map);
+extern u_int32_t
+MR_LdBlockSizeGet(u_int32_t ldTgtId, MR_DRV_RAID_MAP_ALL * map,
+    struct mrsas_softc *sc);
+extern void mrsas_isr(void *arg);
+extern void mrsas_aen_handler(struct mrsas_softc *sc);
+extern u_int8_t
+MR_BuildRaidContext(struct mrsas_softc *sc,
+    struct IO_REQUEST_INFO *io_info, RAID_CONTEXT * pRAID_Context,
+    MR_DRV_RAID_MAP_ALL * map);
+extern u_int16_t
+MR_LdSpanArrayGet(u_int32_t ld, u_int32_t span,
+    MR_DRV_RAID_MAP_ALL * map);
+extern u_int16_t 
+mrsas_get_updated_dev_handle(struct mrsas_softc *sc,
+    PLD_LOAD_BALANCE_INFO lbInfo, struct IO_REQUEST_INFO *io_info);
+extern u_int8_t
+megasas_get_best_arm(PLD_LOAD_BALANCE_INFO lbInfo, u_int8_t arm,
+    u_int64_t block, u_int32_t count);
+extern int mrsas_complete_cmd(struct mrsas_softc *sc, u_int32_t MSIxIndex);
+extern MR_LD_RAID *MR_LdRaidGet(u_int32_t ld, MR_DRV_RAID_MAP_ALL * map);
+extern void mrsas_disable_intr(struct mrsas_softc *sc);
+extern void mrsas_enable_intr(struct mrsas_softc *sc);
+
+
+/*
+ * mrsas_cam_attach:	Main entry to CAM subsystem
+ * input:				Adapter instance soft state
+ *
+ * This function is called from mrsas_attach() during initialization to perform
+ * SIM allocations and XPT bus registration.  If the kernel version is 7.4 or
+ * earlier, it would also initiate a bus scan.
+ */
+int
+mrsas_cam_attach(struct mrsas_softc *sc)
+{
+	struct cam_devq *devq;
+	int mrsas_cam_depth;
+
+	mrsas_cam_depth = sc->max_fw_cmds - MRSAS_INTERNAL_CMDS;
+
+	if ((devq = cam_simq_alloc(mrsas_cam_depth)) == NULL) {
+		device_printf(sc->mrsas_dev, "Cannot allocate SIM queue\n");
+		return (ENOMEM);
+	}
+	/*
+	 * Create SIM for bus 0 and register, also create path
+	 */
+	sc->sim_0 = cam_sim_alloc(mrsas_action, mrsas_cam_poll, "mrsas", sc,
+	    device_get_unit(sc->mrsas_dev), &sc->sim_lock, mrsas_cam_depth,
+	    mrsas_cam_depth, devq);
+	if (sc->sim_0 == NULL) {
+		cam_simq_free(devq);
+		device_printf(sc->mrsas_dev, "Cannot register SIM\n");
+		return (ENXIO);
+	}
+	/* Initialize taskqueue for Event Handling */
+	TASK_INIT(&sc->ev_task, 0, (void *)mrsas_aen_handler, sc);
+	sc->ev_tq = taskqueue_create("mrsas_taskq", M_NOWAIT | M_ZERO,
+	    taskqueue_thread_enqueue, &sc->ev_tq);
+
+	/* Run the task queue with lowest priority */
+	taskqueue_start_threads(&sc->ev_tq, 1, 255, "%s taskq",
+	    device_get_nameunit(sc->mrsas_dev));
+	mtx_lock(&sc->sim_lock);
+	if (xpt_bus_register(sc->sim_0, sc->mrsas_dev, 0) != CAM_SUCCESS) {
+		cam_sim_free(sc->sim_0, TRUE);	/* passing true frees the devq */
+		mtx_unlock(&sc->sim_lock);
+		return (ENXIO);
+	}
+	if (xpt_create_path(&sc->path_0, NULL, cam_sim_path(sc->sim_0),
+	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
+		xpt_bus_deregister(cam_sim_path(sc->sim_0));
+		cam_sim_free(sc->sim_0, TRUE);	/* passing true will free the
+						 * devq */
+		mtx_unlock(&sc->sim_lock);
+		return (ENXIO);
+	}
+	mtx_unlock(&sc->sim_lock);
+
+	/*
+	 * Create SIM for bus 1 and register, also create path
+	 */
+	sc->sim_1 = cam_sim_alloc(mrsas_action, mrsas_cam_poll, "mrsas", sc,
+	    device_get_unit(sc->mrsas_dev), &sc->sim_lock, mrsas_cam_depth,
+	    mrsas_cam_depth, devq);
+	if (sc->sim_1 == NULL) {
+		cam_simq_free(devq);
+		device_printf(sc->mrsas_dev, "Cannot register SIM\n");
+		return (ENXIO);
+	}
+	mtx_lock(&sc->sim_lock);
+	if (xpt_bus_register(sc->sim_1, sc->mrsas_dev, 1) != CAM_SUCCESS) {
+		cam_sim_free(sc->sim_1, TRUE);	/* passing true frees the devq */
+		mtx_unlock(&sc->sim_lock);
+		return (ENXIO);
+	}
+	if (xpt_create_path(&sc->path_1, NULL, cam_sim_path(sc->sim_1),
+	    CAM_TARGET_WILDCARD,
+	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
+		xpt_bus_deregister(cam_sim_path(sc->sim_1));
+		cam_sim_free(sc->sim_1, TRUE);
+		mtx_unlock(&sc->sim_lock);
+		return (ENXIO);
+	}
+	mtx_unlock(&sc->sim_lock);
+
+#if (__FreeBSD_version <= 704000)
+	if (mrsas_bus_scan(sc)) {
+		device_printf(sc->mrsas_dev, "Error in bus scan.\n");
+		return (1);
+	}
+#endif
+	return (0);
+}
+
+/*
+ * mrsas_cam_detach:	De-allocates and tears down CAM
+ * input:				Adapter instance soft state
+ *
+ * De-registers and frees the paths and SIMs.
+ */
+void
+mrsas_cam_detach(struct mrsas_softc *sc)
+{
+	if (sc->ev_tq != NULL)
+		taskqueue_free(sc->ev_tq);
+	mtx_lock(&sc->sim_lock);
+	if (sc->path_0)
+		xpt_free_path(sc->path_0);
+	if (sc->sim_0) {
+		xpt_bus_deregister(cam_sim_path(sc->sim_0));
+		cam_sim_free(sc->sim_0, FALSE);
+	}
+	if (sc->path_1)
+		xpt_free_path(sc->path_1);
+	if (sc->sim_1) {
+		xpt_bus_deregister(cam_sim_path(sc->sim_1));
+		cam_sim_free(sc->sim_1, TRUE);
+	}
+	mtx_unlock(&sc->sim_lock);
+}
+
+/*
+ * mrsas_action:	SIM callback entry point
+ * input:			pointer to SIM pointer to CAM Control Block
+ *
+ * This function processes CAM subsystem requests. The type of request is stored
+ * in ccb->ccb_h.func_code.  The preprocessor #ifdef is necessary because
+ * ccb->cpi.maxio is not supported for FreeBSD version 7.4 or earlier.
+ */
+static void
+mrsas_action(struct cam_sim *sim, union ccb *ccb)
+{
+	struct mrsas_softc *sc = (struct mrsas_softc *)cam_sim_softc(sim);
+	struct ccb_hdr *ccb_h = &(ccb->ccb_h);
+	u_int32_t device_id;
+
+	/*
+	 * Check if the system is going down or the adapter is in an
+	 * unrecoverable critical error.
+	 */
+	if (sc->remove_in_progress ||
+	    (sc->adprecovery == MRSAS_HW_CRITICAL_ERROR)) {
+		ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
+		xpt_done(ccb);
+		return;
+	}
+
+	switch (ccb->ccb_h.func_code) {
+	case XPT_SCSI_IO:
+		{
+			device_id = ccb_h->target_id;
+
+			/*
+			 * bus 0 is LD, bus 1 is for system-PD
+			 */
+			if (cam_sim_bus(sim) == 1 &&
+			    sc->pd_list[device_id].driveState != MR_PD_STATE_SYSTEM) {
+				ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
+				xpt_done(ccb);
+			} else {
+				if (mrsas_startio(sc, sim, ccb)) {
+					ccb->ccb_h.status |= CAM_REQ_INVALID;
+					xpt_done(ccb);
+				}
+			}
+			break;
+		}
+	case XPT_ABORT:
+		{
+			ccb->ccb_h.status = CAM_UA_ABORT;
+			xpt_done(ccb);
+			break;
+		}
+	case XPT_RESET_BUS:
+		{
+			xpt_done(ccb);
+			break;
+		}
+	case XPT_GET_TRAN_SETTINGS:
+		{
+			ccb->cts.protocol = PROTO_SCSI;
+			ccb->cts.protocol_version = SCSI_REV_2;
+			ccb->cts.transport = XPORT_SPI;
+			ccb->cts.transport_version = 2;
+			ccb->cts.xport_specific.spi.valid = CTS_SPI_VALID_DISC;
+			ccb->cts.xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
+			ccb->cts.proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
+			ccb->cts.proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
+			ccb->ccb_h.status = CAM_REQ_CMP;
+			xpt_done(ccb);
+			break;
+		}
+	case XPT_SET_TRAN_SETTINGS:
+		{
+			ccb->ccb_h.status = CAM_FUNC_NOTAVAIL;
+			xpt_done(ccb);
+			break;
+		}
+	case XPT_CALC_GEOMETRY:
+		{
+			cam_calc_geometry(&ccb->ccg, 1);
+			xpt_done(ccb);
+			break;
+		}
+	case XPT_PATH_INQ:
+		{
+			ccb->cpi.version_num = 1;
+			ccb->cpi.hba_inquiry = 0;
+			ccb->cpi.target_sprt = 0;
+#if (__FreeBSD_version >= 902001)
+			ccb->cpi.hba_misc = PIM_UNMAPPED;
+#else
+			ccb->cpi.hba_misc = 0;
+#endif
+			ccb->cpi.hba_eng_cnt = 0;
+			ccb->cpi.max_lun = MRSAS_SCSI_MAX_LUNS;
+			ccb->cpi.unit_number = cam_sim_unit(sim);
+			ccb->cpi.bus_id = cam_sim_bus(sim);
+			ccb->cpi.initiator_id = MRSAS_SCSI_INITIATOR_ID;
+			ccb->cpi.base_transfer_speed = 150000;
+			strlcpy(ccb->cpi.sim_vid, "FreeBSD", SIM_IDLEN);
+			strlcpy(ccb->cpi.hba_vid, "AVAGO", HBA_IDLEN);
+			strlcpy(ccb->cpi.dev_name, cam_sim_name(sim), DEV_IDLEN);
+			ccb->cpi.transport = XPORT_SPI;
+			ccb->cpi.transport_version = 2;
+			ccb->cpi.protocol = PROTO_SCSI;
+			ccb->cpi.protocol_version = SCSI_REV_2;
+			if (ccb->cpi.bus_id == 0)
+				ccb->cpi.max_target = MRSAS_MAX_PD - 1;
+			else
+				ccb->cpi.max_target = MRSAS_MAX_LD_IDS - 1;
+#if (__FreeBSD_version > 704000)
+			ccb->cpi.maxio = sc->max_num_sge * MRSAS_PAGE_SIZE;
+#endif
+			ccb->ccb_h.status = CAM_REQ_CMP;
+			xpt_done(ccb);
+			break;
+		}
+	default:
+		{
+			ccb->ccb_h.status = CAM_REQ_INVALID;
+			xpt_done(ccb);
+			break;
+		}
+	}
+}
+
+/*
+ * mrsas_scsiio_timeout:	Callback function for timed-out IO
+ * input:					mpt command context
+ *
+ * This function executes when the timeout set from the CCB header provided by
+ * the CAM layer expires. The driver runs this timer for all DCMD and LDIO
+ * commands coming from the CAM layer. It is the callback for IO timeouts and
+ * runs in a no-sleep context; it sets do_timedout_reset in the adapter context
+ * so that OCR/kill-adapter is executed from the ocr_thread context.
+ */
+static void
+mrsas_scsiio_timeout(void *data)
+{
+	struct mrsas_mpt_cmd *cmd;
+	struct mrsas_softc *sc;
+	u_int32_t target_id;
+
+	if (!data)
+		return;
+
+	cmd = (struct mrsas_mpt_cmd *)data;
+	sc = cmd->sc;
+
+	if (cmd->ccb_ptr == NULL) {
+		printf("command timeout with NULL ccb\n");
+		return;
+	}
+
+	/*
+	 * The callout below is a dummy entry so that it can be cancelled from
+	 * mrsas_cmd_done(). The controller will then go through OCR/kill-adapter,
+	 * based on the controller's OCR enable/disable property, from the
+	 * ocr_thread context.
+	 */
+#if (__FreeBSD_version >= 1000510)
+	callout_reset_sbt(&cmd->cm_callout, SBT_1S * 180, 0,
+	    mrsas_scsiio_timeout, cmd, 0);
+#else
+	callout_reset(&cmd->cm_callout, (180000 * hz) / 1000,
+	    mrsas_scsiio_timeout, cmd);
+#endif
+
+	if (cmd->ccb_ptr->cpi.bus_id == 0)
+		target_id = cmd->ccb_ptr->ccb_h.target_id;
+	else
+		target_id = (cmd->ccb_ptr->ccb_h.target_id + (MRSAS_MAX_PD - 1));
+
+	/* Save the cmd to be processed for TM, if it is not there in the array */
+	if (sc->target_reset_pool[target_id] == NULL) {
+		sc->target_reset_pool[target_id] = cmd;
+		mrsas_atomic_inc(&sc->target_reset_outstanding);
+	}
+
+	return;
+}
+
+/*
+ * mrsas_startio:	SCSI IO entry point
+ * input:			Adapter instance soft state
+ * 					pointer to CAM Control Block
+ *
+ * This function is the SCSI IO entry point and it initiates IO processing. It
+ * copies the IO and, depending on whether the IO is read/write or inquiry,
+ * calls mrsas_build_ldio() or mrsas_build_dcdb(), respectively.  It returns 0
+ * if the command is sent to firmware successfully, otherwise it returns 1.
+ */
+static int32_t
+mrsas_startio(struct mrsas_softc *sc, struct cam_sim *sim,
+    union ccb *ccb)
+{
+	struct mrsas_mpt_cmd *cmd;
+	struct ccb_hdr *ccb_h = &(ccb->ccb_h);
+	struct ccb_scsiio *csio = &(ccb->csio);
+	MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc;
+	u_int8_t cmd_type;
+
+	if ((csio->cdb_io.cdb_bytes[0]) == SYNCHRONIZE_CACHE &&
+		(!sc->fw_sync_cache_support)) {
+		ccb->ccb_h.status = CAM_REQ_CMP;
+		xpt_done(ccb);
+		return (0);
+	}
+	ccb_h->status |= CAM_SIM_QUEUED;
+	cmd = mrsas_get_mpt_cmd(sc);
+
+	if (!cmd) {
+		ccb_h->status |= CAM_REQUEUE_REQ;
+		xpt_done(ccb);
+		return (0);
+	}
+	if ((ccb_h->flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
+		if (ccb_h->flags & CAM_DIR_IN)
+			cmd->flags |= MRSAS_DIR_IN;
+		if (ccb_h->flags & CAM_DIR_OUT)
+			cmd->flags |= MRSAS_DIR_OUT;
+	} else
+		cmd->flags = MRSAS_DIR_NONE;	/* no data */
+
+/* For FreeBSD 9.2 and higher */
+#if (__FreeBSD_version >= 902001)
+	/*
+	 * XXX We don't yet support physical addresses here.
+	 */
+	switch ((ccb->ccb_h.flags & CAM_DATA_MASK)) {
+	case CAM_DATA_PADDR:
+	case CAM_DATA_SG_PADDR:
+		device_printf(sc->mrsas_dev, "%s: physical addresses not supported\n",
+		    __func__);
+		mrsas_release_mpt_cmd(cmd);
+		ccb_h->status = CAM_REQ_INVALID;
+		ccb_h->status &= ~CAM_SIM_QUEUED;
+		goto done;
+	case CAM_DATA_SG:
+		device_printf(sc->mrsas_dev, "%s: scatter gather is not supported\n",
+		    __func__);
+		mrsas_release_mpt_cmd(cmd);
+		ccb_h->status = CAM_REQ_INVALID;
+		goto done;
+	case CAM_DATA_VADDR:
+		if (csio->dxfer_len > (sc->max_num_sge * MRSAS_PAGE_SIZE)) {
+			mrsas_release_mpt_cmd(cmd);
+			ccb_h->status = CAM_REQ_TOO_BIG;
+			goto done;
+		}
+		cmd->length = csio->dxfer_len;
+		if (cmd->length)
+			cmd->data = csio->data_ptr;
+		break;
+	case CAM_DATA_BIO:
+		if (csio->dxfer_len > (sc->max_num_sge * MRSAS_PAGE_SIZE)) {
+			mrsas_release_mpt_cmd(cmd);
+			ccb_h->status = CAM_REQ_TOO_BIG;
+			goto done;
+		}
+		cmd->length = csio->dxfer_len;
+		if (cmd->length)
+			cmd->data = csio->data_ptr;
+		break;
+	default:
+		ccb->ccb_h.status = CAM_REQ_INVALID;
+		goto done;
+	}
+#else
+	if (!(ccb_h->flags & CAM_DATA_PHYS)) {	/* Virtual data address */
+		if (!(ccb_h->flags & CAM_SCATTER_VALID)) {
+			if (csio->dxfer_len > (sc->max_num_sge * MRSAS_PAGE_SIZE)) {
+				mrsas_release_mpt_cmd(cmd);
+				ccb_h->status = CAM_REQ_TOO_BIG;
+				goto done;
+			}
+			cmd->length = csio->dxfer_len;
+			if (cmd->length)
+				cmd->data = csio->data_ptr;
+		} else {
+			mrsas_release_mpt_cmd(cmd);
+			ccb_h->status = CAM_REQ_INVALID;
+			goto done;
+		}
+	} else {			/* Data addresses are physical. */
+		mrsas_release_mpt_cmd(cmd);
+		ccb_h->status = CAM_REQ_INVALID;
+		ccb_h->status &= ~CAM_SIM_QUEUED;
+		goto done;
+	}
+#endif
+	/* save ccb ptr */
+	cmd->ccb_ptr = ccb;
+
+	req_desc = mrsas_get_request_desc(sc, (cmd->index) - 1);
+	if (!req_desc) {
+		device_printf(sc->mrsas_dev, "Cannot get request_descriptor.\n");
+		return (FAIL);
+	}
+	memset(req_desc, 0, sizeof(MRSAS_REQUEST_DESCRIPTOR_UNION));
+	cmd->request_desc = req_desc;
+
+	if (ccb_h->flags & CAM_CDB_POINTER)
+		bcopy(csio->cdb_io.cdb_ptr, cmd->io_request->CDB.CDB32, csio->cdb_len);
+	else
+		bcopy(csio->cdb_io.cdb_bytes, cmd->io_request->CDB.CDB32, csio->cdb_len);
+	mtx_lock(&sc->raidmap_lock);
+
+	/* Check for IO type READ-WRITE targeted for Logical Volume */
+	cmd_type = mrsas_find_io_type(sim, ccb);
+	switch (cmd_type) {
+	case READ_WRITE_LDIO:
+		/* Build READ-WRITE IO for Logical Volume  */
+		if (mrsas_build_ldio_rw(sc, cmd, ccb)) {
+			device_printf(sc->mrsas_dev, "Build RW LDIO failed.\n");
+			mtx_unlock(&sc->raidmap_lock);
+			return (1);
+		}
+		break;
+	case NON_READ_WRITE_LDIO:
+		/* Build NON READ-WRITE IO for Logical Volume  */
+		if (mrsas_build_ldio_nonrw(sc, cmd, ccb)) {
+			device_printf(sc->mrsas_dev, "Build NON-RW LDIO failed.\n");
+			mtx_unlock(&sc->raidmap_lock);
+			return (1);
+		}
+		break;
+	case READ_WRITE_SYSPDIO:
+	case NON_READ_WRITE_SYSPDIO:
+		if (sc->secure_jbod_support &&
+		    (cmd_type == NON_READ_WRITE_SYSPDIO)) {
+			/* Build NON-RW IO for JBOD */
+			if (mrsas_build_syspdio(sc, cmd, ccb, sim, 0)) {
+				device_printf(sc->mrsas_dev,
+				    "Build SYSPDIO failed.\n");
+				mtx_unlock(&sc->raidmap_lock);
+				return (1);
+			}
+		} else {
+			/* Build RW IO for JBOD */
+			if (mrsas_build_syspdio(sc, cmd, ccb, sim, 1)) {
+				device_printf(sc->mrsas_dev,
+				    "Build SYSPDIO failed.\n");
+				mtx_unlock(&sc->raidmap_lock);
+				return (1);
+			}
+		}
+	}
+	mtx_unlock(&sc->raidmap_lock);
+
+	if (cmd->flags == MRSAS_DIR_IN)	/* from device */
+		cmd->io_request->Control |= MPI2_SCSIIO_CONTROL_READ;
+	else if (cmd->flags == MRSAS_DIR_OUT)	/* to device */
+		cmd->io_request->Control |= MPI2_SCSIIO_CONTROL_WRITE;
+
+	cmd->io_request->SGLFlags = MPI2_SGE_FLAGS_64_BIT_ADDRESSING;
+	cmd->io_request->SGLOffset0 = offsetof(MRSAS_RAID_SCSI_IO_REQUEST, SGL) / 4;
+	cmd->io_request->SenseBufferLowAddress = cmd->sense_phys_addr;
+	cmd->io_request->SenseBufferLength = MRSAS_SCSI_SENSE_BUFFERSIZE;
+
+	req_desc = cmd->request_desc;
+	req_desc->SCSIIO.SMID = cmd->index;
+
+	/*
+	 * Start timer for IO timeout. Default timeout value is 90 seconds.
+	 */
+#if (__FreeBSD_version >= 1000510)
+	callout_reset_sbt(&cmd->cm_callout, SBT_1S * 180, 0,
+	    mrsas_scsiio_timeout, cmd, 0);
+#else
+	callout_reset(&cmd->cm_callout, (180000 * hz) / 1000,
+	    mrsas_scsiio_timeout, cmd);
+#endif
+	mrsas_atomic_inc(&sc->fw_outstanding);
+
+	if (mrsas_atomic_read(&sc->fw_outstanding) > sc->io_cmds_highwater)
+		sc->io_cmds_highwater++;
+
+	mrsas_fire_cmd(sc, req_desc->addr.u.low, req_desc->addr.u.high);
+	return (0);
+
+done:
+	xpt_done(ccb);
+	return (0);
+}
+
+/*
+ * mrsas_find_io_type:	Determines if IO is read/write or inquiry
+ * input:			pointer to CAM Control Block
+ *
+ * This function determines whether the IO is read/write or not.  It returns
+ * READ_WRITE_LDIO/NON_READ_WRITE_LDIO for bus 0 and READ_WRITE_SYSPDIO/
+ * NON_READ_WRITE_SYSPDIO for bus 1.
+ */
+int 
+mrsas_find_io_type(struct cam_sim *sim, union ccb *ccb)
+{
+	struct ccb_scsiio *csio = &(ccb->csio);
+
+	switch (csio->cdb_io.cdb_bytes[0]) {
+	case READ_10:
+	case WRITE_10:
+	case READ_12:
+	case WRITE_12:
+	case READ_6:
+	case WRITE_6:
+	case READ_16:
+	case WRITE_16:
+		return (cam_sim_bus(sim) ?
+		    READ_WRITE_SYSPDIO : READ_WRITE_LDIO);
+	default:
+		return (cam_sim_bus(sim) ?
+		    NON_READ_WRITE_SYSPDIO : NON_READ_WRITE_LDIO);
+	}
+}
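[Editor's note] For example, a WRITE_10 arriving on bus 0 classifies as READ_WRITE_LDIO, while the same CDB on bus 1 classifies as READ_WRITE_SYSPDIO; a command not in the switch above (say MODE SENSE) on bus 0 falls through to NON_READ_WRITE_LDIO.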
+
+/*
+ * mrsas_get_mpt_cmd:	Get a cmd from free command pool
+ * input:				Adapter instance soft state
+ *
+ * This function removes an MPT command from the command free list and
+ * initializes it.
+ */
+struct mrsas_mpt_cmd *
+mrsas_get_mpt_cmd(struct mrsas_softc *sc)
+{
+	struct mrsas_mpt_cmd *cmd = NULL;
+
+	mtx_lock(&sc->mpt_cmd_pool_lock);
+	if (!TAILQ_EMPTY(&sc->mrsas_mpt_cmd_list_head)) {
+		cmd = TAILQ_FIRST(&sc->mrsas_mpt_cmd_list_head);
+		TAILQ_REMOVE(&sc->mrsas_mpt_cmd_list_head, cmd, next);
+	} else {
+		goto out;
+	}
+
+	memset((uint8_t *)cmd->io_request, 0, MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE);
+	cmd->data = NULL;
+	cmd->length = 0;
+	cmd->flags = 0;
+	cmd->error_code = 0;
+	cmd->load_balance = 0;
+	cmd->ccb_ptr = NULL;
+
+out:
+	mtx_unlock(&sc->mpt_cmd_pool_lock);
+	return cmd;
+}
+
+/*
+ * mrsas_release_mpt_cmd:	Return a cmd to free command pool
+ * input:					Command packet for return to free command pool
+ *
+ * This function returns an MPT command to the free command list.
+ */
+void
+mrsas_release_mpt_cmd(struct mrsas_mpt_cmd *cmd)
+{
+	struct mrsas_softc *sc = cmd->sc;
+
+	mtx_lock(&sc->mpt_cmd_pool_lock);
+	cmd->sync_cmd_idx = (u_int32_t)MRSAS_ULONG_MAX;
+	TAILQ_INSERT_HEAD(&(sc->mrsas_mpt_cmd_list_head), cmd, next);
+	mtx_unlock(&sc->mpt_cmd_pool_lock);
+
+	return;
+}
+
+/*
+ * mrsas_get_request_desc:	Get request descriptor from array
+ * input:					Adapter instance soft state
+ * 							SMID index
+ *
+ * This function returns a pointer to the request descriptor.
+ */
+MRSAS_REQUEST_DESCRIPTOR_UNION *
+mrsas_get_request_desc(struct mrsas_softc *sc, u_int16_t index)
+{
+	u_int8_t *p;
+
+	if (index >= sc->max_fw_cmds) {
+		device_printf(sc->mrsas_dev, "Invalid SMID (0x%x)request for desc\n", index);
+		return NULL;
+	}
+	p = sc->req_desc + sizeof(MRSAS_REQUEST_DESCRIPTOR_UNION) * index;
+
+	return (MRSAS_REQUEST_DESCRIPTOR_UNION *) p;
+}
+
+/*
+ * mrsas_build_ldio_rw:	Builds an LDIO command
+ * input:				Adapter instance soft state
+ * 						Pointer to command packet
+ * 						Pointer to CCB
+ *
+ * This function builds the LDIO command packet.  It returns 0 if the command is
+ * built successfully, otherwise it returns a 1.
+ */
+int
+mrsas_build_ldio_rw(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd,
+    union ccb *ccb)
+{
+	struct ccb_hdr *ccb_h = &(ccb->ccb_h);
+	struct ccb_scsiio *csio = &(ccb->csio);
+	u_int32_t device_id;
+	MRSAS_RAID_SCSI_IO_REQUEST *io_request;
+
+	device_id = ccb_h->target_id;
+
+	io_request = cmd->io_request;
+	io_request->RaidContext.VirtualDiskTgtId = device_id;
+	io_request->RaidContext.status = 0;
+	io_request->RaidContext.exStatus = 0;
+
+	/* just the cdb len, other flags zero, and ORed-in later for FP */
+	io_request->IoFlags = csio->cdb_len;
+
+	if (mrsas_setup_io(sc, cmd, ccb, device_id, io_request) != SUCCESS)
+		device_printf(sc->mrsas_dev, "Build ldio or fpio error\n");
+
+	io_request->DataLength = cmd->length;
+
+	if (mrsas_map_request(sc, cmd, ccb) == SUCCESS) {
+		if (cmd->sge_count > sc->max_num_sge) {
+			device_printf(sc->mrsas_dev, "Error: sge_count (0x%x) exceeds"
+			    "max (0x%x) allowed\n", cmd->sge_count, sc->max_num_sge);
+			return (FAIL);
+		}
+		/*
+		 * numSGE stores the lower 8 bits of sge_count; numSGEExt stores
+		 * the upper 8 bits of sge_count.
+		 */
+		io_request->RaidContext.numSGE = cmd->sge_count;
+		io_request->RaidContext.numSGEExt = (uint8_t)(cmd->sge_count >> 8);
+
+	} else {
+		device_printf(sc->mrsas_dev, "Data map/load failed.\n");
+		return (FAIL);
+	}
+	return (0);
+}
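[Editor's note] To make the 8/8 split of sge_count above concrete: with sge_count = 0x12C (300 SGEs), numSGE receives the low byte 0x2C and numSGEExt the high byte 0x01, assuming numSGE is an 8-bit field in the RAID context so the truncation to the low byte happens implicitly.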
+
+/*
+ * mrsas_setup_io:	Set up data including Fast Path I/O
+ * input:			Adapter instance soft state
+ * 					Pointer to command packet
+ * 					Pointer to CCB
+ *
+ * This function sets up the IO request, including the Fast Path I/O decision.
+ * It returns 0 if the request is set up successfully, otherwise it returns 1.
+ */
+int
+mrsas_setup_io(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd,
+    union ccb *ccb, u_int32_t device_id,
+    MRSAS_RAID_SCSI_IO_REQUEST * io_request)
+{
+	struct ccb_hdr *ccb_h = &(ccb->ccb_h);
+	struct ccb_scsiio *csio = &(ccb->csio);
+	struct IO_REQUEST_INFO io_info;
+	MR_DRV_RAID_MAP_ALL *map_ptr;
+	MR_LD_RAID *raid;
+	u_int8_t fp_possible;
+	u_int32_t start_lba_hi, start_lba_lo, ld_block_size, ld;
+	u_int32_t datalength = 0;
+
+	start_lba_lo = 0;
+	start_lba_hi = 0;
+	fp_possible = 0;
+
+	/*
+	 * READ_6 (0x08) or WRITE_6 (0x0A) cdb
+	 */
+	if (csio->cdb_len == 6) {
+		datalength = (u_int32_t)csio->cdb_io.cdb_bytes[4];
+		start_lba_lo = ((u_int32_t)csio->cdb_io.cdb_bytes[1] << 16) |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[2] << 8) |
+		    (u_int32_t)csio->cdb_io.cdb_bytes[3];
+		start_lba_lo &= 0x1FFFFF;
+	}
+	/*
+	 * READ_10 (0x28) or WRITE_10 (0x2A) cdb
+	 */
+	else if (csio->cdb_len == 10) {
+		datalength = (u_int32_t)csio->cdb_io.cdb_bytes[8] |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[7] << 8);
+		start_lba_lo = ((u_int32_t)csio->cdb_io.cdb_bytes[2] << 24) |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[3] << 16) |
+		    (u_int32_t)csio->cdb_io.cdb_bytes[4] << 8 |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[5]);
+	}
+	/*
+	 * READ_12 (0xA8) or WRITE_12 (0xAA) cdb
+	 */
+	else if (csio->cdb_len == 12) {
+		datalength = (u_int32_t)csio->cdb_io.cdb_bytes[6] << 24 |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[7] << 16) |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[8] << 8) |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[9]);
+		start_lba_lo = ((u_int32_t)csio->cdb_io.cdb_bytes[2] << 24) |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[3] << 16) |
+		    (u_int32_t)csio->cdb_io.cdb_bytes[4] << 8 |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[5]);
+	}
+	/*
+	 * READ_16 (0x88) or WRITE_16 (0x8A) cdb
+	 */
+	else if (csio->cdb_len == 16) {
+		datalength = (u_int32_t)csio->cdb_io.cdb_bytes[10] << 24 |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[11] << 16) |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[12] << 8) |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[13]);
+		start_lba_lo = ((u_int32_t)csio->cdb_io.cdb_bytes[6] << 24) |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[7] << 16) |
+		    (u_int32_t)csio->cdb_io.cdb_bytes[8] << 8 |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[9]);
+		start_lba_hi = ((u_int32_t)csio->cdb_io.cdb_bytes[2] << 24) |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[3] << 16) |
+		    (u_int32_t)csio->cdb_io.cdb_bytes[4] << 8 |
+		    ((u_int32_t)csio->cdb_io.cdb_bytes[5]);
+	}
+	memset(&io_info, 0, sizeof(struct IO_REQUEST_INFO));
+	io_info.ldStartBlock = ((u_int64_t)start_lba_hi << 32) | start_lba_lo;
+	io_info.numBlocks = datalength;
+	io_info.ldTgtId = device_id;
+
+	switch (ccb_h->flags & CAM_DIR_MASK) {
+	case CAM_DIR_IN:
+		io_info.isRead = 1;
+		break;
+	case CAM_DIR_OUT:
+		io_info.isRead = 0;
+		break;
+	case CAM_DIR_NONE:
+	default:
+		mrsas_dprint(sc, MRSAS_TRACE, "From %s : DMA Flag is %d \n", __func__, ccb_h->flags & CAM_DIR_MASK);
+		break;
+	}
+
+	map_ptr = sc->ld_drv_map[(sc->map_id & 1)];
+	ld_block_size = MR_LdBlockSizeGet(device_id, map_ptr, sc);
+
+	ld = MR_TargetIdToLdGet(device_id, map_ptr);
+	if ((ld >= MAX_LOGICAL_DRIVES_EXT) || (!sc->fast_path_io)) {
+		io_request->RaidContext.regLockFlags = 0;
+		fp_possible = 0;
+	} else {
+		if (MR_BuildRaidContext(sc, &io_info, &io_request->RaidContext, map_ptr))
+			fp_possible = io_info.fpOkForIo;
+	}
+
+	raid = MR_LdRaidGet(ld, map_ptr);
+	/* Store the TM capability value in cmd */
+	cmd->tmCapable = raid->capability.tmCapable;
+
+	cmd->request_desc->SCSIIO.MSIxIndex =
+	    sc->msix_vectors ? smp_processor_id() % sc->msix_vectors : 0;
+
+
+	if (fp_possible) {
+		mrsas_set_pd_lba(io_request, csio->cdb_len, &io_info, ccb, map_ptr,
+		    start_lba_lo, ld_block_size);
+		io_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST;
+		cmd->request_desc->SCSIIO.RequestFlags =
+		    (MPI2_REQ_DESCRIPT_FLAGS_FP_IO <<
+		    MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
+		if (sc->mrsas_gen3_ctrl) {
+			if (io_request->RaidContext.regLockFlags == REGION_TYPE_UNUSED)
+				cmd->request_desc->SCSIIO.RequestFlags =
+				    (MRSAS_REQ_DESCRIPT_FLAGS_NO_LOCK <<
+				    MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
+			io_request->RaidContext.Type = MPI2_TYPE_CUDA;
+			io_request->RaidContext.nseg = 0x1;
+			io_request->IoFlags |= MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH;
+			io_request->RaidContext.regLockFlags |=
+			    (MR_RL_FLAGS_GRANT_DESTINATION_CUDA |
+			    MR_RL_FLAGS_SEQ_NUM_ENABLE);
+		}
+		if ((sc->load_balance_info[device_id].loadBalanceFlag) &&
+		    (io_info.isRead)) {
+			io_info.devHandle =
+			    mrsas_get_updated_dev_handle(sc,
+			    &sc->load_balance_info[device_id], &io_info);
+			cmd->load_balance = MRSAS_LOAD_BALANCE_FLAG;
+			cmd->pd_r1_lb = io_info.pd_after_lb;
+		} else
+			cmd->load_balance = 0;
+		cmd->request_desc->SCSIIO.DevHandle = io_info.devHandle;
+		io_request->DevHandle = io_info.devHandle;
+	} else {
+		/* Not FP IO */
+		io_request->RaidContext.timeoutValue = map_ptr->raidMap.fpPdIoTimeoutSec;
+		cmd->request_desc->SCSIIO.RequestFlags =
+		    (MRSAS_REQ_DESCRIPT_FLAGS_LD_IO <<
+		    MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
+		if (sc->mrsas_gen3_ctrl) {
+			if (io_request->RaidContext.regLockFlags == REGION_TYPE_UNUSED)
+				cmd->request_desc->SCSIIO.RequestFlags =
+				    (MRSAS_REQ_DESCRIPT_FLAGS_NO_LOCK <<
+				    MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
+			io_request->RaidContext.Type = MPI2_TYPE_CUDA;
+			io_request->RaidContext.regLockFlags |=
+			    (MR_RL_FLAGS_GRANT_DESTINATION_CPU0 |
+			    MR_RL_FLAGS_SEQ_NUM_ENABLE);
+			io_request->RaidContext.nseg = 0x1;
+		}
+		io_request->Function = MRSAS_MPI2_FUNCTION_LD_IO_REQUEST;
+		io_request->DevHandle = device_id;
+	}
+	return (0);
+}
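[Editor's note] As a worked check of the CDB decoding at the top of this function: for an illustrative READ_10 of 28 00 00 00 12 34 00 00 08 00, bytes 2-5 yield start_lba_lo = 0x1234, bytes 7-8 yield datalength = 8 blocks, and start_lba_hi stays 0, so io_info.ldStartBlock = 0x1234 and io_info.numBlocks = 8.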
+
+/*
+ * mrsas_build_ldio_nonrw:	Builds an LDIO command
+ * input:				Adapter instance soft state
+ * 						Pointer to command packet
+ * 						Pointer to CCB
+ *
+ * This function builds the LDIO command packet.  It returns 0 if the command is
+ * built successfully, otherwise it returns a 1.
+ */
+int
+mrsas_build_ldio_nonrw(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd,
+    union ccb *ccb)
+{
+	struct ccb_hdr *ccb_h = &(ccb->ccb_h);
+	u_int32_t device_id, ld;
+	MR_DRV_RAID_MAP_ALL *map_ptr;
+	MR_LD_RAID *raid;
+	MRSAS_RAID_SCSI_IO_REQUEST *io_request;
+
+	io_request = cmd->io_request;
+	device_id = ccb_h->target_id;
+
+	map_ptr = sc->ld_drv_map[(sc->map_id & 1)];
+	ld = MR_TargetIdToLdGet(device_id, map_ptr);
+	raid = MR_LdRaidGet(ld, map_ptr);
+	/* Store the TM capability value in cmd */
+	cmd->tmCapable = raid->capability.tmCapable;
+
+	/* FW path for LD Non-RW (SCSI management commands) */
+	io_request->Function = MRSAS_MPI2_FUNCTION_LD_IO_REQUEST;
+	io_request->DevHandle = device_id;
+	cmd->request_desc->SCSIIO.RequestFlags =
+	    (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO <<
+	    MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
+
+	io_request->RaidContext.VirtualDiskTgtId = device_id;
+	io_request->LUN[1] = ccb_h->target_lun & 0xF;
+	io_request->DataLength = cmd->length;
+
+	if (mrsas_map_request(sc, cmd, ccb) == SUCCESS) {
+		if (cmd->sge_count > sc->max_num_sge) {
+			device_printf(sc->mrsas_dev, "Error: sge_count (0x%x) exceeds"
+			    "max (0x%x) allowed\n", cmd->sge_count, sc->max_num_sge);
+			return (1);
+		}
+		/*
+		 * numSGE stores the lower 8 bits of sge_count; numSGEExt stores
+		 * the upper 8 bits of sge_count.
+		 */
+		io_request->RaidContext.numSGE = cmd->sge_count;
+		io_request->RaidContext.numSGEExt = (uint8_t)(cmd->sge_count >> 8);
+	} else {
+		device_printf(sc->mrsas_dev, "Data map/load failed.\n");
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * mrsas_build_syspdio:	Builds a DCDB command
+ * input:				Adapter instance soft state
+ * 						Pointer to command packet
+ * 						Pointer to CCB
+ *
+ * This function builds the DCDB inquiry command.  It returns 0 if the command
+ * is built successfully, otherwise it returns a 1.
+ */
+int
+mrsas_build_syspdio(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd,
+    union ccb *ccb, struct cam_sim *sim, u_int8_t fp_possible)
+{
+	struct ccb_hdr *ccb_h = &(ccb->ccb_h);
+	u_int32_t device_id;
+	MR_DRV_RAID_MAP_ALL *local_map_ptr;
+	MRSAS_RAID_SCSI_IO_REQUEST *io_request;
+	struct MR_PD_CFG_SEQ_NUM_SYNC *pd_sync;
+
+	io_request = cmd->io_request;
+	device_id = ccb_h->target_id;
+	local_map_ptr = sc->ld_drv_map[(sc->map_id & 1)];
+	io_request->RaidContext.RAIDFlags = MR_RAID_FLAGS_IO_SUB_TYPE_SYSTEM_PD
+	    << MR_RAID_CTX_RAID_FLAGS_IO_SUB_TYPE_SHIFT;
+	io_request->RaidContext.regLockFlags = 0;
+	io_request->RaidContext.regLockRowLBA = 0;
+	io_request->RaidContext.regLockLength = 0;
+
+	/* If FW supports PD sequence number */
+	if (sc->use_seqnum_jbod_fp &&
+	    sc->pd_list[device_id].driveType == 0x00) {
+		//printf("Using Drv seq num\n");
+		pd_sync = (void *)sc->jbodmap_mem[(sc->pd_seq_map_id - 1) & 1];
+		cmd->tmCapable = pd_sync->seq[device_id].capability.tmCapable;
+		io_request->RaidContext.VirtualDiskTgtId = device_id + 255;
+		io_request->RaidContext.configSeqNum = pd_sync->seq[device_id].seqNum;
+		io_request->DevHandle = pd_sync->seq[device_id].devHandle;
+		io_request->RaidContext.regLockFlags |=
+		    (MR_RL_FLAGS_SEQ_NUM_ENABLE | MR_RL_FLAGS_GRANT_DESTINATION_CUDA);
+		io_request->RaidContext.Type = MPI2_TYPE_CUDA;
+		io_request->RaidContext.nseg = 0x1;
+	} else if (sc->fast_path_io) {
+		//printf("Using LD RAID map\n");
+		io_request->RaidContext.VirtualDiskTgtId = device_id;
+		io_request->RaidContext.configSeqNum = 0;
+		local_map_ptr = sc->ld_drv_map[(sc->map_id & 1)];
+		io_request->DevHandle =
+		    local_map_ptr->raidMap.devHndlInfo[device_id].curDevHdl;
+	} else {
+		//printf("Using FW PATH\n");
+		/* Want to send all IO via FW path */
+		io_request->RaidContext.VirtualDiskTgtId = device_id;
+		io_request->RaidContext.configSeqNum = 0;
+		io_request->DevHandle = 0xFFFF;
+	}
+
+	cmd->request_desc->SCSIIO.DevHandle = io_request->DevHandle;
+	cmd->request_desc->SCSIIO.MSIxIndex =
+	    sc->msix_vectors ? smp_processor_id() % sc->msix_vectors : 0;
+
+	if (!fp_possible) {
+		/* system pd firmware path */
+		io_request->Function = MRSAS_MPI2_FUNCTION_LD_IO_REQUEST;
+		cmd->request_desc->SCSIIO.RequestFlags =
+		    (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO <<
+		    MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
+		io_request->RaidContext.timeoutValue =
+		    local_map_ptr->raidMap.fpPdIoTimeoutSec;
+		io_request->RaidContext.VirtualDiskTgtId = device_id;
+	} else {
+		/* system pd fast path */
+		io_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST;
+		io_request->RaidContext.timeoutValue = local_map_ptr->raidMap.fpPdIoTimeoutSec;
+
+		/*
+		 * NOTE - For system PD RW cmds only, IoFlags will be FAST_PATH,
+		 * because the non-RW cmds will now go via the FW queue
+		 * and not the exception queue.
+		 */
+		io_request->IoFlags |= MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH;
+
+		cmd->request_desc->SCSIIO.RequestFlags =
+		    (MPI2_REQ_DESCRIPT_FLAGS_FP_IO <<
+		    MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
+	}
+
+	io_request->LUN[1] = ccb_h->target_lun & 0xF;
+	io_request->DataLength = cmd->length;
+
+	if (mrsas_map_request(sc, cmd, ccb) == SUCCESS) {
+		if (cmd->sge_count > sc->max_num_sge) {
+			device_printf(sc->mrsas_dev, "Error: sge_count (0x%x) exceeds "
+			    "max (0x%x) allowed\n", cmd->sge_count, sc->max_num_sge);
+			return (1);
+		}
+		/*
+		 * numSGE stores the lower 8 bits of sge_count; numSGEExt
+		 * stores the upper 8 bits.
+		 */
+		io_request->RaidContext.numSGE = cmd->sge_count;
+		io_request->RaidContext.numSGEExt = (uint8_t)(cmd->sge_count >> 8);
+	} else {
+		device_printf(sc->mrsas_dev, "Data map/load failed.\n");
+		return (1);
+	}
+	return (0);
+}
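+
+/*
+ * A minimal illustrative sketch of the SGE count split used in the build
+ * routines above (the names lo/ext below are hypothetical, not driver
+ * fields): the 16-bit sge_count is stored as an 8-bit low part (numSGE)
+ * and an 8-bit extension (numSGEExt).
+ *
+ *	lo  = (u_int8_t)(sge_count & 0xFF);	-> numSGE
+ *	ext = (u_int8_t)(sge_count >> 8);	-> numSGEExt
+ *
+ * For example, sge_count = 0x012C (300 SGEs) gives numSGE = 0x2C and
+ * numSGEExt = 0x01.
+ */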
+
+/*
+ * mrsas_map_request:	Map and load data
+ * input:				Adapter instance soft state
+ * 						Pointer to command packet
+ *
+ * For data from the OS, map and load the data buffer into bus space.  The SG
+ * list is built in the callback.  If the bus dmamap load is not successful,
+ * cmd->error_code will contain the error code and 1 is returned.
+ */
+int 
+mrsas_map_request(struct mrsas_softc *sc,
+    struct mrsas_mpt_cmd *cmd, union ccb *ccb)
+{
+	u_int32_t retcode = 0;
+	struct cam_sim *sim;
+
+	sim = xpt_path_sim(cmd->ccb_ptr->ccb_h.path);
+
+	if (cmd->data != NULL) {
+		/* Map data buffer into bus space */
+		mtx_lock(&sc->io_lock);
+#if (__FreeBSD_version >= 902001)
+		retcode = bus_dmamap_load_ccb(sc->data_tag, cmd->data_dmamap, ccb,
+		    mrsas_data_load_cb, cmd, 0);
+#else
+		retcode = bus_dmamap_load(sc->data_tag, cmd->data_dmamap, cmd->data,
+		    cmd->length, mrsas_data_load_cb, cmd, BUS_DMA_NOWAIT);
+#endif
+		mtx_unlock(&sc->io_lock);
+		if (retcode)
+			device_printf(sc->mrsas_dev, "bus_dmamap_load(): retcode = %d\n", retcode);
+		if (retcode == EINPROGRESS) {
+			device_printf(sc->mrsas_dev, "request load in progress\n");
+			mrsas_freeze_simq(cmd, sim);
+		}
+	}
+	if (cmd->error_code)
+		return (1);
+	return (retcode);
+}
+
+/*
+ * mrsas_unmap_request:	Unmap and unload data
+ * input:				Adapter instance soft state
+ * 						Pointer to command packet
+ *
+ * This function unmaps and unloads data from OS.
+ */
+void
+mrsas_unmap_request(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd)
+{
+	if (cmd->data != NULL) {
+		if (cmd->flags & MRSAS_DIR_IN)
+			bus_dmamap_sync(sc->data_tag, cmd->data_dmamap, BUS_DMASYNC_POSTREAD);
+		if (cmd->flags & MRSAS_DIR_OUT)
+			bus_dmamap_sync(sc->data_tag, cmd->data_dmamap, BUS_DMASYNC_POSTWRITE);
+		mtx_lock(&sc->io_lock);
+		bus_dmamap_unload(sc->data_tag, cmd->data_dmamap);
+		mtx_unlock(&sc->io_lock);
+	}
+}
+
+/*
+ * mrsas_data_load_cb:	Callback entry point
+ * input:				Pointer to command packet as argument
+ * 						Pointer to segment
+ * 						Number of segments
+ * 						Error code
+ *
+ * This is the callback function of the bus dma map load.  It builds the SG
+ * list.
+ */
+static void
+mrsas_data_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
+{
+	struct mrsas_mpt_cmd *cmd = (struct mrsas_mpt_cmd *)arg;
+	struct mrsas_softc *sc = cmd->sc;
+	MRSAS_RAID_SCSI_IO_REQUEST *io_request;
+	pMpi25IeeeSgeChain64_t sgl_ptr;
+	int i = 0, sg_processed = 0;
+
+	if (error) {
+		cmd->error_code = error;
+		device_printf(sc->mrsas_dev, "mrsas_data_load_cb: error=%d\n", error);
+		if (error == EFBIG) {
+			cmd->ccb_ptr->ccb_h.status = CAM_REQ_TOO_BIG;
+			return;
+		}
+	}
+	if (cmd->flags & MRSAS_DIR_IN)
+		bus_dmamap_sync(cmd->sc->data_tag, cmd->data_dmamap,
+		    BUS_DMASYNC_PREREAD);
+	if (cmd->flags & MRSAS_DIR_OUT)
+		bus_dmamap_sync(cmd->sc->data_tag, cmd->data_dmamap,
+		    BUS_DMASYNC_PREWRITE);
+	if (nseg > sc->max_num_sge) {
+		device_printf(sc->mrsas_dev, "SGE count is too large or 0.\n");
+		return;
+	}
+	io_request = cmd->io_request;
+	sgl_ptr = (pMpi25IeeeSgeChain64_t)&io_request->SGL;
+
+	if (sc->mrsas_gen3_ctrl) {
+		pMpi25IeeeSgeChain64_t sgl_ptr_end = sgl_ptr;
+
+		sgl_ptr_end += sc->max_sge_in_main_msg - 1;
+		sgl_ptr_end->Flags = 0;
+	}
+	if (nseg != 0) {
+		for (i = 0; i < nseg; i++) {
+			sgl_ptr->Address = segs[i].ds_addr;
+			sgl_ptr->Length = segs[i].ds_len;
+			sgl_ptr->Flags = 0;
+			if (sc->mrsas_gen3_ctrl) {
+				if (i == nseg - 1)
+					sgl_ptr->Flags = IEEE_SGE_FLAGS_END_OF_LIST;
+			}
+			sgl_ptr++;
+			sg_processed = i + 1;
+			if ((sg_processed == (sc->max_sge_in_main_msg - 1)) &&
+			    (nseg > sc->max_sge_in_main_msg)) {
+				pMpi25IeeeSgeChain64_t sg_chain;
+
+				if (sc->mrsas_gen3_ctrl) {
+					if ((cmd->io_request->IoFlags & MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH)
+					    != MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH)
+						cmd->io_request->ChainOffset = sc->chain_offset_io_request;
+					else
+						cmd->io_request->ChainOffset = 0;
+				} else
+					cmd->io_request->ChainOffset = sc->chain_offset_io_request;
+				sg_chain = sgl_ptr;
+				if (sc->mrsas_gen3_ctrl)
+					sg_chain->Flags = IEEE_SGE_FLAGS_CHAIN_ELEMENT;
+				else
+					sg_chain->Flags = (IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR);
+				sg_chain->Length = (sizeof(MPI2_SGE_IO_UNION) * (nseg - sg_processed));
+				sg_chain->Address = cmd->chain_frame_phys_addr;
+				sgl_ptr = (pMpi25IeeeSgeChain64_t)cmd->chain_frame;
+			}
+		}
+	}
+	cmd->sge_count = nseg;
+}
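+
+/*
+ * A worked example of the chaining rule above (numbers illustrative): with
+ * max_sge_in_main_msg = 8 and nseg = 20, SGEs 0..6 are written into the main
+ * message; when sg_processed reaches 7 the current slot is turned into a
+ * chain element (Length = sizeof(MPI2_SGE_IO_UNION) * 13, Address =
+ * chain_frame_phys_addr) and SGEs 7..19 continue in the chain frame.
+ */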
+
+/*
+ * mrsas_freeze_simq:	Freeze SIM queue
+ * input:				Pointer to command packet
+ * 						Pointer to SIM
+ *
+ * This function freezes the sim queue.
+ */
+static void
+mrsas_freeze_simq(struct mrsas_mpt_cmd *cmd, struct cam_sim *sim)
+{
+	union ccb *ccb = (union ccb *)(cmd->ccb_ptr);
+
+	xpt_freeze_simq(sim, 1);
+	ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
+	ccb->ccb_h.status |= CAM_REQUEUE_REQ;
+}
+
+void
+mrsas_xpt_freeze(struct mrsas_softc *sc)
+{
+	xpt_freeze_simq(sc->sim_0, 1);
+	xpt_freeze_simq(sc->sim_1, 1);
+}
+
+void
+mrsas_xpt_release(struct mrsas_softc *sc)
+{
+	xpt_release_simq(sc->sim_0, 1);
+	xpt_release_simq(sc->sim_1, 1);
+}
+
+/*
+ * mrsas_cmd_done:	Perform remaining command completion
+ * input:			Adapter instance soft state
+ * 					Pointer to command packet
+ *
+ * This function unmaps the request and releases the MPT command.
+ */
+void
+mrsas_cmd_done(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd)
+{
+	mrsas_unmap_request(sc, cmd);
+	
+	mtx_lock(&sc->sim_lock);
+	callout_stop(&cmd->cm_callout);
+	xpt_done(cmd->ccb_ptr);
+	cmd->ccb_ptr = NULL;
+	mtx_unlock(&sc->sim_lock);
+	mrsas_release_mpt_cmd(cmd);
+}
+
+/*
+ * mrsas_cam_poll:	Polling entry point
+ * input:			Pointer to SIM
+ *
+ * This is currently a stub function.
+ */
+static void
+mrsas_cam_poll(struct cam_sim *sim)
+{
+	int i;
+	struct mrsas_softc *sc = (struct mrsas_softc *)cam_sim_softc(sim);
+
+	if (sc->msix_vectors != 0) {
+		for (i = 0; i < sc->msix_vectors; i++) {
+			mrsas_complete_cmd(sc, i);
+		}
+	} else {
+		mrsas_complete_cmd(sc, 0);
+	}
+}
+
+/*
+ * mrsas_bus_scan:	Perform bus scan
+ * input:			Adapter instance soft state
+ *
+ * This function is needed for FreeBSD 7.x; it should not be called on
+ * FreeBSD 8.x and later, where the bus scan is automatic.
+ */
+int
+mrsas_bus_scan(struct mrsas_softc *sc)
+{
+	union ccb *ccb_0;
+	union ccb *ccb_1;
+
+	if ((ccb_0 = xpt_alloc_ccb()) == NULL) {
+		return (ENOMEM);
+	}
+	if ((ccb_1 = xpt_alloc_ccb()) == NULL) {
+		xpt_free_ccb(ccb_0);
+		return (ENOMEM);
+	}
+	mtx_lock(&sc->sim_lock);
+	if (xpt_create_path(&ccb_0->ccb_h.path, xpt_periph, cam_sim_path(sc->sim_0),
+	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
+		xpt_free_ccb(ccb_0);
+		xpt_free_ccb(ccb_1);
+		mtx_unlock(&sc->sim_lock);
+		return (EIO);
+	}
+	if (xpt_create_path(&ccb_1->ccb_h.path, xpt_periph, cam_sim_path(sc->sim_1),
+	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
+		xpt_free_ccb(ccb_0);
+		xpt_free_ccb(ccb_1);
+		mtx_unlock(&sc->sim_lock);
+		return (EIO);
+	}
+	mtx_unlock(&sc->sim_lock);
+	xpt_rescan(ccb_0);
+	xpt_rescan(ccb_1);
+
+	return (0);
+}
+
+/*
+ * mrsas_bus_scan_sim:	Perform bus scan per SIM
+ * input:				Adapter instance soft state
+ * 						Pointer to SIM
+ *
+ * This function is called from the event handler on LD creation/deletion
+ * and JBOD on/off.
+ */
+int
+mrsas_bus_scan_sim(struct mrsas_softc *sc, struct cam_sim *sim)
+{
+	union ccb *ccb;
+
+	if ((ccb = xpt_alloc_ccb()) == NULL) {
+		return (ENOMEM);
+	}
+	mtx_lock(&sc->sim_lock);
+	if (xpt_create_path(&ccb->ccb_h.path, xpt_periph, cam_sim_path(sim),
+	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
+		xpt_free_ccb(ccb);
+		mtx_unlock(&sc->sim_lock);
+		return (EIO);
+	}
+	mtx_unlock(&sc->sim_lock);
+	xpt_rescan(ccb);
+
+	return (0);
+}
+
+/*
+ * mrsas_track_scsiio:  Track IOs for a given target in the mpt_cmd_list
+ * input:           Adapter instance soft state
+ *                  Target ID of target
+ *                  Bus ID of the target
+ *
+ * This function checks for any pending IO in the whole mpt_cmd_list pool
+ * matching the bus_id and target_id passed in the arguments.  If such an IO
+ * is found, the target reset has not completed successfully.
+ *
+ * Returns FAIL if IOs are pending to the target device, else returns SUCCESS.
+ */
+static int
+mrsas_track_scsiio(struct mrsas_softc *sc, target_id_t tgt_id, u_int32_t bus_id)
+{
+	int i;
+	struct mrsas_mpt_cmd *mpt_cmd = NULL;
+
+	for (i = 0 ; i < sc->max_fw_cmds; i++) {
+		mpt_cmd = sc->mpt_cmd_list[i];
+
+		/*
+		 * Check if the target_id and bus_id are the same as those of
+		 * the timed-out IO.
+		 */
+		if (mpt_cmd->ccb_ptr) {
+			/* bus_id = 1 denotes a VD */
+			if (bus_id == 1)
+				tgt_id = (mpt_cmd->ccb_ptr->ccb_h.target_id - (MRSAS_MAX_PD - 1));
+
+			if (mpt_cmd->ccb_ptr->cpi.bus_id == bus_id &&
+			    mpt_cmd->ccb_ptr->ccb_h.target_id == tgt_id) {
+				device_printf(sc->mrsas_dev,
+				    "IO commands pending to target id %d\n", tgt_id);
+				return FAIL;
+			}
+		}
+	}
+
+	return SUCCESS;
+}
+
+#if TM_DEBUG
+/*
+ * mrsas_tm_response_code: Prints TM response code received from FW
+ * input:           Adapter instance soft state
+ *                  MPI reply returned from firmware
+ *
+ * Returns nothing.
+ */
+static void
+mrsas_tm_response_code(struct mrsas_softc *sc,
+	MPI2_SCSI_TASK_MANAGE_REPLY *mpi_reply)
+{
+	char *desc;
+
+	switch (mpi_reply->ResponseCode) {
+	case MPI2_SCSITASKMGMT_RSP_TM_COMPLETE:
+		desc = "task management request completed";
+		break;
+	case MPI2_SCSITASKMGMT_RSP_INVALID_FRAME:
+		desc = "invalid frame";
+		break;
+	case MPI2_SCSITASKMGMT_RSP_TM_NOT_SUPPORTED:
+		desc = "task management request not supported";
+		break;
+	case MPI2_SCSITASKMGMT_RSP_TM_FAILED:
+		desc = "task management request failed";
+		break;
+	case MPI2_SCSITASKMGMT_RSP_TM_SUCCEEDED:
+		desc = "task management request succeeded";
+		break;
+	case MPI2_SCSITASKMGMT_RSP_TM_INVALID_LUN:
+		desc = "invalid lun";
+		break;
+	case 0xA:
+		desc = "overlapped tag attempted";
+		break;
+	case MPI2_SCSITASKMGMT_RSP_IO_QUEUED_ON_IOC:
+		desc = "task queued, however not sent to target";
+		break;
+	default:
+		desc = "unknown";
+		break;
+	}
+	device_printf(sc->mrsas_dev, "response_code(%01x): %s\n",
+	    mpi_reply->ResponseCode, desc);
+	device_printf(sc->mrsas_dev,
+	    "TerminationCount/DevHandle/Function/TaskType/IOCStat/IOCLoginfo\n"
+	    "0x%x/0x%x/0x%x/0x%x/0x%x/0x%x\n",
+	    mpi_reply->TerminationCount, mpi_reply->DevHandle,
+	    mpi_reply->Function, mpi_reply->TaskType,
+	    mpi_reply->IOCStatus, mpi_reply->IOCLogInfo);
+}
+#endif
+
+/*
+ * mrsas_issue_tm:  Fires the TM command to FW and waits for completion
+ * input:           Adapter instance soft state
+ *                  request descriptor compiled by mrsas_reset_targets
+ *
+ * Returns FAIL if the TM command timed out in FW, else SUCCESS.
+ */
+static int
+mrsas_issue_tm(struct mrsas_softc *sc,
+	MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc)
+{
+	int sleep_stat;
+
+	mrsas_fire_cmd(sc, req_desc->addr.u.low, req_desc->addr.u.high);
+	sleep_stat = msleep(&sc->ocr_chan, &sc->sim_lock, PRIBIO, "tm_sleep", 50*hz);
+
+	if (sleep_stat == EWOULDBLOCK) {
+		device_printf(sc->mrsas_dev, "tm cmd TIMEDOUT\n");
+		return FAIL;
+	}
+
+	return SUCCESS;
+}
+
+/*
+ * mrsas_reset_targets : Gathers info to fire a target reset command
+ * input:           Adapter instance soft state
+ *
+ * This function compiles data for a target reset command to be fired to the
+ * FW and then traverses the target_reset_pool to find targets with timed-out
+ * IOs.
+ *
+ * Returns SUCCESS or FAIL
+ */
+int mrsas_reset_targets(struct mrsas_softc *sc)
+{
+	struct mrsas_mpt_cmd *tm_mpt_cmd = NULL;
+	struct mrsas_mpt_cmd *tgt_mpt_cmd = NULL;
+	MR_TASK_MANAGE_REQUEST *mr_request;
+	MPI2_SCSI_TASK_MANAGE_REQUEST *tm_mpi_request;
+	MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc;
+	int retCode = FAIL, count, i, outstanding;
+	u_int32_t MSIxIndex, bus_id;
+	target_id_t tgt_id;
+#if TM_DEBUG
+	MPI2_SCSI_TASK_MANAGE_REPLY *mpi_reply;
+#endif
+
+	outstanding = mrsas_atomic_read(&sc->fw_outstanding);
+
+	if (!outstanding) {
+		device_printf(sc->mrsas_dev, "NO IOs pending...\n");
+		mrsas_atomic_set(&sc->target_reset_outstanding, 0);
+		retCode = SUCCESS;
+		goto return_status;
+	} else if (sc->adprecovery != MRSAS_HBA_OPERATIONAL) {
+		device_printf(sc->mrsas_dev, "Controller is not operational\n");
+		goto return_status;
+	} else {
+		/* Some more error checks will be added in future */
+	}
+
+	/* Get an mpt frame and an index to fire the TM cmd */
+	tm_mpt_cmd = mrsas_get_mpt_cmd(sc);
+	if (!tm_mpt_cmd) {
+		retCode = FAIL;
+		goto return_status;
+	}
+
+	req_desc = mrsas_get_request_desc(sc, (tm_mpt_cmd->index) - 1);
+	if (!req_desc) {
+		device_printf(sc->mrsas_dev, "Cannot get request_descriptor for tm.\n");
+		retCode = FAIL;
+		goto release_mpt;
+	}
+	memset(req_desc, 0, sizeof(MRSAS_REQUEST_DESCRIPTOR_UNION));
+
+	req_desc->HighPriority.SMID = tm_mpt_cmd->index;
+	req_desc->HighPriority.RequestFlags =
+	    (MPI2_REQ_DESCRIPT_FLAGS_HIGH_PRIORITY <<
+	    MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
+	req_desc->HighPriority.MSIxIndex =  0;
+	req_desc->HighPriority.LMID = 0;
+	req_desc->HighPriority.Reserved1 = 0;
+	tm_mpt_cmd->request_desc = req_desc;
+
+	mr_request = (MR_TASK_MANAGE_REQUEST *) tm_mpt_cmd->io_request;
+	memset(mr_request, 0, sizeof(MR_TASK_MANAGE_REQUEST));
+
+	tm_mpi_request = (MPI2_SCSI_TASK_MANAGE_REQUEST *) &mr_request->TmRequest;
+	tm_mpi_request->Function = MPI2_FUNCTION_SCSI_TASK_MGMT;
+	tm_mpi_request->TaskType = MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET;
+	tm_mpi_request->TaskMID = 0; /* smid task */
+	tm_mpi_request->LUN[1] = 0;
+
+	/* Traverse the tm_mpt pool to get valid entries */
+	for (i = 0 ; i < MRSAS_MAX_TM_TARGETS; i++) {
+		if (!sc->target_reset_pool[i]) {
+			continue;
+		} else {
+			tgt_mpt_cmd = sc->target_reset_pool[i];
+		}
+
+		tgt_id = i;
+
+		/* See if the target is tm capable or NOT */
+		if (!tgt_mpt_cmd->tmCapable) {
+			device_printf(sc->mrsas_dev, "Task management NOT SUPPORTED for "
+			    "CAM target:%d\n", tgt_id);
+
+			retCode = FAIL;
+			goto release_mpt;
+		}
+
+		tm_mpi_request->DevHandle = tgt_mpt_cmd->io_request->DevHandle;
+
+		if (i < (MRSAS_MAX_PD - 1)) {
+			mr_request->uTmReqReply.tmReqFlags.isTMForPD = 1;
+			bus_id = 0;
+		} else {
+			mr_request->uTmReqReply.tmReqFlags.isTMForLD = 1;
+			bus_id = 1;
+		}
+
+		device_printf(sc->mrsas_dev, "TM will be fired for "
+		    "CAM target:%d and bus_id %d\n", tgt_id, bus_id);
+
+		sc->ocr_chan = (void *)&tm_mpt_cmd;
+		retCode = mrsas_issue_tm(sc, req_desc);
+		if (retCode == FAIL)
+			goto release_mpt;
+
+#if TM_DEBUG
+		mpi_reply =
+		    (MPI2_SCSI_TASK_MANAGE_REPLY *) &mr_request->uTmReqReply.TMReply;
+		mrsas_tm_response_code(sc, mpi_reply);
+#endif
+		mrsas_atomic_dec(&sc->target_reset_outstanding);
+		sc->target_reset_pool[i] = NULL;
+
+		/* Check for pending cmds in the mpt_cmd_pool with the tgt_id */
+		mrsas_disable_intr(sc);
+		/*
+		 * Wait for 1 second so that any parallel ISR calling the same
+		 * mrsas_complete_cmd() can finish.
+		 */
+		msleep(&sc->ocr_chan, &sc->sim_lock, PRIBIO, "mrsas_reset_wakeup",
+		   1 * hz);
+		count = sc->msix_vectors > 0 ? sc->msix_vectors : 1;
+		mtx_unlock(&sc->sim_lock);
+		for (MSIxIndex = 0; MSIxIndex < count; MSIxIndex++)
+		    mrsas_complete_cmd(sc, MSIxIndex);
+		mtx_lock(&sc->sim_lock);
+		retCode = mrsas_track_scsiio(sc, tgt_id, bus_id);
+		mrsas_enable_intr(sc);
+
+		if (retCode == FAIL)
+			goto release_mpt;
+	}
+
+	device_printf(sc->mrsas_dev, "Number of targets outstanding "
+	    "after reset: %d\n", mrsas_atomic_read(&sc->target_reset_outstanding));
+
+release_mpt:
+	mrsas_release_mpt_cmd(tm_mpt_cmd);
+return_status:
+	device_printf(sc->mrsas_dev, "target reset %s!!\n",
+		(retCode == SUCCESS) ? "SUCCESS" : "FAIL");
+
+	return retCode;
+}
+


Property changes on: trunk/sys/dev/mrsas/mrsas_cam.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/mrsas/mrsas_fp.c
===================================================================
--- trunk/sys/dev/mrsas/mrsas_fp.c	                        (rev 0)
+++ trunk/sys/dev/mrsas/mrsas_fp.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,1541 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2015, AVAGO Tech. All rights reserved. Author: Marian Choy
+ * Copyright (c) 2014, LSI Corp. All rights reserved. Author: Marian Choy
+ * Support: freebsdraid at avagotech.com
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer. 2. Redistributions
+ * in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or other
+ * materials provided with the distribution. 3. Neither the name of the
+ * <ORGANIZATION> nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing
+ * official policies,either expressed or implied, of the FreeBSD Project.
+ *
+ * Send feedback to: <megaraidfbsd at avagotech.com> Mail to: AVAGO TECHNOLOGIES, 1621
+ * Barber Lane, Milpitas, CA 95035 ATTN: MegaRaid FreeBSD
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/mrsas/mrsas_fp.c 310264 2016-12-19 13:14:39Z kadesai $");
+
+#include <dev/mrsas/mrsas.h>
+
+#include <cam/cam.h>
+#include <cam/cam_ccb.h>
+#include <cam/cam_sim.h>
+#include <cam/cam_xpt_sim.h>
+#include <cam/cam_debug.h>
+#include <cam/cam_periph.h>
+#include <cam/cam_xpt_periph.h>
+
+
+/*
+ * Function prototypes
+ */
+u_int8_t MR_ValidateMapInfo(struct mrsas_softc *sc);
+u_int8_t 
+mrsas_get_best_arm_pd(struct mrsas_softc *sc,
+    PLD_LOAD_BALANCE_INFO lbInfo, struct IO_REQUEST_INFO *io_info);
+u_int8_t
+MR_BuildRaidContext(struct mrsas_softc *sc,
+    struct IO_REQUEST_INFO *io_info,
+    RAID_CONTEXT * pRAID_Context, MR_DRV_RAID_MAP_ALL * map);
+u_int8_t
+MR_GetPhyParams(struct mrsas_softc *sc, u_int32_t ld,
+    u_int64_t stripRow, u_int16_t stripRef, struct IO_REQUEST_INFO *io_info,
+    RAID_CONTEXT * pRAID_Context,
+    MR_DRV_RAID_MAP_ALL * map);
+u_int16_t MR_TargetIdToLdGet(u_int32_t ldTgtId, MR_DRV_RAID_MAP_ALL * map);
+u_int32_t MR_LdBlockSizeGet(u_int32_t ldTgtId, MR_DRV_RAID_MAP_ALL * map);
+u_int16_t MR_GetLDTgtId(u_int32_t ld, MR_DRV_RAID_MAP_ALL * map);
+u_int16_t 
+mrsas_get_updated_dev_handle(struct mrsas_softc *sc,
+    PLD_LOAD_BALANCE_INFO lbInfo, struct IO_REQUEST_INFO *io_info);
+u_int32_t mega_mod64(u_int64_t dividend, u_int32_t divisor);
+u_int32_t
+MR_GetSpanBlock(u_int32_t ld, u_int64_t row, u_int64_t *span_blk,
+    MR_DRV_RAID_MAP_ALL * map, int *div_error);
+u_int64_t mega_div64_32(u_int64_t dividend, u_int32_t divisor);
+void 
+mrsas_update_load_balance_params(struct mrsas_softc *sc,
+    MR_DRV_RAID_MAP_ALL * map, PLD_LOAD_BALANCE_INFO lbInfo);
+void
+mrsas_set_pd_lba(MRSAS_RAID_SCSI_IO_REQUEST * io_request,
+    u_int8_t cdb_len, struct IO_REQUEST_INFO *io_info, union ccb *ccb,
+    MR_DRV_RAID_MAP_ALL * local_map_ptr, u_int32_t ref_tag,
+    u_int32_t ld_block_size);
+static u_int16_t
+MR_LdSpanArrayGet(u_int32_t ld, u_int32_t span,
+    MR_DRV_RAID_MAP_ALL * map);
+static u_int16_t MR_PdDevHandleGet(u_int32_t pd, MR_DRV_RAID_MAP_ALL * map);
+static u_int16_t
+MR_ArPdGet(u_int32_t ar, u_int32_t arm,
+    MR_DRV_RAID_MAP_ALL * map);
+static MR_LD_SPAN *
+MR_LdSpanPtrGet(u_int32_t ld, u_int32_t span,
+    MR_DRV_RAID_MAP_ALL * map);
+static u_int8_t
+MR_LdDataArmGet(u_int32_t ld, u_int32_t armIdx,
+    MR_DRV_RAID_MAP_ALL * map);
+static MR_SPAN_BLOCK_INFO *
+MR_LdSpanInfoGet(u_int32_t ld,
+    MR_DRV_RAID_MAP_ALL * map);
+MR_LD_RAID *MR_LdRaidGet(u_int32_t ld, MR_DRV_RAID_MAP_ALL * map);
+void	MR_PopulateDrvRaidMap(struct mrsas_softc *sc);
+
+
+/*
+ * Spanset related function prototypes Added for PRL11 configuration (Uneven
+ * span support)
+ */
+void	mr_update_span_set(MR_DRV_RAID_MAP_ALL * map, PLD_SPAN_INFO ldSpanInfo);
+static u_int8_t
+mr_spanset_get_phy_params(struct mrsas_softc *sc, u_int32_t ld,
+    u_int64_t stripRow, u_int16_t stripRef, struct IO_REQUEST_INFO *io_info,
+    RAID_CONTEXT * pRAID_Context, MR_DRV_RAID_MAP_ALL * map);
+static u_int64_t
+get_row_from_strip(struct mrsas_softc *sc, u_int32_t ld,
+    u_int64_t strip, MR_DRV_RAID_MAP_ALL * map);
+static u_int32_t
+mr_spanset_get_span_block(struct mrsas_softc *sc,
+    u_int32_t ld, u_int64_t row, u_int64_t *span_blk,
+    MR_DRV_RAID_MAP_ALL * map, int *div_error);
+static u_int8_t
+get_arm(struct mrsas_softc *sc, u_int32_t ld, u_int8_t span,
+    u_int64_t stripe, MR_DRV_RAID_MAP_ALL * map);
+
+
+/*
+ * Spanset-related defines.  Added for PRL11 configuration (uneven span support).
+ */
+#define	SPAN_ROW_SIZE(map, ld, index_) MR_LdSpanPtrGet(ld, index_, map)->spanRowSize
+#define	SPAN_ROW_DATA_SIZE(map_, ld, index_)	\
+	MR_LdSpanPtrGet(ld, index_, map_)->spanRowDataSize
+#define	SPAN_INVALID	0xff
+#define	SPAN_DEBUG		0
+
+/*
+ * Related Defines
+ */
+
+typedef u_int64_t REGION_KEY;
+typedef u_int32_t REGION_LEN;
+
+#define	MR_LD_STATE_OPTIMAL		3
+#define	FALSE					0
+#define	TRUE					1
+
+#define	LB_PENDING_CMDS_DEFAULT 4
+
+
+/*
+ * Related Macros
+ */
+
+#define	ABS_DIFF(a,b)   ( ((a) > (b)) ? ((a) - (b)) : ((b) - (a)) )
+
+#define	swap32(x) \
+  ((unsigned int)( \
+    (((unsigned int)(x) & (unsigned int)0x000000ffUL) << 24) | \
+    (((unsigned int)(x) & (unsigned int)0x0000ff00UL) <<  8) | \
+    (((unsigned int)(x) & (unsigned int)0x00ff0000UL) >>  8) | \
+    (((unsigned int)(x) & (unsigned int)0xff000000UL) >> 24) ))
+
+
+/*
+ * Inline macros for mod and divide of a 64-bit dividend by a 32-bit divisor.
+ * No check is made for a divisor of zero.
+ *
+ * @param dividend:	Dividend
+ * @param divisor:	Divisor
+ * @return		remainder (mega_mod64) or quotient (mega_div64_32)
+ */
+
+#define	mega_mod64(dividend, divisor) ({ \
+int remainder; \
+remainder = ((u_int64_t) (dividend)) % (u_int32_t) (divisor); \
+remainder;})
+
+#define	mega_div64_32(dividend, divisor) ({ \
+int quotient; \
+quotient = ((u_int64_t) (dividend)) / (u_int32_t) (divisor); \
+quotient;})
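+
+/*
+ * A minimal usage sketch of the two macros above (values illustrative):
+ *
+ *	row = mega_div64_32(strip, span_row_data_width);
+ *	off = mega_mod64(strip, span_row_data_width);
+ *
+ * e.g. strip = 1000005 and span_row_data_width = 3 give row = 333335 and
+ * off = 0, i.e. the (row, offset) decomposition of a 64-bit strip number
+ * with respect to a 32-bit row width.
+ */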
+
+
+/*
+ * Various RAID map access functions.  These functions access the various
+ * parts of the RAID map and return the appropriate parameters.
+ */
+
+MR_LD_RAID *
+MR_LdRaidGet(u_int32_t ld, MR_DRV_RAID_MAP_ALL * map)
+{
+	return (&map->raidMap.ldSpanMap[ld].ldRaid);
+}
+
+u_int16_t
+MR_GetLDTgtId(u_int32_t ld, MR_DRV_RAID_MAP_ALL * map)
+{
+	return (map->raidMap.ldSpanMap[ld].ldRaid.targetId);
+}
+
+static u_int16_t
+MR_LdSpanArrayGet(u_int32_t ld, u_int32_t span, MR_DRV_RAID_MAP_ALL * map)
+{
+	return map->raidMap.ldSpanMap[ld].spanBlock[span].span.arrayRef;
+}
+
+static u_int8_t
+MR_LdDataArmGet(u_int32_t ld, u_int32_t armIdx, MR_DRV_RAID_MAP_ALL * map)
+{
+	return map->raidMap.ldSpanMap[ld].dataArmMap[armIdx];
+}
+
+static u_int16_t
+MR_PdDevHandleGet(u_int32_t pd, MR_DRV_RAID_MAP_ALL * map)
+{
+	return map->raidMap.devHndlInfo[pd].curDevHdl;
+}
+
+static u_int16_t
+MR_ArPdGet(u_int32_t ar, u_int32_t arm, MR_DRV_RAID_MAP_ALL * map)
+{
+	return map->raidMap.arMapInfo[ar].pd[arm];
+}
+
+static MR_LD_SPAN *
+MR_LdSpanPtrGet(u_int32_t ld, u_int32_t span, MR_DRV_RAID_MAP_ALL * map)
+{
+	return &map->raidMap.ldSpanMap[ld].spanBlock[span].span;
+}
+
+static MR_SPAN_BLOCK_INFO *
+MR_LdSpanInfoGet(u_int32_t ld, MR_DRV_RAID_MAP_ALL * map)
+{
+	return &map->raidMap.ldSpanMap[ld].spanBlock[0];
+}
+
+u_int16_t
+MR_TargetIdToLdGet(u_int32_t ldTgtId, MR_DRV_RAID_MAP_ALL * map)
+{
+	return map->raidMap.ldTgtIdToLd[ldTgtId];
+}
+
+u_int32_t
+MR_LdBlockSizeGet(u_int32_t ldTgtId, MR_DRV_RAID_MAP_ALL * map)
+{
+	MR_LD_RAID *raid;
+	u_int32_t ld, ldBlockSize = MRSAS_SCSIBLOCKSIZE;
+
+	ld = MR_TargetIdToLdGet(ldTgtId, map);
+
+	/*
+	 * Check if logical drive was removed.
+	 */
+	if (ld >= MAX_LOGICAL_DRIVES)
+		return ldBlockSize;
+
+	raid = MR_LdRaidGet(ld, map);
+	ldBlockSize = raid->logicalBlockLength;
+	if (!ldBlockSize)
+		ldBlockSize = MRSAS_SCSIBLOCKSIZE;
+
+	return ldBlockSize;
+}
+
+/*
+ * This function populates the driver RAID map using the firmware RAID map.
+ */
+void
+MR_PopulateDrvRaidMap(struct mrsas_softc *sc)
+{
+	MR_FW_RAID_MAP_ALL *fw_map_old = NULL;
+	MR_FW_RAID_MAP *pFwRaidMap = NULL;
+	unsigned int i;
+
+	MR_DRV_RAID_MAP_ALL *drv_map = sc->ld_drv_map[(sc->map_id & 1)];
+	MR_DRV_RAID_MAP *pDrvRaidMap = &drv_map->raidMap;
+
+	if (sc->max256vdSupport) {
+		memcpy(sc->ld_drv_map[sc->map_id & 1],
+		    sc->raidmap_mem[sc->map_id & 1],
+		    sc->current_map_sz);
+		/*
+		 * New Raid map will not set totalSize, so keep expected
+		 * value for legacy code in ValidateMapInfo
+		 */
+		pDrvRaidMap->totalSize = sizeof(MR_FW_RAID_MAP_EXT);
+	} else {
+		fw_map_old = (MR_FW_RAID_MAP_ALL *) sc->raidmap_mem[(sc->map_id & 1)];
+		pFwRaidMap = &fw_map_old->raidMap;
+
+#if VD_EXT_DEBUG
+		for (i = 0; i < pFwRaidMap->ldCount; i++) {
+			device_printf(sc->mrsas_dev,
+			    "Index 0x%x Target Id 0x%x Seq Num 0x%x Size 0x%lx\n", i,
+			    fw_map_old->raidMap.ldSpanMap[i].ldRaid.targetId,
+			    fw_map_old->raidMap.ldSpanMap[i].ldRaid.seqNum,
+			    fw_map_old->raidMap.ldSpanMap[i].ldRaid.size);
+		}
+#endif
+
+		memset(drv_map, 0, sc->drv_map_sz);
+		pDrvRaidMap->totalSize = pFwRaidMap->totalSize;
+		pDrvRaidMap->ldCount = pFwRaidMap->ldCount;
+		pDrvRaidMap->fpPdIoTimeoutSec =
+		    pFwRaidMap->fpPdIoTimeoutSec;
+
+		for (i = 0; i < MAX_RAIDMAP_LOGICAL_DRIVES + MAX_RAIDMAP_VIEWS; i++) {
+			pDrvRaidMap->ldTgtIdToLd[i] =
+			    (u_int8_t)pFwRaidMap->ldTgtIdToLd[i];
+		}
+
+		for (i = 0; i < pDrvRaidMap->ldCount; i++) {
+			pDrvRaidMap->ldSpanMap[i] =
+			    pFwRaidMap->ldSpanMap[i];
+
+#if VD_EXT_DEBUG
+			device_printf(sc->mrsas_dev, "pFwRaidMap->ldSpanMap[%d].ldRaid.targetId 0x%x "
+			    "pFwRaidMap->ldSpanMap[%d].ldRaid.seqNum 0x%x size 0x%x\n",
+			    i, i, pFwRaidMap->ldSpanMap[i].ldRaid.targetId,
+			    pFwRaidMap->ldSpanMap[i].ldRaid.seqNum,
+			    (u_int32_t)pFwRaidMap->ldSpanMap[i].ldRaid.rowSize);
+			device_printf(sc->mrsas_dev, "pDrvRaidMap->ldSpanMap[%d].ldRaid.targetId 0x%x"
+			    "pDrvRaidMap->ldSpanMap[%d].ldRaid.seqNum 0x%x size 0x%x\n", i, i,
+			    pDrvRaidMap->ldSpanMap[i].ldRaid.targetId,
+			    pDrvRaidMap->ldSpanMap[i].ldRaid.seqNum,
+			    (u_int32_t)pDrvRaidMap->ldSpanMap[i].ldRaid.rowSize);
+			device_printf(sc->mrsas_dev, "drv raid map all %p raid map %p LD RAID MAP %p/%p\n",
+			    drv_map, pDrvRaidMap,
+			    &pFwRaidMap->ldSpanMap[i].ldRaid, &pDrvRaidMap->ldSpanMap[i].ldRaid);
+#endif
+		}
+
+		memcpy(pDrvRaidMap->arMapInfo, pFwRaidMap->arMapInfo,
+		    sizeof(MR_ARRAY_INFO) * MAX_RAIDMAP_ARRAYS);
+		memcpy(pDrvRaidMap->devHndlInfo, pFwRaidMap->devHndlInfo,
+		    sizeof(MR_DEV_HANDLE_INFO) *
+		    MAX_RAIDMAP_PHYSICAL_DEVICES);
+	}
+}
+
+/*
+ * MR_ValidateMapInfo:	Validate RAID map
+ * input:				Adapter instance soft state
+ *
+ * This function checks and validates the loaded RAID map. It returns 0 if
+ * successful, and 1 otherwise.
+ */
+u_int8_t
+MR_ValidateMapInfo(struct mrsas_softc *sc)
+{
+	if (!sc) {
+		return 1;
+	}
+	MR_PopulateDrvRaidMap(sc);
+
+	MR_DRV_RAID_MAP_ALL *drv_map = sc->ld_drv_map[(sc->map_id & 1)];
+	MR_DRV_RAID_MAP *pDrvRaidMap = &drv_map->raidMap;
+
+	u_int32_t expected_map_size;
+
+	drv_map = sc->ld_drv_map[(sc->map_id & 1)];
+	pDrvRaidMap = &drv_map->raidMap;
+	PLD_SPAN_INFO ldSpanInfo = (PLD_SPAN_INFO) & sc->log_to_span;
+
+	if (sc->max256vdSupport)
+		expected_map_size = sizeof(MR_FW_RAID_MAP_EXT);
+	else
+		expected_map_size =
+		    (sizeof(MR_FW_RAID_MAP) - sizeof(MR_LD_SPAN_MAP)) +
+		    (sizeof(MR_LD_SPAN_MAP) * pDrvRaidMap->ldCount);
+
+	if (pDrvRaidMap->totalSize != expected_map_size) {
+		device_printf(sc->mrsas_dev, "map size %x not matching ld count\n", expected_map_size);
+		device_printf(sc->mrsas_dev, "span map= %x\n", (unsigned int)sizeof(MR_LD_SPAN_MAP));
+		device_printf(sc->mrsas_dev, "pDrvRaidMap->totalSize=%x\n", pDrvRaidMap->totalSize);
+		return 1;
+	}
+	if (sc->UnevenSpanSupport) {
+		mr_update_span_set(drv_map, ldSpanInfo);
+	}
+	mrsas_update_load_balance_params(sc, drv_map, sc->load_balance_info);
+
+	return 0;
+}
+
+/*
+ *
+ * Function to print info about span set created in driver from FW raid map
+ *
+ * Inputs:		map
+ * ldSpanInfo:	ld map span info per HBA instance
+ *
+ *
+ */
+#if SPAN_DEBUG
+static int
+getSpanInfo(MR_DRV_RAID_MAP_ALL * map, PLD_SPAN_INFO ldSpanInfo)
+{
+
+	u_int8_t span;
+	u_int32_t element;
+	MR_LD_RAID *raid;
+	LD_SPAN_SET *span_set;
+	MR_QUAD_ELEMENT *quad;
+	int ldCount;
+	u_int16_t ld;
+
+	for (ldCount = 0; ldCount < MAX_LOGICAL_DRIVES; ldCount++) {
+		ld = MR_TargetIdToLdGet(ldCount, map);
+		if (ld >= MAX_LOGICAL_DRIVES) {
+			continue;
+		}
+		raid = MR_LdRaidGet(ld, map);
+		printf("LD %x: span_depth=%x\n", ld, raid->spanDepth);
+		for (span = 0; span < raid->spanDepth; span++)
+			printf("Span=%x, number of quads=%x\n", span,
+			    map->raidMap.ldSpanMap[ld].spanBlock[span].
+			    block_span_info.noElements);
+		for (element = 0; element < MAX_QUAD_DEPTH; element++) {
+			span_set = &(ldSpanInfo[ld].span_set[element]);
+			if (span_set->span_row_data_width == 0)
+				break;
+
+			printf("Span Set %x: width=%x, diff=%x\n", element,
+			    (unsigned int)span_set->span_row_data_width,
+			    (unsigned int)span_set->diff);
+			printf("logical LBA start=0x%08lx, end=0x%08lx\n",
+			    (long unsigned int)span_set->log_start_lba,
+			    (long unsigned int)span_set->log_end_lba);
+			printf("span row start=0x%08lx, end=0x%08lx\n",
+			    (long unsigned int)span_set->span_row_start,
+			    (long unsigned int)span_set->span_row_end);
+			printf("data row start=0x%08lx, end=0x%08lx\n",
+			    (long unsigned int)span_set->data_row_start,
+			    (long unsigned int)span_set->data_row_end);
+			printf("data strip start=0x%08lx, end=0x%08lx\n",
+			    (long unsigned int)span_set->data_strip_start,
+			    (long unsigned int)span_set->data_strip_end);
+
+			for (span = 0; span < raid->spanDepth; span++) {
+				if (map->raidMap.ldSpanMap[ld].spanBlock[span].
+				    block_span_info.noElements >= element + 1) {
+					quad = &map->raidMap.ldSpanMap[ld].
+					    spanBlock[span].block_span_info.
+					    quad[element];
+					printf("Span=%x, Quad=%x, diff=%x\n", span,
+					    element, quad->diff);
+					printf("offset_in_span=0x%08lx\n",
+					    (long unsigned int)quad->offsetInSpan);
+					printf("logical start=0x%08lx, end=0x%08lx\n",
+					    (long unsigned int)quad->logStart,
+					    (long unsigned int)quad->logEnd);
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+#endif
+/*
+ *
+ * This routine calculates the Span block for given row using spanset.
+ *
+ * Inputs :	HBA instance
+ * ld:		Logical drive number
+ * row:		Row number
+ * map:		LD map
+ *
+ * Outputs :	span      - Span number
+ * 				block     - Absolute block number in the physical disk
+ * 				div_error - Divide error code
+ */
+
+u_int32_t
+mr_spanset_get_span_block(struct mrsas_softc *sc, u_int32_t ld, u_int64_t row,
+    u_int64_t *span_blk, MR_DRV_RAID_MAP_ALL * map, int *div_error)
+{
+	MR_LD_RAID *raid = MR_LdRaidGet(ld, map);
+	LD_SPAN_SET *span_set;
+	MR_QUAD_ELEMENT *quad;
+	u_int32_t span, info;
+	PLD_SPAN_INFO ldSpanInfo = sc->log_to_span;
+
+	for (info = 0; info < MAX_QUAD_DEPTH; info++) {
+		span_set = &(ldSpanInfo[ld].span_set[info]);
+
+		if (span_set->span_row_data_width == 0)
+			break;
+		if (row > span_set->data_row_end)
+			continue;
+
+		for (span = 0; span < raid->spanDepth; span++)
+			if (map->raidMap.ldSpanMap[ld].spanBlock[span].
+			    block_span_info.noElements >= info + 1) {
+				quad = &map->raidMap.ldSpanMap[ld].
+				    spanBlock[span].
+				    block_span_info.quad[info];
+				if (quad->diff == 0) {
+					*div_error = 1;
+					return span;
+				}
+				if (quad->logStart <= row &&
+				    row <= quad->logEnd &&
+				    (mega_mod64(row - quad->logStart,
+				    quad->diff)) == 0) {
+					if (span_blk != NULL) {
+						u_int64_t blk;
+
+						blk = mega_div64_32
+						    ((row - quad->logStart),
+						    quad->diff);
+						blk = (blk + quad->offsetInSpan)
+						    << raid->stripeShift;
+						*span_blk = blk;
+					}
+					return span;
+				}
+			}
+	}
+	return SPAN_INVALID;
+}
+
+/*
+ *
+ * This routine calculates the row for given strip using spanset.
+ *
+ * Inputs :	HBA instance
+ * ld:		Logical drive number
+ * Strip:	Strip
+ * map:		LD map
+ *
+ * Outputs :	row - row associated with strip
+ */
+
+static u_int64_t
+get_row_from_strip(struct mrsas_softc *sc,
+    u_int32_t ld, u_int64_t strip, MR_DRV_RAID_MAP_ALL * map)
+{
+	MR_LD_RAID *raid = MR_LdRaidGet(ld, map);
+	LD_SPAN_SET *span_set;
+	PLD_SPAN_INFO ldSpanInfo = sc->log_to_span;
+	u_int32_t info, strip_offset, span, span_offset;
+	u_int64_t span_set_Strip, span_set_Row;
+
+	for (info = 0; info < MAX_QUAD_DEPTH; info++) {
+		span_set = &(ldSpanInfo[ld].span_set[info]);
+
+		if (span_set->span_row_data_width == 0)
+			break;
+		if (strip > span_set->data_strip_end)
+			continue;
+
+		span_set_Strip = strip - span_set->data_strip_start;
+		strip_offset = mega_mod64(span_set_Strip,
+		    span_set->span_row_data_width);
+		span_set_Row = mega_div64_32(span_set_Strip,
+		    span_set->span_row_data_width) * span_set->diff;
+		for (span = 0, span_offset = 0; span < raid->spanDepth; span++)
+			if (map->raidMap.ldSpanMap[ld].spanBlock[span].
+			    block_span_info.noElements >= info + 1) {
+				if (strip_offset >=
+				    span_set->strip_offset[span])
+					span_offset++;
+				else
+					break;
+			}
+		mrsas_dprint(sc, MRSAS_PRL11, "AVAGO Debug : Strip 0x%llx, span_set_Strip 0x%llx, span_set_Row 0x%llx "
+		    "data width 0x%llx span offset 0x%llx\n", (unsigned long long)strip,
+		    (unsigned long long)span_set_Strip,
+		    (unsigned long long)span_set_Row,
+		    (unsigned long long)span_set->span_row_data_width, (unsigned long long)span_offset);
+		mrsas_dprint(sc, MRSAS_PRL11, "AVAGO Debug : For strip 0x%llx row is 0x%llx\n", (unsigned long long)strip,
+		    (unsigned long long)span_set->data_row_start +
+		    (unsigned long long)span_set_Row + (span_offset - 1));
+		return (span_set->data_row_start + span_set_Row + (span_offset - 1));
+	}
+	return -1LLU;
+}
+
+
+/*
+ *
+ * This routine calculates the Start Strip for given row using spanset.
+ *
+ * Inputs:	HBA instance
+ * ld:		Logical drive number
+ * row:		Row number
+ * map:		LD map
+ *
+ * Outputs :	Strip - Start strip associated with row
+ */
+
+static u_int64_t
+get_strip_from_row(struct mrsas_softc *sc,
+    u_int32_t ld, u_int64_t row, MR_DRV_RAID_MAP_ALL * map)
+{
+	MR_LD_RAID *raid = MR_LdRaidGet(ld, map);
+	LD_SPAN_SET *span_set;
+	MR_QUAD_ELEMENT *quad;
+	PLD_SPAN_INFO ldSpanInfo = sc->log_to_span;
+	u_int32_t span, info;
+	u_int64_t strip;
+
+	for (info = 0; info < MAX_QUAD_DEPTH; info++) {
+		span_set = &(ldSpanInfo[ld].span_set[info]);
+
+		if (span_set->span_row_data_width == 0)
+			break;
+		if (row > span_set->data_row_end)
+			continue;
+
+		for (span = 0; span < raid->spanDepth; span++)
+			if (map->raidMap.ldSpanMap[ld].spanBlock[span].
+			    block_span_info.noElements >= info + 1) {
+				quad = &map->raidMap.ldSpanMap[ld].
+				    spanBlock[span].block_span_info.quad[info];
+				if (quad->logStart <= row &&
+				    row <= quad->logEnd &&
+				    mega_mod64((row - quad->logStart),
+				    quad->diff) == 0) {
+					strip = mega_div64_32
+					    (((row - span_set->data_row_start)
+					    - quad->logStart),
+					    quad->diff);
+					strip *= span_set->span_row_data_width;
+					strip += span_set->data_strip_start;
+					strip += span_set->strip_offset[span];
+					return strip;
+				}
+			}
+	}
+	mrsas_dprint(sc, MRSAS_PRL11, "AVAGO Debug - get_strip_from_row: returns invalid "
+	    "strip for ld=%x, row=%lx\n", ld, (long unsigned int)row);
+	return -1;
+}
+
+/*
+ * *****************************************************************************
+ *
+ *
+ * This routine calculates the Physical Arm for given strip using spanset.
+ *
+ * Inputs :	HBA instance
+ * 			Logical drive number
+ * 			Strip
+ * 			LD map
+ *
+ * Outputs :	Phys Arm - Phys Arm associated with strip
+ */
+
+static u_int32_t
+get_arm_from_strip(struct mrsas_softc *sc,
+    u_int32_t ld, u_int64_t strip, MR_DRV_RAID_MAP_ALL * map)
+{
+	MR_LD_RAID *raid = MR_LdRaidGet(ld, map);
+	LD_SPAN_SET *span_set;
+	PLD_SPAN_INFO ldSpanInfo = sc->log_to_span;
+	u_int32_t info, strip_offset, span, span_offset;
+
+	for (info = 0; info < MAX_QUAD_DEPTH; info++) {
+		span_set = &(ldSpanInfo[ld].span_set[info]);
+
+		if (span_set->span_row_data_width == 0)
+			break;
+		if (strip > span_set->data_strip_end)
+			continue;
+
+		strip_offset = (u_int32_t)mega_mod64
+		    ((strip - span_set->data_strip_start),
+		    span_set->span_row_data_width);
+
+		for (span = 0, span_offset = 0; span < raid->spanDepth; span++)
+			if (map->raidMap.ldSpanMap[ld].spanBlock[span].
+			    block_span_info.noElements >= info + 1) {
+				if (strip_offset >= span_set->strip_offset[span])
+					span_offset = span_set->strip_offset[span];
+				else
+					break;
+			}
+		mrsas_dprint(sc, MRSAS_PRL11, "AVAGO PRL11: get_arm_from_strip: "
+		    "for ld=0x%x strip=0x%lx arm is  0x%x\n", ld,
+		    (long unsigned int)strip, (strip_offset - span_offset));
+		return (strip_offset - span_offset);
+	}
+
+	mrsas_dprint(sc, MRSAS_PRL11, "AVAGO Debug: - get_arm_from_strip: returns invalid arm"
+	    " for ld=%x strip=%lx\n", ld, (long unsigned int)strip);
+
+	return -1;
+}
+
+
+/* This function returns the physical arm. */
+u_int8_t
+get_arm(struct mrsas_softc *sc, u_int32_t ld, u_int8_t span, u_int64_t stripe,
+    MR_DRV_RAID_MAP_ALL * map)
+{
+	MR_LD_RAID *raid = MR_LdRaidGet(ld, map);
+
+	/* Need to check correct default value */
+	u_int32_t arm = 0;
+
+	switch (raid->level) {
+	case 0:
+	case 5:
+	case 6:
+		arm = mega_mod64(stripe, SPAN_ROW_SIZE(map, ld, span));
+		break;
+	case 1:
+		/* start with logical arm */
+		arm = get_arm_from_strip(sc, ld, stripe, map);
+		arm *= 2;
+		break;
+	}
+
+	return arm;
+}
+
+/*
+ *
+ * This routine calculates the arm, span and block for the specified stripe and
+ * reference in stripe using spanset
+ *
+ * Inputs :
+ * sc - HBA instance
+ * ld - Logical drive number
+ * stripRow: Stripe number
+ * stripRef: Reference in stripe
+ *
+ * Outputs :	span - Span number block - Absolute Block
+ * number in the physical disk
+ */
+static u_int8_t
+mr_spanset_get_phy_params(struct mrsas_softc *sc, u_int32_t ld, u_int64_t stripRow,
+    u_int16_t stripRef, struct IO_REQUEST_INFO *io_info,
+    RAID_CONTEXT * pRAID_Context, MR_DRV_RAID_MAP_ALL * map)
+{
+	MR_LD_RAID *raid = MR_LdRaidGet(ld, map);
+	u_int32_t pd, arRef;
+	u_int8_t physArm, span;
+	u_int64_t row;
+	u_int8_t retval = TRUE;
+	u_int64_t *pdBlock = &io_info->pdBlock;
+	u_int16_t *pDevHandle = &io_info->devHandle;
+	u_int32_t logArm, rowMod, armQ, arm;
+
+	/* Get row and span from io_info for Uneven Span IO. */
+	row = io_info->start_row;
+	span = io_info->start_span;
+
+
+	if (raid->level == 6) {
+		logArm = get_arm_from_strip(sc, ld, stripRow, map);
+		rowMod = mega_mod64(row, SPAN_ROW_SIZE(map, ld, span));
+		armQ = SPAN_ROW_SIZE(map, ld, span) - 1 - rowMod;
+		arm = armQ + 1 + logArm;
+		if (arm >= SPAN_ROW_SIZE(map, ld, span))
+			arm -= SPAN_ROW_SIZE(map, ld, span);
+		physArm = (u_int8_t)arm;
+	} else
+		/* Calculate the arm */
+		physArm = get_arm(sc, ld, span, stripRow, map);
+
+
+	arRef = MR_LdSpanArrayGet(ld, span, map);
+	pd = MR_ArPdGet(arRef, physArm, map);
+
+	if (pd != MR_PD_INVALID)
+		*pDevHandle = MR_PdDevHandleGet(pd, map);
+	else {
+		*pDevHandle = MR_PD_INVALID;
+		if ((raid->level >= 5) && ((!sc->mrsas_gen3_ctrl) || (sc->mrsas_gen3_ctrl &&
+		    raid->regTypeReqOnRead != REGION_TYPE_UNUSED)))
+			pRAID_Context->regLockFlags = REGION_TYPE_EXCLUSIVE;
+		else if (raid->level == 1) {
+			pd = MR_ArPdGet(arRef, physArm + 1, map);
+			if (pd != MR_PD_INVALID)
+				*pDevHandle = MR_PdDevHandleGet(pd, map);
+		}
+	}
+
+	*pdBlock += stripRef + MR_LdSpanPtrGet(ld, span, map)->startBlk;
+	pRAID_Context->spanArm = (span << RAID_CTX_SPANARM_SPAN_SHIFT) | physArm;
+	io_info->span_arm = pRAID_Context->spanArm;
+	return retval;
+}
+
+/*
+ * MR_BuildRaidContext:	Set up Fast path RAID context
+ *
+ * This function sets up the fast path RAID context.  The start/end row and
+ * strip information is calculated and the region lock parameters are filled
+ * in.  It returns FALSE if the context cannot be built, otherwise TRUE.
+ */
+u_int8_t
+MR_BuildRaidContext(struct mrsas_softc *sc, struct IO_REQUEST_INFO *io_info,
+    RAID_CONTEXT * pRAID_Context, MR_DRV_RAID_MAP_ALL * map)
+{
+	MR_LD_RAID *raid;
+	u_int32_t ld, stripSize, stripe_mask;
+	u_int64_t endLba, endStrip, endRow, start_row, start_strip;
+	REGION_KEY regStart;
+	REGION_LEN regSize;
+	u_int8_t num_strips, numRows;
+	u_int16_t ref_in_start_stripe, ref_in_end_stripe;
+	u_int64_t ldStartBlock;
+	u_int32_t numBlocks, ldTgtId;
+	u_int8_t isRead, stripIdx;
+	u_int8_t retval = 0;
+	u_int8_t startlba_span = SPAN_INVALID;
+	u_int64_t *pdBlock = &io_info->pdBlock;
+	int error_code = 0;
+
+	ldStartBlock = io_info->ldStartBlock;
+	numBlocks = io_info->numBlocks;
+	ldTgtId = io_info->ldTgtId;
+	isRead = io_info->isRead;
+
+	io_info->IoforUnevenSpan = 0;
+	io_info->start_span = SPAN_INVALID;
+
+	ld = MR_TargetIdToLdGet(ldTgtId, map);
+	raid = MR_LdRaidGet(ld, map);
+
+	if (raid->rowDataSize == 0) {
+		if (MR_LdSpanPtrGet(ld, 0, map)->spanRowDataSize == 0)
+			return FALSE;
+		else if (sc->UnevenSpanSupport) {
+			io_info->IoforUnevenSpan = 1;
+		} else {
+			mrsas_dprint(sc, MRSAS_PRL11, "AVAGO Debug: raid->rowDataSize is 0, but has SPAN[0] rowDataSize = 0x%0x,"
+			    " but there is _NO_ UnevenSpanSupport\n",
+			    MR_LdSpanPtrGet(ld, 0, map)->spanRowDataSize);
+			return FALSE;
+		}
+	}
+	stripSize = 1 << raid->stripeShift;
+	stripe_mask = stripSize - 1;
+	/*
+	 * calculate starting row and stripe, and number of strips and rows
+	 */
+	start_strip = ldStartBlock >> raid->stripeShift;
+	ref_in_start_stripe = (u_int16_t)(ldStartBlock & stripe_mask);
+	endLba = ldStartBlock + numBlocks - 1;
+	ref_in_end_stripe = (u_int16_t)(endLba & stripe_mask);
+	endStrip = endLba >> raid->stripeShift;
+	num_strips = (u_int8_t)(endStrip - start_strip + 1);	/* number of strips */
+	if (io_info->IoforUnevenSpan) {
+		start_row = get_row_from_strip(sc, ld, start_strip, map);
+		endRow = get_row_from_strip(sc, ld, endStrip, map);
+		if (raid->spanDepth == 1) {
+			startlba_span = 0;
+			*pdBlock = start_row << raid->stripeShift;
+		} else {
+			startlba_span = (u_int8_t)mr_spanset_get_span_block(sc, ld, start_row,
+			    pdBlock, map, &error_code);
+			if (error_code == 1) {
+				mrsas_dprint(sc, MRSAS_PRL11, "AVAGO Debug: return from %s %d. Send IO w/o region lock.\n",
+				    __func__, __LINE__);
+				return FALSE;
+			}
+		}
+		if (startlba_span == SPAN_INVALID) {
+			mrsas_dprint(sc, MRSAS_PRL11, "AVAGO Debug: return from %s %d for row 0x%llx,"
+			    "start strip %llx endStrip %llx\n", __func__,
+			    __LINE__, (unsigned long long)start_row,
+			    (unsigned long long)start_strip,
+			    (unsigned long long)endStrip);
+			return FALSE;
+		}
+		io_info->start_span = startlba_span;
+		io_info->start_row = start_row;
+		mrsas_dprint(sc, MRSAS_PRL11, "AVAGO Debug: Check Span number from %s %d for row 0x%llx, "
+		    " start strip 0x%llx endStrip 0x%llx span 0x%x\n",
+		    __func__, __LINE__, (unsigned long long)start_row,
+		    (unsigned long long)start_strip,
+		    (unsigned long long)endStrip, startlba_span);
+		mrsas_dprint(sc, MRSAS_PRL11, "AVAGO Debug : 1. start_row 0x%llx endRow 0x%llx Start span 0x%x\n",
+		    (unsigned long long)start_row, (unsigned long long)endRow, startlba_span);
+	} else {
+		start_row = mega_div64_32(start_strip, raid->rowDataSize);
+		endRow = mega_div64_32(endStrip, raid->rowDataSize);
+	}
+
+	numRows = (u_int8_t)(endRow - start_row + 1);	/* get the row count */
+
+	/*
+	 * Calculate region info.  (Assume region at start of first row, and
+	 * assume this IO needs the full row - will adjust if not true.)
+	 */
+	regStart = start_row << raid->stripeShift;
+	regSize = stripSize;
+
+	/* Check if we can send this I/O via FastPath */
+	if (raid->capability.fpCapable) {
+		if (isRead)
+			io_info->fpOkForIo = (raid->capability.fpReadCapable &&
+			    ((num_strips == 1) ||
+			    raid->capability.fpReadAcrossStripe));
+		else
+			io_info->fpOkForIo = (raid->capability.fpWriteCapable &&
+			    ((num_strips == 1) ||
+			    raid->capability.fpWriteAcrossStripe));
+	} else
+		io_info->fpOkForIo = FALSE;
+
+	if (numRows == 1) {
+		if (num_strips == 1) {
+			regStart += ref_in_start_stripe;
+			regSize = numBlocks;
+		}
+	} else if (io_info->IoforUnevenSpan == 0) {
+		/*
+		 * For Even span region lock optimization. If the start strip
+		 * is the last in the start row
+		 */
+		if (start_strip == (start_row + 1) * raid->rowDataSize - 1) {
+			regStart += ref_in_start_stripe;
+			/*
+			 * initialize count to sectors from startRef to end
+			 * of strip
+			 */
+			regSize = stripSize - ref_in_start_stripe;
+		}
+		/* add complete rows in the middle of the transfer */
+		if (numRows > 2)
+			regSize += (numRows - 2) << raid->stripeShift;
+
+		/* if IO ends within first strip of last row */
+		if (endStrip == endRow * raid->rowDataSize)
+			regSize += ref_in_end_stripe + 1;
+		else
+			regSize += stripSize;
+	} else {
+		if (start_strip == (get_strip_from_row(sc, ld, start_row, map) +
+		    SPAN_ROW_DATA_SIZE(map, ld, startlba_span) - 1)) {
+			regStart += ref_in_start_stripe;
+			/*
+			 * initialize count to sectors from startRef to end
+			 * of strip
+			 */
+			regSize = stripSize - ref_in_start_stripe;
+		}
+		/* add complete rows in the middle of the transfer */
+		if (numRows > 2)
+			regSize += (numRows - 2) << raid->stripeShift;
+
+		/* if IO ends within first strip of last row */
+		if (endStrip == get_strip_from_row(sc, ld, endRow, map))
+			regSize += ref_in_end_stripe + 1;
+		else
+			regSize += stripSize;
+	}
+	pRAID_Context->timeoutValue = map->raidMap.fpPdIoTimeoutSec;
+	if (sc->mrsas_gen3_ctrl)
+		pRAID_Context->regLockFlags = (isRead) ? raid->regTypeReqOnRead : raid->regTypeReqOnWrite;
+	else
+		pRAID_Context->regLockFlags = (isRead) ? REGION_TYPE_SHARED_READ : raid->regTypeReqOnWrite;
+	pRAID_Context->VirtualDiskTgtId = raid->targetId;
+	pRAID_Context->regLockRowLBA = regStart;
+	pRAID_Context->regLockLength = regSize;
+	pRAID_Context->configSeqNum = raid->seqNum;
+
+	/*
+	 * Get Phy Params only if FP capable, or else leave it to MR firmware
+	 * to do the calculation.
+	 */
+	if (io_info->fpOkForIo) {
+		retval = io_info->IoforUnevenSpan ?
+		    mr_spanset_get_phy_params(sc, ld, start_strip,
+		    ref_in_start_stripe, io_info, pRAID_Context, map) :
+		    MR_GetPhyParams(sc, ld, start_strip,
+		    ref_in_start_stripe, io_info, pRAID_Context, map);
+		/* If IO on an invalid Pd, then FP is not possible */
+		if (io_info->devHandle == MR_PD_INVALID)
+			io_info->fpOkForIo = FALSE;
+		return retval;
+	} else if (isRead) {
+		for (stripIdx = 0; stripIdx < num_strips; stripIdx++) {
+			retval = io_info->IoforUnevenSpan ?
+			    mr_spanset_get_phy_params(sc, ld, start_strip + stripIdx,
+			    ref_in_start_stripe, io_info, pRAID_Context, map) :
+			    MR_GetPhyParams(sc, ld, start_strip + stripIdx,
+			    ref_in_start_stripe, io_info, pRAID_Context, map);
+			if (!retval)
+				return TRUE;
+		}
+	}
+#if SPAN_DEBUG
+	/* Just for testing what arm we get for strip. */
+	get_arm_from_strip(sc, ld, start_strip, map);
+#endif
+	return TRUE;
+}
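+
+/*
+ * A worked example of the strip/row arithmetic above (numbers illustrative):
+ * with stripeShift = 7 (128-block strips), ldStartBlock = 300 and
+ * numBlocks = 100:
+ *
+ *	start_strip         = 300 >> 7  = 2
+ *	ref_in_start_stripe = 300 & 127 = 44
+ *	endLba              = 300 + 100 - 1 = 399
+ *	endStrip            = 399 >> 7  = 3
+ *	num_strips          = 3 - 2 + 1 = 2
+ *
+ * For an even-span LD with rowDataSize = 2 this gives start_row = endRow = 1,
+ * so numRows = 1; since num_strips != 1 the region lock keeps the full-row
+ * defaults (regStart = start_row << stripeShift, regSize = stripSize).
+ */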
+
+/*
+ *
+ * This routine prepares the spanset info from a valid RAID map and stores it
+ * into the local per-instance copy of ldSpanInfo.
+ *
+ * Inputs :	LD map
+ * 			ldSpanInfo per HBA instance
+ *
+ */
+void
+mr_update_span_set(MR_DRV_RAID_MAP_ALL * map, PLD_SPAN_INFO ldSpanInfo)
+{
+	u_int8_t span, count;
+	u_int32_t element, span_row_width;
+	u_int64_t span_row;
+	MR_LD_RAID *raid;
+	LD_SPAN_SET *span_set, *span_set_prev;
+	MR_QUAD_ELEMENT *quad;
+	int ldCount;
+	u_int16_t ld;
+
+	for (ldCount = 0; ldCount < MAX_LOGICAL_DRIVES; ldCount++) {
+		ld = MR_TargetIdToLdGet(ldCount, map);
+		if (ld >= MAX_LOGICAL_DRIVES)
+			continue;
+		raid = MR_LdRaidGet(ld, map);
+		for (element = 0; element < MAX_QUAD_DEPTH; element++) {
+			for (span = 0; span < raid->spanDepth; span++) {
+				if (map->raidMap.ldSpanMap[ld].spanBlock[span].
+				    block_span_info.noElements < element + 1)
+					continue;
+				/* TO-DO */
+				span_set = &(ldSpanInfo[ld].span_set[element]);
+				quad = &map->raidMap.ldSpanMap[ld].
+				    spanBlock[span].block_span_info.quad[element];
+
+				span_set->diff = quad->diff;
+
+				for (count = 0, span_row_width = 0;
+				    count < raid->spanDepth; count++) {
+					if (map->raidMap.ldSpanMap[ld].spanBlock[count].
+					    block_span_info.noElements >= element + 1) {
+						span_set->strip_offset[count] = span_row_width;
+						span_row_width +=
+						    MR_LdSpanPtrGet(ld, count, map)->spanRowDataSize;
+#if SPAN_DEBUG
+						printf("AVAGO Debug span %x rowDataSize %x\n", count,
+						    MR_LdSpanPtrGet(ld, count, map)->spanRowDataSize);
+#endif
+					}
+				}
+
+				span_set->span_row_data_width = span_row_width;
+				span_row = mega_div64_32(((quad->logEnd -
+				    quad->logStart) + quad->diff), quad->diff);
+
+				if (element == 0) {
+					span_set->log_start_lba = 0;
+					span_set->log_end_lba =
+					    ((span_row << raid->stripeShift) * span_row_width) - 1;
+
+					span_set->span_row_start = 0;
+					span_set->span_row_end = span_row - 1;
+
+					span_set->data_strip_start = 0;
+					span_set->data_strip_end = (span_row * span_row_width) - 1;
+
+					span_set->data_row_start = 0;
+					span_set->data_row_end = (span_row * quad->diff) - 1;
+				} else {
+					span_set_prev = &(ldSpanInfo[ld].span_set[element - 1]);
+					span_set->log_start_lba = span_set_prev->log_end_lba + 1;
+					span_set->log_end_lba = span_set->log_start_lba +
+					    ((span_row << raid->stripeShift) * span_row_width) - 1;
+
+					span_set->span_row_start = span_set_prev->span_row_end + 1;
+					span_set->span_row_end =
+					    span_set->span_row_start + span_row - 1;
+
+					span_set->data_strip_start =
+					    span_set_prev->data_strip_end + 1;
+					span_set->data_strip_end = span_set->data_strip_start +
+					    (span_row * span_row_width) - 1;
+
+					span_set->data_row_start = span_set_prev->data_row_end + 1;
+					span_set->data_row_end = span_set->data_row_start +
+					    (span_row * quad->diff) - 1;
+				}
+				break;
+			}
+			if (span == raid->spanDepth)
+				break;	/* no quads remain */
+		}
+	}
+#if SPAN_DEBUG
+	getSpanInfo(map, ldSpanInfo);	/* to get span set info */
+#endif
+}
+
+/*
+ * mrsas_update_load_balance_params:	Update load balance params
+ * Inputs:
+ * sc - driver softc instance
+ * drv_map - driver RAID map
+ * lbInfo - Load balance info
+ *
+ * This function updates the load balance parameters for the LD config of a two
+ * drive optimal RAID-1.
+ */
+void
+mrsas_update_load_balance_params(struct mrsas_softc *sc,
+    MR_DRV_RAID_MAP_ALL * drv_map, PLD_LOAD_BALANCE_INFO lbInfo)
+{
+	int ldCount;
+	u_int16_t ld;
+	MR_LD_RAID *raid;
+
+	if (sc->lb_pending_cmds > 128 || sc->lb_pending_cmds < 1)
+		sc->lb_pending_cmds = LB_PENDING_CMDS_DEFAULT;
+
+	for (ldCount = 0; ldCount < MAX_LOGICAL_DRIVES_EXT; ldCount++) {
+		ld = MR_TargetIdToLdGet(ldCount, drv_map);
+		if (ld >= MAX_LOGICAL_DRIVES_EXT) {
+			lbInfo[ldCount].loadBalanceFlag = 0;
+			continue;
+		}
+		raid = MR_LdRaidGet(ld, drv_map);
+		if ((raid->level != 1) ||
+		    (raid->ldState != MR_LD_STATE_OPTIMAL)) {
+			lbInfo[ldCount].loadBalanceFlag = 0;
+			continue;
+		}
+		lbInfo[ldCount].loadBalanceFlag = 1;
+	}
+}
+
+
+/*
+ * mrsas_set_pd_lba:	Sets PD LBA
+ * input:				io_request pointer
+ * 						CDB length
+ * 						io_info pointer
+ * 						Pointer to CCB
+ * 						Local RAID map pointer
+ * 						Start block of IO (reference tag)
+ * 						Block size
+ *
+ * Used to set the PD logical block address in CDB for FP IOs.
+ */
+void
+mrsas_set_pd_lba(MRSAS_RAID_SCSI_IO_REQUEST * io_request, u_int8_t cdb_len,
+    struct IO_REQUEST_INFO *io_info, union ccb *ccb,
+    MR_DRV_RAID_MAP_ALL * local_map_ptr, u_int32_t ref_tag,
+    u_int32_t ld_block_size)
+{
+	MR_LD_RAID *raid;
+	u_int32_t ld;
+	u_int64_t start_blk = io_info->pdBlock;
+	u_int8_t *cdb = io_request->CDB.CDB32;
+	u_int32_t num_blocks = io_info->numBlocks;
+	u_int8_t opcode = 0, flagvals = 0, groupnum = 0, control = 0;
+	struct ccb_hdr *ccb_h = &(ccb->ccb_h);
+
+	/* Check if T10 PI (DIF) is enabled for this LD */
+	ld = MR_TargetIdToLdGet(io_info->ldTgtId, local_map_ptr);
+	raid = MR_LdRaidGet(ld, local_map_ptr);
+	if (raid->capability.ldPiMode == MR_PROT_INFO_TYPE_CONTROLLER) {
+		memset(cdb, 0, sizeof(io_request->CDB.CDB32));
+		cdb[0] = MRSAS_SCSI_VARIABLE_LENGTH_CMD;
+		cdb[7] = MRSAS_SCSI_ADDL_CDB_LEN;
+
+		if (ccb_h->flags == CAM_DIR_OUT)
+			cdb[9] = MRSAS_SCSI_SERVICE_ACTION_READ32;
+		else
+			cdb[9] = MRSAS_SCSI_SERVICE_ACTION_WRITE32;
+		cdb[10] = MRSAS_RD_WR_PROTECT_CHECK_ALL;
+
+		/* LBA */
+		cdb[12] = (u_int8_t)((start_blk >> 56) & 0xff);
+		cdb[13] = (u_int8_t)((start_blk >> 48) & 0xff);
+		cdb[14] = (u_int8_t)((start_blk >> 40) & 0xff);
+		cdb[15] = (u_int8_t)((start_blk >> 32) & 0xff);
+		cdb[16] = (u_int8_t)((start_blk >> 24) & 0xff);
+		cdb[17] = (u_int8_t)((start_blk >> 16) & 0xff);
+		cdb[18] = (u_int8_t)((start_blk >> 8) & 0xff);
+		cdb[19] = (u_int8_t)(start_blk & 0xff);
+
+		/* Logical block reference tag */
+		io_request->CDB.EEDP32.PrimaryReferenceTag = swap32(ref_tag);
+		io_request->CDB.EEDP32.PrimaryApplicationTagMask = 0xffff;
+		io_request->IoFlags = 32;	/* Specify 32-byte cdb */
+
+		/* Transfer length */
+		cdb[28] = (u_int8_t)((num_blocks >> 24) & 0xff);
+		cdb[29] = (u_int8_t)((num_blocks >> 16) & 0xff);
+		cdb[30] = (u_int8_t)((num_blocks >> 8) & 0xff);
+		cdb[31] = (u_int8_t)(num_blocks & 0xff);
+
+		/* set SCSI IO EEDP Flags */
+		if (ccb_h->flags == CAM_DIR_OUT) {
+			io_request->EEDPFlags =
+			    MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG |
+			    MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG |
+			    MPI2_SCSIIO_EEDPFLAGS_CHECK_REMOVE_OP |
+			    MPI2_SCSIIO_EEDPFLAGS_CHECK_APPTAG |
+			    MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
+		} else {
+			io_request->EEDPFlags =
+			    MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG |
+			    MPI2_SCSIIO_EEDPFLAGS_INSERT_OP;
+		}
+		io_request->Control |= (0x4 << 26);
+		io_request->EEDPBlockSize = ld_block_size;
+	} else {
+		/* Some drives don't support 16/12 byte CDB's, convert to 10 */
+		if (((cdb_len == 12) || (cdb_len == 16)) &&
+		    (start_blk <= 0xffffffff)) {
+			if (cdb_len == 16) {
+				opcode = cdb[0] == READ_16 ? READ_10 : WRITE_10;
+				flagvals = cdb[1];
+				groupnum = cdb[14];
+				control = cdb[15];
+			} else {
+				opcode = cdb[0] == READ_12 ? READ_10 : WRITE_10;
+				flagvals = cdb[1];
+				groupnum = cdb[10];
+				control = cdb[11];
+			}
+
+			memset(cdb, 0, sizeof(io_request->CDB.CDB32));
+
+			cdb[0] = opcode;
+			cdb[1] = flagvals;
+			cdb[6] = groupnum;
+			cdb[9] = control;
+
+			/* Transfer length */
+			cdb[8] = (u_int8_t)(num_blocks & 0xff);
+			cdb[7] = (u_int8_t)((num_blocks >> 8) & 0xff);
+
+			io_request->IoFlags = 10;	/* Specify 10-byte cdb */
+			cdb_len = 10;
+		} else if ((cdb_len < 16) && (start_blk > 0xffffffff)) {
+			/* Convert to 16 byte CDB for large LBAs */
+			switch (cdb_len) {
+			case 6:
+				opcode = cdb[0] == READ_6 ? READ_16 : WRITE_16;
+				control = cdb[5];
+				break;
+			case 10:
+				opcode = cdb[0] == READ_10 ? READ_16 : WRITE_16;
+				flagvals = cdb[1];
+				groupnum = cdb[6];
+				control = cdb[9];
+				break;
+			case 12:
+				opcode = cdb[0] == READ_12 ? READ_16 : WRITE_16;
+				flagvals = cdb[1];
+				groupnum = cdb[10];
+				control = cdb[11];
+				break;
+			}
+
+			memset(cdb, 0, sizeof(io_request->CDB.CDB32));
+
+			cdb[0] = opcode;
+			cdb[1] = flagvals;
+			cdb[14] = groupnum;
+			cdb[15] = control;
+
+			/* Transfer length */
+			cdb[13] = (u_int8_t)(num_blocks & 0xff);
+			cdb[12] = (u_int8_t)((num_blocks >> 8) & 0xff);
+			cdb[11] = (u_int8_t)((num_blocks >> 16) & 0xff);
+			cdb[10] = (u_int8_t)((num_blocks >> 24) & 0xff);
+
+			io_request->IoFlags = 16;	/* Specify 16-byte cdb */
+			cdb_len = 16;
+		} else if ((cdb_len == 6) && (start_blk > 0x1fffff)) {
+			/* convert to 10 byte CDB */
+			opcode = cdb[0] == READ_6 ? READ_10 : WRITE_10;
+			control = cdb[5];
+
+			memset(cdb, 0, sizeof(io_request->CDB.CDB32));
+			cdb[0] = opcode;
+			cdb[9] = control;
+
+			/* Set transfer length */
+			cdb[8] = (u_int8_t)(num_blocks & 0xff);
+			cdb[7] = (u_int8_t)((num_blocks >> 8) & 0xff);
+
+			/* Specify 10-byte cdb */
+			cdb_len = 10;
+		}
+		/* Fall through to the normal case; just load the LBA here */
+		u_int8_t val = cdb[1] & 0xE0;
+
+		switch (cdb_len) {
+		case 6:
+			cdb[3] = (u_int8_t)(start_blk & 0xff);
+			cdb[2] = (u_int8_t)((start_blk >> 8) & 0xff);
+			cdb[1] = val | ((u_int8_t)(start_blk >> 16) & 0x1f);
+			break;
+		case 10:
+			cdb[5] = (u_int8_t)(start_blk & 0xff);
+			cdb[4] = (u_int8_t)((start_blk >> 8) & 0xff);
+			cdb[3] = (u_int8_t)((start_blk >> 16) & 0xff);
+			cdb[2] = (u_int8_t)((start_blk >> 24) & 0xff);
+			break;
+		case 16:
+			cdb[9] = (u_int8_t)(start_blk & 0xff);
+			cdb[8] = (u_int8_t)((start_blk >> 8) & 0xff);
+			cdb[7] = (u_int8_t)((start_blk >> 16) & 0xff);
+			cdb[6] = (u_int8_t)((start_blk >> 24) & 0xff);
+			cdb[5] = (u_int8_t)((start_blk >> 32) & 0xff);
+			cdb[4] = (u_int8_t)((start_blk >> 40) & 0xff);
+			cdb[3] = (u_int8_t)((start_blk >> 48) & 0xff);
+			cdb[2] = (u_int8_t)((start_blk >> 56) & 0xff);
+			break;
+		}
+	}
+}
+
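+/*
+ * Illustrative sketch (not part of the upstream driver): for a hypothetical
+ * fast-path read of num_blocks = 8 starting at start_blk = 0x12345678, the
+ * 10-byte CDB case above produces the following big-endian layout:
+ *
+ *	cdb[0] = READ_10;
+ *	cdb[2] = 0x12; cdb[3] = 0x34; cdb[4] = 0x56; cdb[5] = 0x78;	LBA
+ *	cdb[7] = 0x00; cdb[8] = 0x08;					transfer length
+ */
+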
+/*
+ * mrsas_get_best_arm_pd:	Determine the best spindle arm
+ * Inputs:
+ *    sc - HBA instance
+ *    lbInfo - Load balance info
+ *    io_info - IO request info
+ *
+ * This function determines and returns the best arm by looking at the
+ * parameters of the last PD access.
+ */
+u_int8_t 
+mrsas_get_best_arm_pd(struct mrsas_softc *sc,
+    PLD_LOAD_BALANCE_INFO lbInfo, struct IO_REQUEST_INFO *io_info)
+{
+	MR_LD_RAID *raid;
+	MR_DRV_RAID_MAP_ALL *drv_map;
+	u_int16_t pend0, pend1, ld;
+	u_int64_t diff0, diff1;
+	u_int8_t bestArm, pd0, pd1, span, arm;
+	u_int32_t arRef, span_row_size;
+
+	u_int64_t block = io_info->ldStartBlock;
+	u_int32_t count = io_info->numBlocks;
+
+	span = ((io_info->span_arm & RAID_CTX_SPANARM_SPAN_MASK)
+	    >> RAID_CTX_SPANARM_SPAN_SHIFT);
+	arm = (io_info->span_arm & RAID_CTX_SPANARM_ARM_MASK);
+
+	drv_map = sc->ld_drv_map[(sc->map_id & 1)];
+	ld = MR_TargetIdToLdGet(io_info->ldTgtId, drv_map);
+	raid = MR_LdRaidGet(ld, drv_map);
+	span_row_size = sc->UnevenSpanSupport ?
+	    SPAN_ROW_SIZE(drv_map, ld, span) : raid->rowSize;
+
+	arRef = MR_LdSpanArrayGet(ld, span, drv_map);
+	pd0 = MR_ArPdGet(arRef, arm, drv_map);
+	pd1 = MR_ArPdGet(arRef, (arm + 1) >= span_row_size ?
+	    (arm + 1 - span_row_size) : arm + 1, drv_map);
+
+	/* get the pending cmds for the data and mirror arms */
+	pend0 = mrsas_atomic_read(&lbInfo->scsi_pending_cmds[pd0]);
+	pend1 = mrsas_atomic_read(&lbInfo->scsi_pending_cmds[pd1]);
+
+	/* Determine the disk whose head is nearer to the req. block */
+	diff0 = ABS_DIFF(block, lbInfo->last_accessed_block[pd0]);
+	diff1 = ABS_DIFF(block, lbInfo->last_accessed_block[pd1]);
+	bestArm = (diff0 <= diff1 ? arm : arm ^ 1);
+
+	if ((bestArm == arm && pend0 > pend1 + sc->lb_pending_cmds) ||
+	    (bestArm != arm && pend1 > pend0 + sc->lb_pending_cmds))
+		bestArm ^= 1;
+
+	/* Update the last accessed block on the correct pd */
+	lbInfo->last_accessed_block[bestArm == arm ? pd0 : pd1] = block + count - 1;
+	io_info->span_arm = (span << RAID_CTX_SPANARM_SPAN_SHIFT) | bestArm;
+	io_info->pd_after_lb = (bestArm == arm) ? pd0 : pd1;
+#if SPAN_DEBUG
+	if (arm != bestArm)
+		printf("AVAGO Debug R1 Load balance occur - span 0x%x arm 0x%x bestArm 0x%x "
+		    "io_info->span_arm 0x%x\n",
+		    span, arm, bestArm, io_info->span_arm);
+#endif
+
+	return io_info->pd_after_lb;
+}
+
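+/*
+ * Worked example for mrsas_get_best_arm_pd() above (hypothetical values,
+ * illustration only): with arm = 0, pend0 = 5, pend1 = 2, lb_pending_cmds = 4
+ * and the two arms' last accessed blocks equally distant from the request,
+ * diff0 <= diff1 selects the data arm first; since pend0 (5) is not greater
+ * than pend1 + lb_pending_cmds (6), the choice stands and pd0 services the
+ * I/O.
+ */
+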
+/*
+ * mrsas_get_updated_dev_handle:	Get the updated dev handle
+ * Inputs:
+ *	sc - Adapter instance soft state
+ *	lbInfo - Load balance info
+ *	io_info - io_info pointer
+ *
+ * This function determines and returns the updated dev handle.
+ */
+u_int16_t 
+mrsas_get_updated_dev_handle(struct mrsas_softc *sc,
+    PLD_LOAD_BALANCE_INFO lbInfo, struct IO_REQUEST_INFO *io_info)
+{
+	u_int8_t arm_pd;
+	u_int16_t devHandle;
+	MR_DRV_RAID_MAP_ALL *drv_map;
+
+	drv_map = sc->ld_drv_map[(sc->map_id & 1)];
+
+	/* get best new arm */
+	arm_pd = mrsas_get_best_arm_pd(sc, lbInfo, io_info);
+	devHandle = MR_PdDevHandleGet(arm_pd, drv_map);
+	mrsas_atomic_inc(&lbInfo->scsi_pending_cmds[arm_pd]);
+
+	return devHandle;
+}
+
+/*
+ * MR_GetPhyParams:	Calculates arm, span, and block
+ * Inputs:			Adapter soft state
+ * 					Logical drive number (LD)
+ * 					Stripe number (stripRow)
+ * 					Reference in stripe (stripRef)
+ *
+ * Outputs:			Absolute Block number in the physical disk
+ *
+ * This routine calculates the arm, span and block for the specified stripe and
+ * reference in stripe.
+ */
+u_int8_t
+MR_GetPhyParams(struct mrsas_softc *sc, u_int32_t ld,
+    u_int64_t stripRow,
+    u_int16_t stripRef, struct IO_REQUEST_INFO *io_info,
+    RAID_CONTEXT * pRAID_Context, MR_DRV_RAID_MAP_ALL * map)
+{
+	MR_LD_RAID *raid = MR_LdRaidGet(ld, map);
+	u_int32_t pd, arRef;
+	u_int8_t physArm, span;
+	u_int64_t row;
+	u_int8_t retval = TRUE;
+	int error_code = 0;
+	u_int64_t *pdBlock = &io_info->pdBlock;
+	u_int16_t *pDevHandle = &io_info->devHandle;
+	u_int32_t rowMod, armQ, arm, logArm;
+
+	row = mega_div64_32(stripRow, raid->rowDataSize);
+
+	if (raid->level == 6) {
+		/* logical arm within row */
+		logArm = mega_mod64(stripRow, raid->rowDataSize);
+		if (raid->rowSize == 0)
+			return FALSE;
+		rowMod = mega_mod64(row, raid->rowSize);	/* get logical row mod */
+		armQ = raid->rowSize - 1 - rowMod;	/* index of Q drive */
+		arm = armQ + 1 + logArm;/* data always logically follows Q */
+		if (arm >= raid->rowSize)	/* handle wrap condition */
+			arm -= raid->rowSize;
+		physArm = (u_int8_t)arm;
+	} else {
+		if (raid->modFactor == 0)
+			return FALSE;
+		physArm = MR_LdDataArmGet(ld, mega_mod64(stripRow, raid->modFactor), map);
+	}
+
+	if (raid->spanDepth == 1) {
+		span = 0;
+		*pdBlock = row << raid->stripeShift;
+	} else {
+		span = (u_int8_t)MR_GetSpanBlock(ld, row, pdBlock, map, &error_code);
+		if (error_code == 1)
+			return FALSE;
+	}
+
+	/* Get the array on which this span is present */
+	arRef = MR_LdSpanArrayGet(ld, span, map);
+
+	pd = MR_ArPdGet(arRef, physArm, map);	/* Get the Pd. */
+
+	if (pd != MR_PD_INVALID)
+		/* Get dev handle from Pd */
+		*pDevHandle = MR_PdDevHandleGet(pd, map);
+	else {
+		*pDevHandle = MR_PD_INVALID;	/* set dev handle as invalid. */
+		if ((raid->level >= 5) && ((!sc->mrsas_gen3_ctrl) || (sc->mrsas_gen3_ctrl &&
+		    raid->regTypeReqOnRead != REGION_TYPE_UNUSED)))
+			pRAID_Context->regLockFlags = REGION_TYPE_EXCLUSIVE;
+		else if (raid->level == 1) {
+			/* Get Alternate Pd. */
+			pd = MR_ArPdGet(arRef, physArm + 1, map);
+			if (pd != MR_PD_INVALID)
+				/* Get dev handle from Pd. */
+				*pDevHandle = MR_PdDevHandleGet(pd, map);
+		}
+	}
+
+	*pdBlock += stripRef + MR_LdSpanPtrGet(ld, span, map)->startBlk;
+	pRAID_Context->spanArm = (span << RAID_CTX_SPANARM_SPAN_SHIFT) | physArm;
+	io_info->span_arm = pRAID_Context->spanArm;
+	return retval;
+}
+
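+/*
+ * Worked RAID-6 example for MR_GetPhyParams() above (hypothetical geometry,
+ * illustration only): with rowDataSize = 3, rowSize = 5 and stripRow = 7:
+ *
+ *	row    = 7 / 3 = 2
+ *	logArm = 7 % 3 = 1
+ *	rowMod = 2 % 5 = 2
+ *	armQ   = 5 - 1 - 2 = 2		(index of the Q drive)
+ *	arm    = 2 + 1 + 1 = 4		(data follows Q; 4 < 5, no wrap)
+ *
+ * so physArm is 4 for this stripe.
+ */
+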
+/*
+ * MR_GetSpanBlock:	Calculates span block
+ * Inputs:			LD
+ * 					Row
+ * 					Span block pointer
+ * 					RAID map pointer
+ * 					Division error pointer
+ *
+ * Outputs:			Span number, error code
+ *
+ * This routine calculates the span from the span block info.
+ */
+u_int32_t
+MR_GetSpanBlock(u_int32_t ld, u_int64_t row, u_int64_t *span_blk,
+    MR_DRV_RAID_MAP_ALL * map, int *div_error)
+{
+	MR_SPAN_BLOCK_INFO *pSpanBlock = MR_LdSpanInfoGet(ld, map);
+	MR_QUAD_ELEMENT *quad;
+	MR_LD_RAID *raid = MR_LdRaidGet(ld, map);
+	u_int32_t span, j;
+	u_int64_t blk, debugBlk;
+
+	for (span = 0; span < raid->spanDepth; span++, pSpanBlock++) {
+		for (j = 0; j < pSpanBlock->block_span_info.noElements; j++) {
+			quad = &pSpanBlock->block_span_info.quad[j];
+			if (quad->diff == 0) {
+				*div_error = 1;
+				return span;
+			}
+			if (quad->logStart <= row && row <= quad->logEnd &&
+			    (mega_mod64(row - quad->logStart, quad->diff)) == 0) {
+				if (span_blk != NULL) {
+					blk = mega_div64_32((row - quad->logStart), quad->diff);
+					debugBlk = blk;
+					blk = (blk + quad->offsetInSpan) << raid->stripeShift;
+					*span_blk = blk;
+				}
+				return span;
+			}
+		}
+	}
+	return span;
+}
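+
+/*
+ * Example quad match for MR_GetSpanBlock() above (hypothetical quad,
+ * illustration only): a quad with logStart = 0, logEnd = 99, diff = 2 and
+ * offsetInSpan = 10 matches row = 8, because 8 lies in [0, 99] and
+ * (8 - 0) % 2 == 0; the span block then becomes
+ * (8 / 2 + 10) << stripeShift = 14 << stripeShift.
+ */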


Property changes on: trunk/sys/dev/mrsas/mrsas_fp.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/mrsas/mrsas_ioctl.c
===================================================================
--- trunk/sys/dev/mrsas/mrsas_ioctl.c	                        (rev 0)
+++ trunk/sys/dev/mrsas/mrsas_ioctl.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,531 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2015, AVAGO Tech. All rights reserved. Author: Marian Choy
+ * Copyright (c) 2014, LSI Corp. All rights reserved. Author: Marian Choy
+ * Support: freebsdraid at avagotech.com
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer. 2. Redistributions
+ * in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or other
+ * materials provided with the distribution. 3. Neither the name of the
+ * <ORGANIZATION> nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing
+ * official policies,either expressed or implied, of the FreeBSD Project.
+ *
+ * Send feedback to: <megaraidfbsd at avagotech.com> Mail to: AVAGO TECHNOLOGIES, 1621
+ * Barber Lane, Milpitas, CA 95035 ATTN: MegaRaid FreeBSD
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/mrsas/mrsas_ioctl.c 300736 2016-05-26 12:00:14Z kadesai $");
+
+#include <dev/mrsas/mrsas.h>
+#include <dev/mrsas/mrsas_ioctl.h>
+
+/*
+ * Function prototypes
+ */
+int	mrsas_alloc_mfi_cmds(struct mrsas_softc *sc);
+int	mrsas_passthru(struct mrsas_softc *sc, void *arg, u_long ioctlCmd);
+void	mrsas_free_ioc_cmd(struct mrsas_softc *sc);
+void	mrsas_free_frame(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd);
+void   *mrsas_alloc_frame(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd);
+static int mrsas_create_frame_pool(struct mrsas_softc *sc);
+static void
+mrsas_alloc_cb(void *arg, bus_dma_segment_t *segs,
+    int nsegs, int error);
+
+extern struct mrsas_mfi_cmd *mrsas_get_mfi_cmd(struct mrsas_softc *sc);
+extern void mrsas_release_mfi_cmd(struct mrsas_mfi_cmd *cmd);
+extern int
+mrsas_issue_blocked_cmd(struct mrsas_softc *sc,
+    struct mrsas_mfi_cmd *cmd);
+
+/*
+ * mrsas_passthru:	Handle pass-through commands
+ * input:			Adapter instance soft state
+ * 					ioctl argument pointer
+ * 					ioctl command
+ *
+ * This function is called from mrsas_ioctl() to handle pass-through and ioctl
+ * commands to Firmware.
+ */
+int
+mrsas_passthru(struct mrsas_softc *sc, void *arg, u_long ioctlCmd)
+{
+	struct mrsas_iocpacket *user_ioc = (struct mrsas_iocpacket *)arg;
+
+#ifdef COMPAT_FREEBSD32
+	struct mrsas_iocpacket32 *user_ioc32 = (struct mrsas_iocpacket32 *)arg;
+
+#endif
+	union mrsas_frame *in_cmd = (union mrsas_frame *)&(user_ioc->frame.raw);
+	struct mrsas_mfi_cmd *cmd = NULL;
+	bus_dma_tag_t ioctl_data_tag[MAX_IOCTL_SGE];
+	bus_dmamap_t ioctl_data_dmamap[MAX_IOCTL_SGE];
+	void *ioctl_data_mem[MAX_IOCTL_SGE];
+	bus_addr_t ioctl_data_phys_addr[MAX_IOCTL_SGE];
+	bus_dma_tag_t ioctl_sense_tag = 0;
+	bus_dmamap_t ioctl_sense_dmamap = 0;
+	void *ioctl_sense_mem = 0;
+	bus_addr_t ioctl_sense_phys_addr = 0;
+	int i, ioctl_data_size = 0, ioctl_sense_size, ret = 0;
+	struct mrsas_sge32 *kern_sge32;
+	unsigned long *sense_ptr;
+	uint8_t *iov_base_ptrin = NULL;
+	size_t iov_len = 0;
+
+	/*
+	 * Check for NOP from MegaCli... MegaCli can issue a DCMD of 0.  In
+	 * this case do nothing and return 0 to it as status.
+	 */
+	if (in_cmd->dcmd.opcode == 0) {
+		device_printf(sc->mrsas_dev, "In %s() Got a NOP\n", __func__);
+		user_ioc->frame.hdr.cmd_status = MFI_STAT_OK;
+		return (0);
+	}
+	/* Validate SGL length */
+	if (user_ioc->sge_count > MAX_IOCTL_SGE) {
+		device_printf(sc->mrsas_dev, "In %s() SGL is too long (%d > %d).\n",
+		    __func__, user_ioc->sge_count, MAX_IOCTL_SGE);
+		return (ENOENT);
+	}
+	/* Get a command */
+	cmd = mrsas_get_mfi_cmd(sc);
+	if (!cmd) {
+		device_printf(sc->mrsas_dev, "Failed to get a free cmd for IOCTL\n");
+		return (ENOMEM);
+	}
+	/*
+	 * The user's IOCTL packet has at most 2 frames. Copy those two frames
+	 * into our cmd's frames. cmd->frame's context will get overwritten
+	 * when we copy from the user's frames, so set that value separately
+	 * afterwards.
+	 */
+	memcpy(cmd->frame, user_ioc->frame.raw, 2 * MEGAMFI_FRAME_SIZE);
+	cmd->frame->hdr.context = cmd->index;
+	cmd->frame->hdr.pad_0 = 0;
+	cmd->frame->hdr.flags &= ~(MFI_FRAME_IEEE | MFI_FRAME_SGL64 |
+	    MFI_FRAME_SENSE64);
+
+	/*
+	 * The management interface between applications and the fw uses MFI
+	 * frames. E.g., RAID configuration changes, LD property changes, etc.
+	 * are accomplished through different kinds of MFI frames. The driver
+	 * needs to care only about substituting user buffers with kernel
+	 * buffers in SGLs. The location of SGL is embedded in the struct
+	 * iocpacket itself.
+	 */
+	kern_sge32 = (struct mrsas_sge32 *)
+	    ((unsigned long)cmd->frame + user_ioc->sgl_off);
+
+	memset(ioctl_data_tag, 0, (sizeof(bus_dma_tag_t) * MAX_IOCTL_SGE));
+	memset(ioctl_data_dmamap, 0, (sizeof(bus_dmamap_t) * MAX_IOCTL_SGE));
+	memset(ioctl_data_mem, 0, (sizeof(void *) * MAX_IOCTL_SGE));
+	memset(ioctl_data_phys_addr, 0, (sizeof(bus_addr_t) * MAX_IOCTL_SGE));
+
+	/*
+	 * For each user buffer, create a mirror buffer and copy in
+	 */
+	for (i = 0; i < user_ioc->sge_count; i++) {
+		if (ioctlCmd == MRSAS_IOC_FIRMWARE_PASS_THROUGH64) {
+			if (!user_ioc->sgl[i].iov_len)
+				continue;
+			ioctl_data_size = user_ioc->sgl[i].iov_len;
+#ifdef COMPAT_FREEBSD32
+		} else {
+			if (!user_ioc32->sgl[i].iov_len)
+				continue;
+			ioctl_data_size = user_ioc32->sgl[i].iov_len;
+#endif
+		}
+		if (bus_dma_tag_create(sc->mrsas_parent_tag,
+		    1, 0,
+		    BUS_SPACE_MAXADDR_32BIT,
+		    BUS_SPACE_MAXADDR,
+		    NULL, NULL,
+		    ioctl_data_size,
+		    1,
+		    ioctl_data_size,
+		    BUS_DMA_ALLOCNOW,
+		    NULL, NULL,
+		    &ioctl_data_tag[i])) {
+			device_printf(sc->mrsas_dev, "Cannot allocate ioctl data tag\n");
+			ret = ENOMEM;
+			goto out;
+		}
+		if (bus_dmamem_alloc(ioctl_data_tag[i], (void **)&ioctl_data_mem[i],
+		    (BUS_DMA_NOWAIT | BUS_DMA_ZERO), &ioctl_data_dmamap[i])) {
+			device_printf(sc->mrsas_dev, "Cannot allocate ioctl data mem\n");
+			ret = ENOMEM;
+			goto out;
+		}
+		if (bus_dmamap_load(ioctl_data_tag[i], ioctl_data_dmamap[i],
+		    ioctl_data_mem[i], ioctl_data_size, mrsas_alloc_cb,
+		    &ioctl_data_phys_addr[i], BUS_DMA_NOWAIT)) {
+			device_printf(sc->mrsas_dev, "Cannot load ioctl data mem\n");
+			ret = ENOMEM;
+			goto out;
+		}
+		/* Save the physical address and length */
+		kern_sge32[i].phys_addr = (u_int32_t)ioctl_data_phys_addr[i];
+
+		if (ioctlCmd == MRSAS_IOC_FIRMWARE_PASS_THROUGH64) {
+			kern_sge32[i].length = user_ioc->sgl[i].iov_len;
+
+			iov_base_ptrin = user_ioc->sgl[i].iov_base;
+			iov_len = user_ioc->sgl[i].iov_len;
+#ifdef COMPAT_FREEBSD32
+		} else {
+			kern_sge32[i].length = user_ioc32->sgl[i].iov_len;
+
+			iov_base_ptrin = PTRIN(user_ioc32->sgl[i].iov_base);
+			iov_len = user_ioc32->sgl[i].iov_len;
+#endif
+		}
+
+		/* Copy in data from user space */
+		ret = copyin(iov_base_ptrin, ioctl_data_mem[i], iov_len);
+		if (ret) {
+			device_printf(sc->mrsas_dev, "IOCTL copyin failed!\n");
+			goto out;
+		}
+	}
+
+	ioctl_sense_size = user_ioc->sense_len;
+
+	if (user_ioc->sense_len) {
+		if (bus_dma_tag_create(sc->mrsas_parent_tag,
+		    1, 0,
+		    BUS_SPACE_MAXADDR_32BIT,
+		    BUS_SPACE_MAXADDR,
+		    NULL, NULL,
+		    ioctl_sense_size,
+		    1,
+		    ioctl_sense_size,
+		    BUS_DMA_ALLOCNOW,
+		    NULL, NULL,
+		    &ioctl_sense_tag)) {
+			device_printf(sc->mrsas_dev, "Cannot allocate ioctl sense tag\n");
+			ret = ENOMEM;
+			goto out;
+		}
+		if (bus_dmamem_alloc(ioctl_sense_tag, (void **)&ioctl_sense_mem,
+		    (BUS_DMA_NOWAIT | BUS_DMA_ZERO), &ioctl_sense_dmamap)) {
+			device_printf(sc->mrsas_dev, "Cannot allocate ioctl sense mem\n");
+			ret = ENOMEM;
+			goto out;
+		}
+		if (bus_dmamap_load(ioctl_sense_tag, ioctl_sense_dmamap,
+		    ioctl_sense_mem, ioctl_sense_size, mrsas_alloc_cb,
+		    &ioctl_sense_phys_addr, BUS_DMA_NOWAIT)) {
+			device_printf(sc->mrsas_dev, "Cannot load ioctl sense mem\n");
+			ret = ENOMEM;
+			goto out;
+		}
+		sense_ptr =
+		    (unsigned long *)((unsigned long)cmd->frame + user_ioc->sense_off);
+		*sense_ptr = ioctl_sense_phys_addr;
+	}
+	/*
+	 * Set the sync_cmd flag so that the ISR knows not to complete this
+	 * cmd to the SCSI mid-layer
+	 */
+	cmd->sync_cmd = 1;
+	ret = mrsas_issue_blocked_cmd(sc, cmd);
+	if (ret == ETIMEDOUT) {
+		mrsas_dprint(sc, MRSAS_OCR,
+		    "IOCTL command is timed out, initiating OCR\n");
+		sc->do_timedout_reset = MFI_DCMD_TIMEOUT_OCR;
+		ret = EAGAIN;
+		goto out;
+	}
+	cmd->sync_cmd = 0;
+
+	/*
+	 * copy out the kernel buffers to user buffers
+	 */
+	for (i = 0; i < user_ioc->sge_count; i++) {
+		if (ioctlCmd == MRSAS_IOC_FIRMWARE_PASS_THROUGH64) {
+			iov_base_ptrin = user_ioc->sgl[i].iov_base;
+			iov_len = user_ioc->sgl[i].iov_len;
+#ifdef COMPAT_FREEBSD32
+		} else {
+			iov_base_ptrin = PTRIN(user_ioc32->sgl[i].iov_base);
+			iov_len = user_ioc32->sgl[i].iov_len;
+#endif
+		}
+
+		ret = copyout(ioctl_data_mem[i], iov_base_ptrin, iov_len);
+		if (ret) {
+			device_printf(sc->mrsas_dev, "IOCTL copyout failed!\n");
+			goto out;
+		}
+	}
+
+	/*
+	 * copy out the sense
+	 */
+	if (user_ioc->sense_len) {
+		/*
+		 * sense_ptr points to the location that holds the user sense
+		 * buffer address
+		 */
+		sense_ptr = (unsigned long *)((unsigned long)user_ioc->frame.raw +
+		    user_ioc->sense_off);
+		ret = copyout(ioctl_sense_mem, (unsigned long *)*sense_ptr,
+		    user_ioc->sense_len);
+		if (ret) {
+			device_printf(sc->mrsas_dev, "IOCTL sense copyout failed!\n");
+			goto out;
+		}
+	}
+	/*
+	 * Return command status to user space
+	 */
+	memcpy(&user_ioc->frame.hdr.cmd_status, &cmd->frame->hdr.cmd_status,
+	    sizeof(u_int8_t));
+
+out:
+	/*
+	 * Release sense buffer
+	 */
+	if (user_ioc->sense_len) {
+		if (ioctl_sense_phys_addr)
+			bus_dmamap_unload(ioctl_sense_tag, ioctl_sense_dmamap);
+		if (ioctl_sense_mem != NULL)
+			bus_dmamem_free(ioctl_sense_tag, ioctl_sense_mem, ioctl_sense_dmamap);
+		if (ioctl_sense_tag != NULL)
+			bus_dma_tag_destroy(ioctl_sense_tag);
+	}
+	/*
+	 * Release data buffers
+	 */
+	for (i = 0; i < user_ioc->sge_count; i++) {
+		if (ioctlCmd == MRSAS_IOC_FIRMWARE_PASS_THROUGH64) {
+			if (!user_ioc->sgl[i].iov_len)
+				continue;
+#ifdef COMPAT_FREEBSD32
+		} else {
+			if (!user_ioc32->sgl[i].iov_len)
+				continue;
+#endif
+		}
+		if (ioctl_data_phys_addr[i])
+			bus_dmamap_unload(ioctl_data_tag[i], ioctl_data_dmamap[i]);
+		if (ioctl_data_mem[i] != NULL)
+			bus_dmamem_free(ioctl_data_tag[i], ioctl_data_mem[i],
+			    ioctl_data_dmamap[i]);
+		if (ioctl_data_tag[i] != NULL)
+			bus_dma_tag_destroy(ioctl_data_tag[i]);
+	}
+	/* Free command */
+	mrsas_release_mfi_cmd(cmd);
+
+	return (ret);
+}
+
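+/*
+ * Userland usage sketch for the pass-through path above (illustration only;
+ * the device node name and the error handling are assumptions, not part of
+ * this file):
+ *
+ *	struct mrsas_iocpacket ioc;
+ *	int fd = open("/dev/mrsas0", O_RDWR);
+ *
+ *	memset(&ioc, 0, sizeof(ioc));
+ *	// fill ioc.frame.raw with an MFI frame, then set sgl_off, sge_count
+ *	// and ioc.sgl[] to describe the user buffers
+ *	if (ioctl(fd, MRSAS_IOC_FIRMWARE_PASS_THROUGH64, &ioc) == -1)
+ *		warn("pass-through ioctl failed");
+ *	close(fd);
+ */
+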
+/*
+ * mrsas_alloc_mfi_cmds:	Allocates the command packets
+ * input:					Adapter instance soft state
+ *
+ * Each IOCTL or passthru command that is issued to the FW is wrapped in a
+ * local data structure called mrsas_mfi_cmd.  The frame embedded in this
+ * mrsas_mfi_cmd is issued to the FW. The array is used only to look up the
+ * mrsas_mfi_cmd given the context. The free commands are maintained in a
+ * linked list.
+ */
+int
+mrsas_alloc_mfi_cmds(struct mrsas_softc *sc)
+{
+	int i, j;
+	u_int32_t max_cmd;
+	struct mrsas_mfi_cmd *cmd;
+
+	max_cmd = MRSAS_MAX_MFI_CMDS;
+
+	/*
+	 * sc->mfi_cmd_list is an array of struct mrsas_mfi_cmd pointers.
+	 * Allocate the dynamic array first and then allocate individual
+	 * commands.
+	 */
+	sc->mfi_cmd_list = malloc(sizeof(struct mrsas_mfi_cmd *) * max_cmd, M_MRSAS, M_NOWAIT);
+	if (!sc->mfi_cmd_list) {
+		device_printf(sc->mrsas_dev, "Cannot alloc memory for mfi_cmd cmd_list.\n");
+		return (ENOMEM);
+	}
+	memset(sc->mfi_cmd_list, 0, sizeof(struct mrsas_mfi_cmd *) * max_cmd);
+	for (i = 0; i < max_cmd; i++) {
+		sc->mfi_cmd_list[i] = malloc(sizeof(struct mrsas_mfi_cmd),
+		    M_MRSAS, M_NOWAIT);
+		if (!sc->mfi_cmd_list[i]) {
+			for (j = 0; j < i; j++)
+				free(sc->mfi_cmd_list[j], M_MRSAS);
+			free(sc->mfi_cmd_list, M_MRSAS);
+			sc->mfi_cmd_list = NULL;
+			return (ENOMEM);
+		}
+	}
+
+	for (i = 0; i < max_cmd; i++) {
+		cmd = sc->mfi_cmd_list[i];
+		memset(cmd, 0, sizeof(struct mrsas_mfi_cmd));
+		cmd->index = i;
+		cmd->ccb_ptr = NULL;
+		cmd->sc = sc;
+		TAILQ_INSERT_TAIL(&(sc->mrsas_mfi_cmd_list_head), cmd, next);
+	}
+
+	/* create a frame pool and assign one frame to each command */
+	if (mrsas_create_frame_pool(sc)) {
+		device_printf(sc->mrsas_dev, "Cannot allocate DMA frame pool.\n");
+		/* Free the frames */
+		for (i = 0; i < MRSAS_MAX_MFI_CMDS; i++) {
+			cmd = sc->mfi_cmd_list[i];
+			mrsas_free_frame(sc, cmd);
+		}
+		if (sc->mficmd_frame_tag != NULL)
+			bus_dma_tag_destroy(sc->mficmd_frame_tag);
+		return (ENOMEM);
+	}
+	return (0);
+}
+
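+/*
+ * Illustrative sketch (not part of the driver): because every frame's
+ * context is programmed with cmd->index, completion code can recover the
+ * originating command with a simple array lookup, e.g.
+ *
+ *	struct mrsas_mfi_cmd *cmd = sc->mfi_cmd_list[context];
+ *
+ * which is why the array and the free list are maintained together.
+ */
+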
+/*
+ * mrsas_create_frame_pool:	Creates DMA pool for cmd frames
+ * input:					Adapter soft state
+ *
+ * Each command packet has an embedded DMA memory buffer that is used for
+ * filling the MFI frame and the SG list that immediately follows the frame.
+ * This function creates those DMA memory buffers for each command packet by
+ * using the PCI pool facility. pad_0 is initialized to 0 to prevent
+ * corrupting the value of context, which could cause a FW crash.
+ */
+static int
+mrsas_create_frame_pool(struct mrsas_softc *sc)
+{
+	int i;
+	struct mrsas_mfi_cmd *cmd;
+
+	if (bus_dma_tag_create(sc->mrsas_parent_tag,
+	    1, 0,
+	    BUS_SPACE_MAXADDR_32BIT,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    MRSAS_MFI_FRAME_SIZE,
+	    1,
+	    MRSAS_MFI_FRAME_SIZE,
+	    BUS_DMA_ALLOCNOW,
+	    NULL, NULL,
+	    &sc->mficmd_frame_tag)) {
+		device_printf(sc->mrsas_dev, "Cannot create MFI frame tag\n");
+		return (ENOMEM);
+	}
+	for (i = 0; i < MRSAS_MAX_MFI_CMDS; i++) {
+		cmd = sc->mfi_cmd_list[i];
+		cmd->frame = mrsas_alloc_frame(sc, cmd);
+		if (cmd->frame == NULL) {
+			device_printf(sc->mrsas_dev, "Cannot alloc MFI frame memory\n");
+			return (ENOMEM);
+		}
+		/*
+		 * For MFI controllers.
+		 * max_num_sge = 60
+		 * max_sge_sz  = 16 bytes (sizeof megasas_sge_skinny)
+		 * Total 960 bytes (15 MFI frames of 64 bytes)
+		 *
+		 * Fusion adapters require only 3 extra frames.
+		 * max_num_sge = 16 (defined as MAX_IOCTL_SGE)
+		 * max_sge_sz  = 12 bytes (sizeof megasas_sge64)
+		 * Total 192 bytes (3 MFI frames of 64 bytes)
+		 */
+		memset(cmd->frame, 0, MRSAS_MFI_FRAME_SIZE);
+		cmd->frame->io.context = cmd->index;
+		cmd->frame->io.pad_0 = 0;
+	}
+
+	return (0);
+}
+
+/*
+ * mrsas_alloc_frame:	Allocates MFI Frames
+ * input:				Adapter soft state
+ *
+ * Allocates and maps DMA memory for an MFI frame using the frame tag, and
+ * returns a virtual memory pointer to the allocated region.
+ */
+void   *
+mrsas_alloc_frame(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd)
+{
+	u_int32_t frame_size = MRSAS_MFI_FRAME_SIZE;
+
+	if (bus_dmamem_alloc(sc->mficmd_frame_tag, (void **)&cmd->frame_mem,
+	    BUS_DMA_NOWAIT, &cmd->frame_dmamap)) {
+		device_printf(sc->mrsas_dev, "Cannot alloc MFI frame memory\n");
+		return (NULL);
+	}
+	if (bus_dmamap_load(sc->mficmd_frame_tag, cmd->frame_dmamap,
+	    cmd->frame_mem, frame_size, mrsas_alloc_cb,
+	    &cmd->frame_phys_addr, BUS_DMA_NOWAIT)) {
+		device_printf(sc->mrsas_dev, "Cannot load IO request memory\n");
+		return (NULL);
+	}
+	return (cmd->frame_mem);
+}
+
+/*
+ * mrsas_alloc_cb:	Callback function of bus_dmamap_load()
+ * input:			callback argument,
+ * 					machine dependent type that describes DMA segments,
+ * 					number of segments,
+ * 					error code.
+ *
+ * This callback receives the mapping information produced by
+ * bus_dmamap_load(). Only the physical address of the first segment is
+ * saved.
+ */
+static void
+mrsas_alloc_cb(void *arg, bus_dma_segment_t *segs,
+    int nsegs, int error)
+{
+	bus_addr_t *addr;
+
+	addr = arg;
+	*addr = segs[0].ds_addr;
+}
+
+/*
+ * mrsas_free_frame:	Frees memory for MFI frames
+ * input:				Adapter soft state
+ *
+ * Deallocates MFI frame memory.  Called from mrsas_free_mem() during detach
+ * and from the error path during frame pool creation.
+ */
+void
+mrsas_free_frame(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd)
+{
+	if (cmd->frame_phys_addr)
+		bus_dmamap_unload(sc->mficmd_frame_tag, cmd->frame_dmamap);
+	if (cmd->frame_mem != NULL)
+		bus_dmamem_free(sc->mficmd_frame_tag, cmd->frame_mem, cmd->frame_dmamap);
+}


Property changes on: trunk/sys/dev/mrsas/mrsas_ioctl.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/mrsas/mrsas_ioctl.h
===================================================================
--- trunk/sys/dev/mrsas/mrsas_ioctl.h	                        (rev 0)
+++ trunk/sys/dev/mrsas/mrsas_ioctl.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,129 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2015, AVAGO Tech. All rights reserved. Author: Marian Choy
+ * Copyright (c) 2014, LSI Corp. All rights reserved. Author: Marian Choy
+ * Support: freebsdraid at avagotech.com
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer. 2. Redistributions
+ * in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or other
+ * materials provided with the distribution. 3. Neither the name of the
+ * <ORGANIZATION> nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing
+ * official policies,either expressed or implied, of the FreeBSD Project.
+ *
+ * Send feedback to: <megaraidfbsd at avagotech.com> Mail to: AVAGO TECHNOLOGIES, 1621
+ * Barber Lane, Milpitas, CA 95035 ATTN: MegaRaid FreeBSD
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/mrsas/mrsas_ioctl.h 284267 2015-06-11 14:11:41Z kadesai $");
+
+#ifndef MRSAS_IOCTL_H
+#define	MRSAS_IOCTL_H
+
+#ifndef _IOWR
+#include <sys/ioccom.h>
+#endif					/* !_IOWR */
+
+#ifdef COMPAT_FREEBSD32
+/* Compilation error FIX */
+#if (__FreeBSD_version <= 900000)
+#include <sys/socket.h>
+#endif
+#include <sys/mount.h>
+#include <compat/freebsd32/freebsd32.h>
+#endif
+
+/*
+ * We need to use the same values as the mfi driver until MegaCli adds
+ * support for this (mrsas) driver:
+ *   'M' - for MegaRAID (typically the vendor or product initial)
+ *   1   - arbitrary (may be used to segment kinds of commands,
+ *         e.g. 1-9 status, 10-20 policy, etc.)
+ *   struct mrsas_iocpacket - sizeof() this parameter will be used.
+ * These three values are encoded into a somewhat unique, 32-bit value.
+ */
+
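+/*
+ * For example, MRSAS_IOC_FIRMWARE_PASS_THROUGH64 below expands to
+ * _IOWR('M', 1, struct mrsas_iocpacket): the _IOWR() macro from
+ * <sys/ioccom.h> packs the in/out direction bits, the size of
+ * struct mrsas_iocpacket, the group letter 'M' and the command number 1
+ * into a single 32-bit request code.
+ */
+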
+#define	MRSAS_IOC_GET_PCI_INFO				_IOR('M', 7, MRSAS_DRV_PCI_INFORMATION)
+#define	MRSAS_IOC_FIRMWARE_PASS_THROUGH64	_IOWR('M', 1, struct mrsas_iocpacket)
+#ifdef COMPAT_FREEBSD32
+#define	MRSAS_IOC_FIRMWARE_PASS_THROUGH32	_IOWR('M', 1, struct mrsas_iocpacket32)
+#endif
+
+#define	MRSAS_IOC_SCAN_BUS		_IO('M',  10)
+
+#define	MRSAS_LINUX_CMD32		0xc1144d01
+
+#define	MAX_IOCTL_SGE			16
+#define	MFI_FRAME_DIR_READ		0x0010
+#define	MFI_CMD_LD_SCSI_IO		0x03
+
+#define	INQUIRY_CMD				0x12
+#define	INQUIRY_CMDLEN			6
+#define	INQUIRY_REPLY_LEN		96
+#define	INQUIRY_VENDOR			8	/* Offset in reply data to
+						 * vendor name */
+#define	SCSI_SENSE_BUFFERSIZE	96
+
+#define	MEGAMFI_RAW_FRAME_SIZE	128
+
+
+#pragma pack(1)
+struct mrsas_iocpacket {
+	u_int16_t host_no;
+	u_int16_t __pad1;
+	u_int32_t sgl_off;
+	u_int32_t sge_count;
+	u_int32_t sense_off;
+	u_int32_t sense_len;
+	union {
+		u_int8_t raw[MEGAMFI_RAW_FRAME_SIZE];
+		struct mrsas_header hdr;
+	}	frame;
+	struct iovec sgl[MAX_IOCTL_SGE];
+};
+
+#pragma pack()
+
+#ifdef COMPAT_FREEBSD32
+#pragma pack(1)
+struct mrsas_iocpacket32 {
+	u_int16_t host_no;
+	u_int16_t __pad1;
+	u_int32_t sgl_off;
+	u_int32_t sge_count;
+	u_int32_t sense_off;
+	u_int32_t sense_len;
+	union {
+		u_int8_t raw[MEGAMFI_RAW_FRAME_SIZE];
+		struct mrsas_header hdr;
+	}	frame;
+	struct iovec32 sgl[MAX_IOCTL_SGE];
+};
+
+#pragma pack()
+#endif					/* COMPAT_FREEBSD32 */
+
+#endif					/* MRSAS_IOCTL_H */


Property changes on: trunk/sys/dev/mrsas/mrsas_ioctl.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/mrsas/mrsas_linux.c
===================================================================
--- trunk/sys/dev/mrsas/mrsas_linux.c	                        (rev 0)
+++ trunk/sys/dev/mrsas/mrsas_linux.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,139 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2015, AVAGO Tech. All rights reserved. Author: Kashyap Desai,
+ * Copyright (c) 2014, LSI Corp. All rights reserved. Author: Kashyap Desai,
+ * Sibananda Sahu Support: freebsdraid at avagotech.com
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer. 2. Redistributions
+ * in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or other
+ * materials provided with the distribution. 3. Neither the name of the
+ * <ORGANIZATION> nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing
+ * official policies,either expressed or implied, of the FreeBSD Project.
+ *
+ * Send feedback to: <megaraidfbsd at avagotech.com> Mail to: AVAGO TECHNOLOGIES, 1621
+ * Barber Lane, Milpitas, CA 95035 ATTN: MegaRaid FreeBSD
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/mrsas/mrsas_linux.c 284267 2015-06-11 14:11:41Z kadesai $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#if (__FreeBSD_version > 900000)
+#include <sys/capability.h>
+#endif
+
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <machine/bus.h>
+
+#if defined(__amd64__)			/* Assume amd64 wants 32 bit Linux */
+#include <machine/../linux32/linux.h>
+#include <machine/../linux32/linux32_proto.h>
+#else
+#include <machine/../linux/linux.h>
+#include <machine/../linux/linux_proto.h>
+#endif
+#include <compat/linux/linux_ioctl.h>
+#include <compat/linux/linux_util.h>
+
+#include <dev/mrsas/mrsas.h>
+#include <dev/mrsas/mrsas_ioctl.h>
+
+/* There are multiple ioctl number ranges that need to be handled */
+#define	MRSAS_LINUX_IOCTL_MIN  0x4d00
+#define	MRSAS_LINUX_IOCTL_MAX  0x4d01
+
+static linux_ioctl_function_t mrsas_linux_ioctl;
+static struct linux_ioctl_handler mrsas_linux_handler = {mrsas_linux_ioctl,
+	MRSAS_LINUX_IOCTL_MIN,
+MRSAS_LINUX_IOCTL_MAX};
+
+SYSINIT(mrsas_register, SI_SUB_KLD, SI_ORDER_MIDDLE,
+    linux_ioctl_register_handler, &mrsas_linux_handler);
+SYSUNINIT(mrsas_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE,
+    linux_ioctl_unregister_handler, &mrsas_linux_handler);
+
+static struct linux_device_handler mrsas_device_handler =
+{"mrsas", "megaraid_sas", "mrsas0", "megaraid_sas_ioctl_node", -1, 0, 1};
+
+SYSINIT(mrsas_register2, SI_SUB_KLD, SI_ORDER_MIDDLE,
+    linux_device_register_handler, &mrsas_device_handler);
+SYSUNINIT(mrsas_unregister2, SI_SUB_KLD, SI_ORDER_MIDDLE,
+    linux_device_unregister_handler, &mrsas_device_handler);
+
+static int
+mrsas_linux_modevent(module_t mod __unused, int cmd __unused, void *data __unused)
+{
+	return (0);
+}
+
+/*
+ * mrsas_linux_ioctl:	Linux emulator ioctl entry point.
+ *
+ * This function is the entry point for ioctls issued by Linux binaries.
+ * It forwards the command to the native mrsas ioctl handler via fo_ioctl()
+ * after validating the command number.
+ */
+static int
+mrsas_linux_ioctl(struct thread *p, struct linux_ioctl_args *args)
+{
+#if (__FreeBSD_version >= 1000000)
+	cap_rights_t rights;
+
+#endif
+	struct file *fp;
+	int error;
+	u_long cmd = args->cmd;
+
+	if (cmd != MRSAS_LINUX_CMD32) {
+		error = ENOTSUP;
+		goto END;
+	}
+#if (__FreeBSD_version >= 1000000)
+	error = fget(p, args->fd, cap_rights_init(&rights, CAP_IOCTL), &fp);
+#elif (__FreeBSD_version <= 900000)
+	error = fget(p, args->fd, &fp);
+#else					/* For FreeBSD version greater than
+					 * 9.0.0 but less than 10.0.0 */
+	error = fget(p, args->fd, CAP_IOCTL, &fp);
+#endif
+	if (error != 0)
+		goto END;
+
+	error = fo_ioctl(fp, cmd, (caddr_t)args->arg, p->td_ucred, p);
+	fdrop(fp, p);
+END:
+	return (error);
+}
+
+DEV_MODULE(mrsas_linux, mrsas_linux_modevent, NULL);
+MODULE_DEPEND(mrsas, linux, 1, 1, 1);


Property changes on: trunk/sys/dev/mrsas/mrsas_linux.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/dev/mse/mse.c
===================================================================
--- trunk/sys/dev/mse/mse.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mse/mse.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2004 M. Warner Losh
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/mse/mse.c 191320 2009-04-20 15:15:24Z ed $
  */
 
 /*

Modified: trunk/sys/dev/mse/mse_cbus.c
===================================================================
--- trunk/sys/dev/mse/mse_cbus.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mse/mse_cbus.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2004 M. Warner Losh
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/mse/mse_cbus.c 158651 2006-05-16 14:37:58Z phk $
  */
 
 /*-

Modified: trunk/sys/dev/mse/mse_isa.c
===================================================================
--- trunk/sys/dev/mse/mse_isa.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mse/mse_isa.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2004 M. Warner Losh
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/mse/mse_isa.c 158651 2006-05-16 14:37:58Z phk $
  */
 
 /*-

Modified: trunk/sys/dev/mse/msevar.h
===================================================================
--- trunk/sys/dev/mse/msevar.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mse/msevar.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2004 M. Warner Losh
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/mse/msevar.h 144280 2005-03-29 09:22:40Z imp $
  */
 
 /*-

Modified: trunk/sys/dev/mxge/eth_z8e.h
===================================================================
--- trunk/sys/dev/mxge/eth_z8e.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mxge/eth_z8e.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*******************************************************************************
 
 Copyright (c) 2006-2012, Myricom Inc.
@@ -25,7 +26,7 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$MidnightBSD$
+$FreeBSD: stable/10/sys/dev/mxge/eth_z8e.h 236212 2012-05-29 00:53:51Z gallatin $
 ***************************************************************************/
 
 static unsigned int eth_z8e_uncompressed_length = 377284 ;

Modified: trunk/sys/dev/mxge/ethp_z8e.h
===================================================================
--- trunk/sys/dev/mxge/ethp_z8e.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mxge/ethp_z8e.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*******************************************************************************
 
 Copyright (c) 2006-2012, Myricom Inc.
@@ -25,7 +26,7 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$MidnightBSD$
+$FreeBSD: stable/10/sys/dev/mxge/ethp_z8e.h 236212 2012-05-29 00:53:51Z gallatin $
 ***************************************************************************/
 
 static unsigned int ethp_z8e_uncompressed_length = 387604 ;

Modified: trunk/sys/dev/mxge/if_mxge.c
===================================================================
--- trunk/sys/dev/mxge/if_mxge.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mxge/if_mxge.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /******************************************************************************
 
 Copyright (c) 2006-2013, Myricom Inc.
@@ -28,7 +29,7 @@
 ***************************************************************************/
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/mxge/if_mxge.c 329834 2018-02-22 19:40:03Z rpokala $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -127,7 +128,8 @@
   DEVMETHOD(device_attach, mxge_attach),
   DEVMETHOD(device_detach, mxge_detach),
   DEVMETHOD(device_shutdown, mxge_shutdown),
-  {0, 0}
+
+  DEVMETHOD_END
 };
 
 static driver_t mxge_driver =
@@ -1466,15 +1468,15 @@
 	/* random information */
 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 		       "firmware_version",
-		       CTLFLAG_RD, &sc->fw_version,
+		       CTLFLAG_RD, sc->fw_version,
 		       0, "firmware version");
 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 		       "serial_number",
-		       CTLFLAG_RD, &sc->serial_number_string,
+		       CTLFLAG_RD, sc->serial_number_string,
 		       0, "serial number");
 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 		       "product_code",
-		       CTLFLAG_RD, &sc->product_code_string,
+		       CTLFLAG_RD, sc->product_code_string,
 		       0, "product_code");
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 		       "pcie_link_width",
@@ -2700,8 +2702,12 @@
 	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 		mxge_vlan_tag_remove(m, &csum);
 	}
+	/* flowid only valid if RSS hashing is enabled */
+	if (sc->num_slices > 1) {
+		m->m_pkthdr.flowid = (ss - sc->ss);
+		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
+	}
 	/* if the checksum is valid, mark it in the mbuf header */
-	
 	if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
 	    (0 == mxge_rx_csum(m, csum))) {
 		/* Tell the stack that the  checksum is good */
@@ -2714,11 +2720,6 @@
 			return;
 #endif
 	}
-	/* flowid only valid if RSS hashing is enabled */
-	if (sc->num_slices > 1) {
-		m->m_pkthdr.flowid = (ss - sc->ss);
-		m->m_flags |= M_FLOWID;
-	}
 	/* pass the frame up the stack */
 	(*ifp->if_input)(ifp, m);
 }
@@ -2769,6 +2770,11 @@
 	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 		mxge_vlan_tag_remove(m, &csum);
 	}
+	/* flowid only valid if RSS hashing is enabled */
+	if (sc->num_slices > 1) {
+		m->m_pkthdr.flowid = (ss - sc->ss);
+		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
+	}
 	/* if the checksum is valid, mark it in the mbuf header */
 	if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
 	    (0 == mxge_rx_csum(m, csum))) {
@@ -2782,11 +2788,6 @@
 			return;
 #endif
 	}
-	/* flowid only valid if RSS hashing is enabled */
-	if (sc->num_slices > 1) {
-		m->m_pkthdr.flowid = (ss - sc->ss);
-		m->m_flags |= M_FLOWID;
-	}
 	/* pass the frame up the stack */
 	(*ifp->if_input)(ifp, m);
 }
@@ -2946,7 +2947,7 @@
 	}
 
 	for (i = 0; i < 3; i++, ptr++) {
-		ptr = index(ptr, '-');
+		ptr = strchr(ptr, '-');
 		if (ptr == NULL) {
 			device_printf(sc->dev,
 				      "only %d dashes in PC?!?\n", i);
@@ -3138,7 +3139,7 @@
 			sc->link_state = stats->link_up;
 			if (sc->link_state) {
 				if_link_state_change(sc->ifp, LINK_STATE_UP);
-				 sc->ifp->if_baudrate = IF_Gbps(10UL);
+				if_initbaudrate(sc->ifp, IF_Gbps(10));
 				if (mxge_verbose)
 					device_printf(sc->dev, "link up\n");
 			} else {
@@ -3417,7 +3418,7 @@
 		return err;
 	}
 
-	/* now allocate TX resouces */
+	/* now allocate TX resources */
 
 #ifndef IFNET_BUF_RING
 	/* only use a single TX ring for now */
@@ -3826,7 +3827,7 @@
 {
 	device_t dev = sc->dev;
 	int reg;
-	uint16_t cmd, lnk, pectl;
+	uint16_t lnk, pectl;
 
 	/* find the PCIe link width and set max read request to 4KB*/
 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
@@ -3846,9 +3847,6 @@
 
 	/* Enable DMA and Memory space access */
 	pci_enable_busmaster(dev);
-	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
-	cmd |= PCIM_CMD_MEMEN;
-	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
 }
 
 static uint32_t
@@ -4171,11 +4169,6 @@
 
 	err = 0;
 	switch (command) {
-	case SIOCSIFADDR:
-	case SIOCGIFADDR:
-		err = ether_ioctl(ifp, command, data);
-		break;
-
 	case SIOCSIFMTU:
 		err = mxge_change_mtu(sc, ifr->ifr_mtu);
 		break;
@@ -4299,7 +4292,8 @@
                 break;
 
 	default:
-		err = ENOTTY;
+		err = ether_ioctl(ifp, command, data);
+		break;
         }
 	return err;
 }
@@ -4888,7 +4882,7 @@
 		goto abort_with_rings;
 	}
 
-	ifp->if_baudrate = IF_Gbps(10UL);
+	if_initbaudrate(ifp, IF_Gbps(10));
 	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
 		IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
 		IFCAP_RXCSUM_IPV6;

Modified: trunk/sys/dev/mxge/if_mxge_var.h
===================================================================
--- trunk/sys/dev/mxge/if_mxge_var.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mxge/if_mxge_var.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*******************************************************************************
 
 Copyright (c) 2006-2013, Myricom Inc.
@@ -25,7 +26,7 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$MidnightBSD$
+$FreeBSD: stable/10/sys/dev/mxge/if_mxge_var.h 247160 2013-02-22 19:23:33Z gallatin $
 
 ***************************************************************************/
 
@@ -50,6 +51,19 @@
 #define IFNET_BUF_RING 1
 #endif
 
+#if (__FreeBSD_version < 1000020)
+#undef IF_Kbps
+#undef IF_Mbps
+#undef IF_Gbps
+#define	IF_Kbps(x)	((uintmax_t)(x) * 1000)	/* kilobits/sec. */
+#define	IF_Mbps(x)	(IF_Kbps((x) * 1000))	/* megabits/sec. */
+#define	IF_Gbps(x)	(IF_Mbps((x) * 1000))	/* gigabits/sec. */
+static __inline void
+if_initbaudrate(struct ifnet *ifp, uintmax_t baud)
+{
+	ifp->if_baudrate = baud;
+}
+#endif
 #ifndef VLAN_CAPABILITIES
 #define VLAN_CAPABILITIES(ifp)
 #define mxge_vlans_active(sc) (sc)->ifp->if_nvlans

Modified: trunk/sys/dev/mxge/mcp_gen_header.h
===================================================================
--- trunk/sys/dev/mxge/mcp_gen_header.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mxge/mcp_gen_header.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*******************************************************************************
 
 Copyright (c) 2006-2007, Myricom Inc.
@@ -25,7 +26,7 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$MidnightBSD$
+$FreeBSD: stable/10/sys/dev/mxge/mcp_gen_header.h 171405 2007-07-12 16:04:55Z gallatin $
 ***************************************************************************/
 
 #ifndef _mcp_gen_header_h
@@ -51,7 +52,7 @@
    mcp_gen_header_t gen_mcp_header = {
       .header_length = sizeof(mcp_gen_header_t),
       .mcp_type = MCP_TYPE_XXX,
-      .version = "something $Id: mcp_gen_header.h,v 1.2 2013-01-08 03:53:24 laffer1 Exp $",
+      .version = "something $Id: mcp_gen_header.h,v 1.1 2005/12/23 02:10:44 gallatin Exp $",
       .mcp_globals = (unsigned)&Globals
    };
 */

Modified: trunk/sys/dev/mxge/mxge_eth_z8e.c
===================================================================
--- trunk/sys/dev/mxge/mxge_eth_z8e.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mxge/mxge_eth_z8e.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,8 +1,9 @@
+/* $MidnightBSD$ */
 /*
  * from: FreeBSD: src/sys/tools/fw_stub.awk,v 1.6 2007/03/02 11:42:53 flz
  */
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/mxge/mxge_eth_z8e.c 241394 2012-10-10 08:36:38Z kevlo $");
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>

Modified: trunk/sys/dev/mxge/mxge_ethp_z8e.c
===================================================================
--- trunk/sys/dev/mxge/mxge_ethp_z8e.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mxge/mxge_ethp_z8e.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,8 +1,9 @@
+/* $MidnightBSD$ */
 /*
  * from: FreeBSD: src/sys/tools/fw_stub.awk,v 1.6 2007/03/02 11:42:53 flz
  */
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/mxge/mxge_ethp_z8e.c 241394 2012-10-10 08:36:38Z kevlo $");
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>

Modified: trunk/sys/dev/mxge/mxge_mcp.h
===================================================================
--- trunk/sys/dev/mxge/mxge_mcp.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mxge/mxge_mcp.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*******************************************************************************
 
 Copyright (c) 2006-2009, Myricom Inc.
@@ -25,7 +26,7 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$MidnightBSD$
+$FreeBSD: stable/10/sys/dev/mxge/mxge_mcp.h 261455 2014-02-04 03:36:42Z eadler $
 ***************************************************************************/
 
 #ifndef _myri10ge_mcp_h
@@ -226,7 +227,7 @@
      a power of 2 number of entries.  */
 
   MXGEFW_CMD_SET_INTRQ_SIZE = 13, 	/* in bytes */
-#define MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK  (1 << 31)
+#define MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK  (1U << 31)
 
   /* command to bring ethernet interface up.  Above parameters
      (plus mtu & mac address) must have been exchanged prior

Modified: trunk/sys/dev/mxge/mxge_rss_eth_z8e.c
===================================================================
--- trunk/sys/dev/mxge/mxge_rss_eth_z8e.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mxge/mxge_rss_eth_z8e.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,8 +1,9 @@
+/* $MidnightBSD$ */
 /*
  * from: FreeBSD: src/sys/tools/fw_stub.awk,v 1.6 2007/03/02 11:42:53 flz
  */
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/mxge/mxge_rss_eth_z8e.c 241394 2012-10-10 08:36:38Z kevlo $");
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>

Modified: trunk/sys/dev/mxge/mxge_rss_ethp_z8e.c
===================================================================
--- trunk/sys/dev/mxge/mxge_rss_ethp_z8e.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mxge/mxge_rss_ethp_z8e.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,8 +1,9 @@
+/* $MidnightBSD$ */
 /*
  * from: FreeBSD: src/sys/tools/fw_stub.awk,v 1.6 2007/03/02 11:42:53 flz
  */
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/mxge/mxge_rss_ethp_z8e.c 241394 2012-10-10 08:36:38Z kevlo $");
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>

Modified: trunk/sys/dev/mxge/rss_eth_z8e.h
===================================================================
--- trunk/sys/dev/mxge/rss_eth_z8e.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mxge/rss_eth_z8e.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*******************************************************************************
 
 Copyright (c) 2006-2012, Myricom Inc.
@@ -25,7 +26,7 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$MidnightBSD$
+$FreeBSD: stable/10/sys/dev/mxge/rss_eth_z8e.h 236212 2012-05-29 00:53:51Z gallatin $
 ***************************************************************************/
 
 static unsigned int rss_eth_z8e_uncompressed_length = 534724 ;

Modified: trunk/sys/dev/mxge/rss_ethp_z8e.h
===================================================================
--- trunk/sys/dev/mxge/rss_ethp_z8e.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/mxge/rss_ethp_z8e.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*******************************************************************************
 
 Copyright (c) 2006-2012, Myricom Inc.
@@ -25,7 +26,7 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$MidnightBSD$
+$FreeBSD: stable/10/sys/dev/mxge/rss_ethp_z8e.h 236212 2012-05-29 00:53:51Z gallatin $
 ***************************************************************************/
 
 static unsigned int rss_ethp_z8e_uncompressed_length = 544468 ;

Modified: trunk/sys/dev/my/if_my.c
===================================================================
--- trunk/sys/dev/my/if_my.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/my/if_my.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Written by: yen_cw at myson.com.tw
  * Copyright (c) 2002 Myson Technology Inc.
@@ -28,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/my/if_my.c 266921 2014-05-31 11:08:22Z brueffer $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -80,11 +81,6 @@
 
 #include <dev/my/if_myreg.h>
 
-#ifndef lint
-static          const char rcsid[] =
-"$Id: if_my.c,v 1.2 2013-01-08 03:53:24 laffer1 Exp $";
-#endif
-
 /*
  * Various supported device vendors/types and their names.
  */
@@ -153,7 +149,7 @@
 	DEVMETHOD(device_detach, my_detach),
 	DEVMETHOD(device_shutdown, my_shutdown),
 
-	{0, 0}
+	DEVMETHOD_END
 };
 
 static driver_t my_driver = {
@@ -662,10 +658,8 @@
 my_setmode_mii(struct my_softc * sc, int media)
 {
 	u_int16_t       bmcr;
-	struct ifnet   *ifp;
 
 	MY_LOCK_ASSERT(sc);
-	ifp = sc->my_ifp;
 	/*
 	 * If an autoneg session is in progress, stop it.
 	 */
@@ -895,7 +889,6 @@
 	}
 	ifp->if_softc = sc;
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
-	ifp->if_mtu = ETHERMTU;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = my_ioctl;
 	ifp->if_start = my_start;

Modified: trunk/sys/dev/my/if_myreg.h
===================================================================
--- trunk/sys/dev/my/if_myreg.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/my/if_myreg.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2002 Myson Technology Inc.
  * All rights reserved.
@@ -25,7 +26,7 @@
  *
  * Written by: yen_cw at myson.com.tw  available at: http://www.myson.com.tw/
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/my/if_myreg.h 199540 2009-11-19 18:43:43Z jhb $
  *
  * Myson MTD80x register definitions.
  *

Added: trunk/sys/dev/nand/nand.c
===================================================================
--- trunk/sys/dev/nand/nand.c	                        (rev 0)
+++ trunk/sys/dev/nand/nand.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,834 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nand.c 259371 2013-12-14 00:54:05Z ian $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/callout.h>
+#include <sys/sysctl.h>
+
+#include <dev/nand/nand.h>
+#include <dev/nand/nandbus.h>
+#include <dev/nand/nand_ecc_pos.h>
+#include "nfc_if.h"
+#include "nand_if.h"
+#include "nandbus_if.h"
+#include <machine/stdarg.h>
+
+#define NAND_RESET_DELAY	1000	/* tRST */
+#define NAND_ERASE_DELAY	3000	/* tBERS */
+#define NAND_PROG_DELAY		700	/* tPROG */
+#define NAND_READ_DELAY		50	/* tR */
+
+#define BIT0(x) ((x) & 0x1)
+#define BIT1(x) (BIT0(x >> 1))
+#define BIT2(x) (BIT0(x >> 2))
+#define BIT3(x) (BIT0(x >> 3))
+#define BIT4(x) (BIT0(x >> 4))
+#define BIT5(x) (BIT0(x >> 5))
+#define BIT6(x) (BIT0(x >> 6))
+#define BIT7(x) (BIT0(x >> 7))
+
+#define	SOFTECC_SIZE		256
+#define	SOFTECC_BYTES		3
+
+int nand_debug_flag = 0;
+SYSCTL_INT(_debug, OID_AUTO, nand_debug, CTLFLAG_RW, &nand_debug_flag, 0,
+    "NAND subsystem debug flag");
+
+static void
+nand_tunable_init(void *arg)
+{
+
+	TUNABLE_INT_FETCH("debug.nand", &nand_debug_flag);
+}
+
+SYSINIT(nand_tunables, SI_SUB_VFS, SI_ORDER_ANY, nand_tunable_init, NULL);
+
+MALLOC_DEFINE(M_NAND, "NAND", "NAND dynamic data");
+
+static void calculate_ecc(const uint8_t *, uint8_t *);
+static int correct_ecc(uint8_t *, uint8_t *, uint8_t *);
+
+void
+nand_debug(int level, const char *fmt, ...)
+{
+	va_list ap;
+
+	if (!(nand_debug_flag & level))
+		return;
+	va_start(ap, fmt);
+	vprintf(fmt, ap);
+	va_end(ap);
+	printf("\n");
+}
+
+void
+nand_init(struct nand_softc *nand, device_t dev, int ecc_mode,
+    int ecc_bytes, int ecc_size, uint16_t *eccposition, char *cdev_name)
+{
+
+	nand->ecc.eccmode = ecc_mode;
+	nand->chip_cdev_name = cdev_name;
+
+	if (ecc_mode == NAND_ECC_SOFT) {
+		nand->ecc.eccbytes = SOFTECC_BYTES;
+		nand->ecc.eccsize = SOFTECC_SIZE;
+	} else if (ecc_mode != NAND_ECC_NONE) {
+		nand->ecc.eccbytes = ecc_bytes;
+		nand->ecc.eccsize = ecc_size;
+		if (eccposition)
+			nand->ecc.eccpositions = eccposition;
+	}
+}
+
+void
+nand_onfi_set_params(struct nand_chip *chip, struct onfi_chip_params *params)
+{
+	struct chip_geom *cg;
+
+	cg = &chip->chip_geom;
+
+	init_chip_geom(cg, params->luns, params->blocks_per_lun,
+	    params->pages_per_block, params->bytes_per_page,
+	    params->spare_bytes_per_page);
+	chip->t_bers = params->t_bers;
+	chip->t_prog = params->t_prog;
+	chip->t_r = params->t_r;
+	chip->t_ccs = params->t_ccs;
+
+	if (params->features & ONFI_FEAT_16BIT)
+		chip->flags |= NAND_16_BIT;
+}
+
+void
+nand_set_params(struct nand_chip *chip, struct nand_params *params)
+{
+	struct chip_geom *cg;
+	uint32_t blocks_per_chip;
+
+	cg = &chip->chip_geom;
+	blocks_per_chip = (params->chip_size << 20) /
+	    (params->page_size * params->pages_per_block);
+
+	init_chip_geom(cg, 1, blocks_per_chip,
+	    params->pages_per_block, params->page_size,
+	    params->oob_size);
+
+	chip->t_bers = NAND_ERASE_DELAY;
+	chip->t_prog = NAND_PROG_DELAY;
+	chip->t_r = NAND_READ_DELAY;
+	chip->t_ccs = 0;
+
+	if (params->flags & NAND_16_BIT)
+		chip->flags |= NAND_16_BIT;
+}
+
+int
+nand_init_stat(struct nand_chip *chip)
+{
+	struct block_stat *blk_stat;
+	struct page_stat *pg_stat;
+	struct chip_geom *cg;
+	uint32_t blks, pgs;
+
+	cg = &chip->chip_geom;
+	blks = cg->blks_per_lun * cg->luns;
+	blk_stat = malloc(sizeof(struct block_stat) * blks, M_NAND,
+	    M_WAITOK | M_ZERO);
+	if (!blk_stat)
+		return (ENOMEM);
+
+	pgs = blks * cg->pgs_per_blk;
+	pg_stat = malloc(sizeof(struct page_stat) * pgs, M_NAND,
+	    M_WAITOK | M_ZERO);
+	if (!pg_stat) {
+		free(blk_stat, M_NAND);
+		return (ENOMEM);
+	}
+
+	chip->blk_stat = blk_stat;
+	chip->pg_stat = pg_stat;
+
+	return (0);
+}
+
+void
+nand_destroy_stat(struct nand_chip *chip)
+{
+
+	free(chip->pg_stat, M_NAND);
+	free(chip->blk_stat, M_NAND);
+}
+
+int
+init_chip_geom(struct chip_geom *cg, uint32_t luns, uint32_t blks_per_lun,
+    uint32_t pgs_per_blk, uint32_t pg_size, uint32_t oob_size)
+{
+	int shift;
+
+	if (!cg)
+		return (-1);
+
+	cg->luns = luns;
+	cg->blks_per_lun = blks_per_lun;
+	cg->blks_per_chip = blks_per_lun * luns;
+	cg->pgs_per_blk = pgs_per_blk;
+
+	cg->page_size = pg_size;
+	cg->oob_size = oob_size;
+	cg->block_size = cg->page_size * cg->pgs_per_blk;
+	cg->chip_size = cg->block_size * cg->blks_per_chip;
+
+	shift = fls(cg->pgs_per_blk - 1);
+	cg->pg_mask = (1 << shift) - 1;
+	cg->blk_shift = shift;
+
+	if (cg->blks_per_lun > 0) {
+		shift = fls(cg->blks_per_lun - 1);
+		cg->blk_mask = ((1 << shift) - 1) << cg->blk_shift;
+	} else {
+		shift = 0;
+		cg->blk_mask = 0;
+	}
+
+	cg->lun_shift = shift + cg->blk_shift;
+	shift = fls(cg->luns - 1);
+	cg->lun_mask = ((1 << shift) - 1) << cg->lun_shift;
+
+	nand_debug(NDBG_NAND, "Masks: lun 0x%x blk 0x%x page 0x%x\n"
+	    "Shifts: lun %d blk %d",
+	    cg->lun_mask, cg->blk_mask, cg->pg_mask,
+	    cg->lun_shift, cg->blk_shift);
+
+	return (0);
+}
+
+int
+nand_row_to_blkpg(struct chip_geom *cg, uint32_t row, uint32_t *lun,
+    uint32_t *blk, uint32_t *pg)
+{
+
+	if (!cg || !lun || !blk || !pg)
+		return (-1);
+
+	if (row & ~(cg->lun_mask | cg->blk_mask | cg->pg_mask)) {
+		nand_debug(NDBG_NAND,"Address out of bounds\n");
+		return (-1);
+	}
+
+	*lun = (row & cg->lun_mask) >> cg->lun_shift;
+	*blk = (row & cg->blk_mask) >> cg->blk_shift;
+	*pg = (row & cg->pg_mask);
+
+	nand_debug(NDBG_NAND,"address %x-%x-%x\n", *lun, *blk, *pg);
+
+	return (0);
+}
+
+int page_to_row(struct chip_geom *cg, uint32_t page, uint32_t *row)
+{
+	uint32_t lun, block, pg_in_blk;
+
+	if (!cg || !row)
+		return (-1);
+
+	block = page / cg->pgs_per_blk;
+	pg_in_blk = page % cg->pgs_per_blk;
+
+	lun = block / cg->blks_per_lun;
+	block = block % cg->blks_per_lun;
+
+	*row = (lun << cg->lun_shift) & cg->lun_mask;
+	*row |= ((block << cg->blk_shift) & cg->blk_mask);
+	*row |= (pg_in_blk & cg->pg_mask);
+
+	return (0);
+}
+
+int
+nand_check_page_boundary(struct nand_chip *chip, uint32_t page)
+{
+	struct chip_geom* cg;
+
+	cg = &chip->chip_geom;
+	if (page >= (cg->pgs_per_blk * cg->blks_per_lun * cg->luns)) {
+		nand_debug(NDBG_GEN,"%s: page number too big %#x\n",
+		    __func__, page);
+		return (1);
+	}
+
+	return (0);
+}
+
+void
+nand_get_chip_param(struct nand_chip *chip, struct chip_param_io *param)
+{
+	struct chip_geom *cg;
+
+	cg = &chip->chip_geom;
+	param->page_size = cg->page_size;
+	param->oob_size = cg->oob_size;
+
+	param->blocks = cg->blks_per_lun * cg->luns;
+	param->pages_per_block = cg->pgs_per_blk;
+}
+
+static uint16_t *
+default_software_ecc_positions(struct nand_chip *chip)
+{
+	/* If positions have been set already, use them. */
+	if (chip->nand->ecc.eccpositions)
+		return (chip->nand->ecc.eccpositions);
+
+	/*
+	 * XXX Note that the following logic isn't really sufficient, especially
+	 * in the ONFI case where the number of ECC bytes can be dictated by
+	 * values in the parameters page, and that could lead to needing more
+	 * byte positions than exist within the tables of software-ecc defaults.
+	 */
+	if (chip->chip_geom.oob_size >= 128)
+		return (default_software_ecc_positions_128);
+	if (chip->chip_geom.oob_size >= 64)
+		return (default_software_ecc_positions_64);
+	else if (chip->chip_geom.oob_size >= 16)
+		return (default_software_ecc_positions_16);
+
+	return (NULL);
+}
+
+static void
+calculate_ecc(const uint8_t *buf, uint8_t *ecc)
+{
+	uint8_t p8, byte;
+	int i;
+
+	memset(ecc, 0, 3);
+
+	for (i = 0; i < 256; i++) {
+		byte = buf[i];
+		ecc[0] ^= (BIT0(byte) ^ BIT2(byte) ^ BIT4(byte) ^
+		    BIT6(byte)) << 2;
+		ecc[0] ^= (BIT1(byte) ^ BIT3(byte) ^ BIT5(byte) ^
+		    BIT7(byte)) << 3;
+		ecc[0] ^= (BIT0(byte) ^ BIT1(byte) ^ BIT4(byte) ^
+		    BIT5(byte)) << 4;
+		ecc[0] ^= (BIT2(byte) ^ BIT3(byte) ^ BIT6(byte) ^
+		    BIT7(byte)) << 5;
+		ecc[0] ^= (BIT0(byte) ^ BIT1(byte) ^ BIT2(byte) ^
+		    BIT3(byte)) << 6;
+		ecc[0] ^= (BIT4(byte) ^ BIT5(byte) ^ BIT6(byte) ^
+		    BIT7(byte)) << 7;
+
+		p8 = BIT0(byte) ^ BIT1(byte) ^ BIT2(byte) ^
+		    BIT3(byte) ^ BIT4(byte) ^ BIT5(byte) ^ BIT6(byte) ^
+		    BIT7(byte);
+
+		if (p8) {
+			ecc[2] ^= (0x1 << BIT0(i));
+			ecc[2] ^= (0x4 << BIT1(i));
+			ecc[2] ^= (0x10 << BIT2(i));
+			ecc[2] ^= (0x40 << BIT3(i));
+
+			ecc[1] ^= (0x1 << BIT4(i));
+			ecc[1] ^= (0x4 << BIT5(i));
+			ecc[1] ^= (0x10 << BIT6(i));
+			ecc[1] ^= (0x40 << BIT7(i));
+		}
+	}
+	ecc[0] = ~ecc[0];
+	ecc[1] = ~ecc[1];
+	ecc[2] = ~ecc[2];
+	ecc[0] |= 3;
+}
+
+static int
+correct_ecc(uint8_t *buf, uint8_t *calc_ecc, uint8_t *read_ecc)
+{
+	uint8_t ecc0, ecc1, ecc2, onesnum, bit, byte;
+	uint16_t addr = 0;
+
+	ecc0 = calc_ecc[0] ^ read_ecc[0];
+	ecc1 = calc_ecc[1] ^ read_ecc[1];
+	ecc2 = calc_ecc[2] ^ read_ecc[2];
+
+	if (!ecc0 && !ecc1 && !ecc2)
+		return (ECC_OK);
+
+	addr = BIT3(ecc0) | (BIT5(ecc0) << 1) | (BIT7(ecc0) << 2);
+	addr |= (BIT1(ecc2) << 3) | (BIT3(ecc2) << 4) |
+	    (BIT5(ecc2) << 5) |  (BIT7(ecc2) << 6);
+	addr |= (BIT1(ecc1) << 7) | (BIT3(ecc1) << 8) |
+	    (BIT5(ecc1) << 9) |  (BIT7(ecc1) << 10);
+
+	onesnum = 0;
+	while (ecc0 || ecc1 || ecc2) {
+		if (ecc0 & 1)
+			onesnum++;
+		if (ecc1 & 1)
+			onesnum++;
+		if (ecc2 & 1)
+			onesnum++;
+
+		ecc0 >>= 1;
+		ecc1 >>= 1;
+		ecc2 >>= 1;
+	}
+
+	if (onesnum == 11) {
+		/* Correctable error */
+		bit = addr & 7;
+		byte = addr >> 3;
+		buf[byte] ^= (1 << bit);
+		return (ECC_CORRECTABLE);
+	} else if (onesnum == 1) {
+		/* ECC error */
+		return (ECC_ERROR_ECC);
+	} else {
+		/* Uncorrectable error */
+		return (ECC_UNCORRECTABLE);
+	}
+
+	return (0);
+}
+
+int
+nand_softecc_get(device_t dev, uint8_t *buf, int pagesize, uint8_t *ecc)
+{
+	int steps = pagesize / SOFTECC_SIZE;
+	int i = 0, j = 0;
+
+	for (; i < (steps * SOFTECC_BYTES);
+	    i += SOFTECC_BYTES, j += SOFTECC_SIZE) {
+		calculate_ecc(&buf[j], &ecc[i]);
+	}
+
+	return (0);
+}
+
+int
+nand_softecc_correct(device_t dev, uint8_t *buf, int pagesize,
+    uint8_t *readecc, uint8_t *calcecc)
+{
+	int steps = pagesize / SOFTECC_SIZE;
+	int i = 0, j = 0, ret = 0;
+
+	for (i = 0; i < (steps * SOFTECC_BYTES);
+	    i += SOFTECC_BYTES, j += SOFTECC_SIZE) {
+		ret += correct_ecc(&buf[j], &calcecc[i], &readecc[i]);
+		if (ret < 0)
+			return (ret);
+	}
+
+	return (ret);
+}
+
+static int
+offset_to_page(struct chip_geom *cg, uint32_t offset)
+{
+
+	return (offset / cg->page_size);
+}
+
+int
+nand_read_pages(struct nand_chip *chip, uint32_t offset, void *buf,
+    uint32_t len)
+{
+	struct chip_geom *cg;
+	struct nand_ecc_data *eccd;
+	struct page_stat *pg_stat;
+	device_t nandbus;
+	void *oob = NULL;
+	uint8_t *ptr;
+	uint16_t *eccpos = NULL;
+	uint32_t page, num, steps = 0;
+	int i, retval = 0, needwrite;
+
+	nand_debug(NDBG_NAND,"%p read page %x[%x]", chip, offset, len);
+	cg = &chip->chip_geom;
+	eccd = &chip->nand->ecc;
+	page = offset_to_page(cg, offset);
+	num = len / cg->page_size;
+
+	if (eccd->eccmode != NAND_ECC_NONE) {
+		steps = cg->page_size / eccd->eccsize;
+		eccpos = default_software_ecc_positions(chip);
+		oob = malloc(cg->oob_size, M_NAND, M_WAITOK);
+	}
+
+	nandbus = device_get_parent(chip->dev);
+	NANDBUS_LOCK(nandbus);
+	NANDBUS_SELECT_CS(device_get_parent(chip->dev), chip->num);
+
+	ptr = (uint8_t *)buf;
+	while (num--) {
+		pg_stat = &(chip->pg_stat[page]);
+
+		if (NAND_READ_PAGE(chip->dev, page, ptr, cg->page_size, 0)) {
+			retval = ENXIO;
+			break;
+		}
+
+		if (eccd->eccmode != NAND_ECC_NONE) {
+			if (NAND_GET_ECC(chip->dev, ptr, eccd->ecccalculated,
+			    &needwrite)) {
+				retval = ENXIO;
+				break;
+			}
+			nand_debug(NDBG_ECC,"%s: ECC calculated:",
+			    __func__);
+			if (nand_debug_flag & NDBG_ECC)
+				for (i = 0; i < (eccd->eccbytes * steps); i++)
+					printf("%x ", eccd->ecccalculated[i]);
+
+			nand_debug(NDBG_ECC,"\n");
+
+			if (NAND_READ_OOB(chip->dev, page, oob, cg->oob_size,
+			    0)) {
+				retval = ENXIO;
+				break;
+			}
+			for (i = 0; i < (eccd->eccbytes * steps); i++)
+				eccd->eccread[i] = ((uint8_t *)oob)[eccpos[i]];
+
+			nand_debug(NDBG_ECC,"%s: ECC read:", __func__);
+			if (nand_debug_flag & NDBG_ECC)
+				for (i = 0; i < (eccd->eccbytes * steps); i++)
+					printf("%x ", eccd->eccread[i]);
+			nand_debug(NDBG_ECC,"\n");
+
+			retval = NAND_CORRECT_ECC(chip->dev, ptr, eccd->eccread,
+			    eccd->ecccalculated);
+
+			nand_debug(NDBG_ECC, "NAND_CORRECT_ECC() returned %d",
+			    retval);
+
+			if (retval == 0)
+				pg_stat->ecc_stat.ecc_succeded++;
+			else if (retval > 0) {
+				pg_stat->ecc_stat.ecc_corrected += retval;
+				retval = ECC_CORRECTABLE;
+			} else {
+				pg_stat->ecc_stat.ecc_failed++;
+				break;
+			}
+		}
+
+		pg_stat->page_read++;
+		page++;
+		ptr += cg->page_size;
+	}
+
+	NANDBUS_UNLOCK(nandbus);
+
+	if (oob)
+		free(oob, M_NAND);
+
+	return (retval);
+}
+
+int
+nand_read_pages_raw(struct nand_chip *chip, uint32_t offset, void *buf,
+    uint32_t len)
+{
+	struct chip_geom *cg;
+	device_t nandbus;
+	uint8_t *ptr;
+	uint32_t page, num, end, begin = 0, begin_off;
+	int retval = 0;
+
+	cg = &chip->chip_geom;
+	page = offset_to_page(cg, offset);
+	begin_off = offset - page * cg->page_size;
+	if (begin_off) {
+		begin = cg->page_size - begin_off;
+		len -= begin;
+	}
+	num = len / cg->page_size;
+	end = len % cg->page_size;
+
+	nandbus = device_get_parent(chip->dev);
+	NANDBUS_LOCK(nandbus);
+	NANDBUS_SELECT_CS(device_get_parent(chip->dev), chip->num);
+
+	ptr = (uint8_t *)buf;
+	if (begin_off) {
+		if (NAND_READ_PAGE(chip->dev, page, ptr, begin, begin_off)) {
+			NANDBUS_UNLOCK(nandbus);
+			return (ENXIO);
+		}
+
+		page++;
+		ptr += begin;
+	}
+
+	while (num--) {
+		if (NAND_READ_PAGE(chip->dev, page, ptr, cg->page_size, 0)) {
+			NANDBUS_UNLOCK(nandbus);
+			return (ENXIO);
+		}
+
+		page++;
+		ptr += cg->page_size;
+	}
+
+	if (end)
+		if (NAND_READ_PAGE(chip->dev, page, ptr, end, 0)) {
+			NANDBUS_UNLOCK(nandbus);
+			return (ENXIO);
+		}
+
+	NANDBUS_UNLOCK(nandbus);
+
+	return (retval);
+}
+
+
+int
+nand_prog_pages(struct nand_chip *chip, uint32_t offset, uint8_t *buf,
+    uint32_t len)
+{
+	struct chip_geom *cg;
+	struct page_stat *pg_stat;
+	struct nand_ecc_data *eccd;
+	device_t nandbus;
+	uint32_t page, num;
+	uint8_t *oob = NULL;
+	uint16_t *eccpos = NULL;
+	int steps = 0, i, needwrite, err = 0;
+
+	nand_debug(NDBG_NAND,"%p prog page %x[%x]", chip, offset, len);
+
+	eccd = &chip->nand->ecc;
+	cg = &chip->chip_geom;
+	page = offset_to_page(cg, offset);
+	num = len / cg->page_size;
+
+	if (eccd->eccmode != NAND_ECC_NONE) {
+		steps = cg->page_size / eccd->eccsize;
+		oob = malloc(cg->oob_size, M_NAND, M_WAITOK);
+		eccpos = default_software_ecc_positions(chip);
+	}
+
+	nandbus = device_get_parent(chip->dev);
+	NANDBUS_LOCK(nandbus);
+	NANDBUS_SELECT_CS(device_get_parent(chip->dev), chip->num);
+
+	while (num--) {
+		if (NAND_PROGRAM_PAGE(chip->dev, page, buf, cg->page_size, 0)) {
+			err = ENXIO;
+			break;
+		}
+
+		if (eccd->eccmode != NAND_ECC_NONE) {
+			if (NAND_GET_ECC(chip->dev, buf, &eccd->ecccalculated,
+			    &needwrite)) {
+				err = ENXIO;
+				break;
+			}
+			nand_debug(NDBG_ECC,"ECC calculated:");
+			if (nand_debug_flag & NDBG_ECC)
+				for (i = 0; i < (eccd->eccbytes * steps); i++)
+					printf("%x ", eccd->ecccalculated[i]);
+
+			nand_debug(NDBG_ECC,"\n");
+
+			if (needwrite) {
+				if (NAND_READ_OOB(chip->dev, page, oob, cg->oob_size,
+				    0)) {
+					err = ENXIO;
+					break;
+				}
+
+				for (i = 0; i < (eccd->eccbytes * steps); i++)
+					oob[eccpos[i]] = eccd->ecccalculated[i];
+
+				if (NAND_PROGRAM_OOB(chip->dev, page, oob,
+				    cg->oob_size, 0)) {
+					err = ENXIO;
+					break;
+				}
+			}
+		}
+
+		pg_stat = &(chip->pg_stat[page]);
+		pg_stat->page_written++;
+
+		page++;
+		buf += cg->page_size;
+	}
+
+	NANDBUS_UNLOCK(nandbus);
+
+	if (oob)
+		free(oob, M_NAND);
+
+	return (err);
+}
+
+int
+nand_prog_pages_raw(struct nand_chip *chip, uint32_t offset, void *buf,
+    uint32_t len)
+{
+	struct chip_geom *cg;
+	device_t nandbus;
+	uint8_t *ptr;
+	uint32_t page, num, end, begin = 0, begin_off;
+	int retval = 0;
+
+	cg = &chip->chip_geom;
+	page = offset_to_page(cg, offset);
+	begin_off = offset - page * cg->page_size;
+	if (begin_off) {
+		begin = cg->page_size - begin_off;
+		len -= begin;
+	}
+	num = len / cg->page_size;
+	end = len % cg->page_size;
+
+	nandbus = device_get_parent(chip->dev);
+	NANDBUS_LOCK(nandbus);
+	NANDBUS_SELECT_CS(device_get_parent(chip->dev), chip->num);
+
+	ptr = (uint8_t *)buf;
+	if (begin_off) {
+		if (NAND_PROGRAM_PAGE(chip->dev, page, ptr, begin, begin_off)) {
+			NANDBUS_UNLOCK(nandbus);
+			return (ENXIO);
+		}
+
+		page++;
+		ptr += begin;
+	}
+
+	while (num--) {
+		if (NAND_PROGRAM_PAGE(chip->dev, page, ptr, cg->page_size, 0)) {
+			NANDBUS_UNLOCK(nandbus);
+			return (ENXIO);
+		}
+
+		page++;
+		ptr += cg->page_size;
+	}
+
+	if (end)
+		retval = NAND_PROGRAM_PAGE(chip->dev, page, ptr, end, 0);
+
+	NANDBUS_UNLOCK(nandbus);
+
+	return (retval);
+}
+
+int
+nand_read_oob(struct nand_chip *chip, uint32_t page, void *buf,
+    uint32_t len)
+{
+	device_t nandbus;
+	int retval = 0;
+
+	nandbus = device_get_parent(chip->dev);
+	NANDBUS_LOCK(nandbus);
+	NANDBUS_SELECT_CS(device_get_parent(chip->dev), chip->num);
+
+	retval = NAND_READ_OOB(chip->dev, page, buf, len, 0);
+
+	NANDBUS_UNLOCK(nandbus);
+
+	return (retval);
+}
+
+
+int
+nand_prog_oob(struct nand_chip *chip, uint32_t page, void *buf,
+    uint32_t len)
+{
+	device_t nandbus;
+	int retval = 0;
+
+	nandbus = device_get_parent(chip->dev);
+	NANDBUS_LOCK(nandbus);
+	NANDBUS_SELECT_CS(device_get_parent(chip->dev), chip->num);
+
+	retval = NAND_PROGRAM_OOB(chip->dev, page, buf, len, 0);
+
+	NANDBUS_UNLOCK(nandbus);
+
+	return (retval);
+}
+
+int
+nand_erase_blocks(struct nand_chip *chip, off_t offset, size_t len)
+{
+	device_t nandbus;
+	struct chip_geom *cg;
+	uint32_t block, num_blocks;
+	int err = 0;
+
+	cg = &chip->chip_geom;
+	if ((offset % cg->block_size) || (len % cg->block_size))
+		return (EINVAL);
+
+	block = offset / cg->block_size;
+	num_blocks = len / cg->block_size;
+	nand_debug(NDBG_NAND,"%p erase blocks %d[%d]", chip, block, num_blocks);
+
+	nandbus = device_get_parent(chip->dev);
+	NANDBUS_LOCK(nandbus);
+	NANDBUS_SELECT_CS(device_get_parent(chip->dev), chip->num);
+
+	while (num_blocks--) {
+		if (!nand_check_bad_block(chip, block)) {
+			if (NAND_ERASE_BLOCK(chip->dev, block)) {
+				nand_debug(NDBG_NAND,"%p erase blocks %d error",
+				    chip, block);
+				nand_mark_bad_block(chip, block);
+				err = ENXIO;
+			}
+		} else
+			err = ENXIO;
+
+		block++;
+	};
+
+	NANDBUS_UNLOCK(nandbus);
+
+	if (err)
+		nand_update_bbt(chip);
+
+	return (err);
+}
+
+MODULE_VERSION(nand, 1);


Property changes on: trunk/sys/dev/nand/nand.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
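
For reference, nand.c above introduces a software ECC that produces 3 ECC bytes per 256-byte step of a page (nand_softecc_get/nand_softecc_correct). The following is a minimal illustrative sketch, not part of this commit, of how a caller might pair the two helpers; the 2048-byte page size, the buffer sizes and the stand-in memcpy are assumptions for the example only.

/*
 * Illustrative sketch only (not part of this commit).  Assumes
 * <dev/nand/nand.h> and the usual kernel headers are included.
 * A 2048-byte page gives 2048 / 256 * 3 = 24 ECC bytes.
 */
static int
example_softecc_roundtrip(device_t dev, uint8_t *page)
{
	uint8_t ecc_calc[24], ecc_read[24];
	int st;

	/* Compute ECC before the page is programmed. */
	nand_softecc_get(dev, page, 2048, ecc_calc);

	/* ... page programmed, read back, ecc_read recovered from OOB ... */
	memcpy(ecc_read, ecc_calc, sizeof(ecc_read));	/* stand-in only */

	st = nand_softecc_correct(dev, page, 2048, ecc_read, ecc_calc);
	if (st < 0)
		return (EIO);	/* ECC_ERROR_ECC or ECC_UNCORRECTABLE */
	return (0);		/* st > 0 counts corrected steps */
}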
Added: trunk/sys/dev/nand/nand.h
===================================================================
--- trunk/sys/dev/nand/nand.h	                        (rev 0)
+++ trunk/sys/dev/nand/nand.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,414 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/nand/nand.h 259371 2013-12-14 00:54:05Z ian $
+ */
+
+#ifndef _DEV_NAND_H_
+#define _DEV_NAND_H_
+
+#include <sys/bus.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/sx.h>
+#include <sys/taskqueue.h>
+#include <sys/queue.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+
+#include <dev/nand/nand_dev.h>
+
+MALLOC_DECLARE(M_NAND);
+
+/* Read commands */
+#define NAND_CMD_READ		0x00
+#define NAND_CMD_CHNG_READ_COL	0x05
+#define NAND_CMD_READ_END	0x30
+#define NAND_CMD_READ_CACHE	0x31
+#define NAND_CMD_READ_CPBK	0x35
+#define NAND_CMD_READ_CACHE_END	0x3F
+#define	NAND_CMD_CHNG_READ_COL_END	0xE0
+
+/* Erase commands */
+#define NAND_CMD_ERASE		0x60
+#define NAND_CMD_ERASE_END	0xD0
+#define NAND_CMD_ERASE_INTLV	0xD1
+
+/* Program commands */
+#define NAND_CMD_PROG		0x80
+#define NAND_CMD_CHNG_WRITE_COL	0x85
+#define NAND_CMD_PROG_END	0x10
+#define NAND_CMD_PROG_INTLV	0x11
+#define NAND_CMD_PROG_CACHE	0x15
+
+/* Misc commands */
+#define NAND_CMD_STATUS		0x70
+#define NAND_CMD_STATUS_ENH	0x78
+#define NAND_CMD_READ_ID	0x90
+#define NAND_CMD_READ_PARAMETER	0xec
+#define NAND_CMD_READ_UNIQUE_ID	0xed
+#define NAND_CMD_GET_FEATURE	0xee
+#define NAND_CMD_SET_FEATURE	0xef
+
+/* Reset commands */
+#define NAND_CMD_SYNCH_RESET	0xfc
+#define NAND_CMD_RESET		0xff
+
+/* Small page flash commands */
+#define NAND_CMD_SMALLA		0x00
+#define NAND_CMD_SMALLB		0x01
+#define NAND_CMD_SMALLOOB	0x50
+
+#define NAND_STATUS_FAIL	0x1
+#define NAND_STATUS_FAILC	0x2
+#define NAND_STATUS_ARDY	0x20
+#define NAND_STATUS_RDY		0x40
+#define NAND_STATUS_WP		0x80
+
+#define NAND_LP_OOB_COLUMN_START	0x800
+#define NAND_LP_OOBSZ			0x40
+#define NAND_SP_OOB_COLUMN_START	0x200
+#define NAND_SP_OOBSZ			0x10
+
+#define PAGE_PARAM_LENGTH		0x100
+#define PAGE_PARAMETER_DEF		0x0
+#define PAGE_PARAMETER_RED_1		0x100
+#define PAGE_PARAMETER_RED_2		0x200
+
+#define ONFI_SIG_ADDR	0x20
+
+#define NAND_MAX_CHIPS	0x4
+#define NAND_MAX_OOBSZ	512
+#define NAND_MAX_PAGESZ	16384
+
+#define NAND_SMALL_PAGE_SIZE	0x200
+
+#define NAND_16_BIT		0x00000001
+
+#define NAND_ECC_NONE			0x0
+#define NAND_ECC_SOFT			0x1
+#define	NAND_ECC_FULLHW			0x2
+#define	NAND_ECC_PARTHW			0x4
+#define NAND_ECC_MODE_MASK		0x7
+
+#define ECC_OK			0
+#define ECC_CORRECTABLE		1
+#define ECC_ERROR_ECC		(-1)
+#define ECC_UNCORRECTABLE	(-2)
+
+#define NAND_MAN_SAMSUNG		0xec
+#define NAND_MAN_HYNIX			0xad
+#define NAND_MAN_STMICRO		0x20
+#define NAND_MAN_MICRON			0x2c
+
+struct nand_id {
+	uint8_t man_id;
+	uint8_t dev_id;
+};
+
+struct nand_params {
+	struct nand_id	id;
+	char		*name;
+	uint32_t	chip_size;
+	uint32_t	page_size;
+	uint32_t	oob_size;
+	uint32_t	pages_per_block;
+	uint32_t	flags;
+};
+
+/* nand debug levels */
+#define NDBG_NAND	0x01
+#define NDBG_CDEV	0x02
+#define NDBG_GEN	0x04
+#define NDBG_GEOM	0x08
+#define NDBG_BUS	0x10
+#define NDBG_SIM	0x20
+#define NDBG_CTRL	0x40
+#define NDBG_DRV	0x80
+#define NDBG_ECC	0x100
+
+/* nand_debug_function */
+void nand_debug(int level, const char *fmt, ...);
+extern int nand_debug_flag;
+
+/* ONFI feature bits */
+#define ONFI_FEAT_16BIT		0x01
+#define ONFI_FEAT_MULT_LUN	0x02
+#define ONFI_FEAT_INTLV_OPS	0x04
+#define ONFI_FEAT_CPBK_RESTRICT	0x08
+#define ONFI_FEAT_SRC_SYNCH	0x10
+
+/* ONFI optional commands bits */
+#define ONFI_OPTCOM_PROG_CACHE	0x01
+#define ONFI_OPTCOM_READ_CACHE	0x02
+#define ONFI_OPTCOM_GETSET_FEAT	0x04
+#define ONFI_OPTCOM_STATUS_ENH	0x08
+#define ONFI_OPTCOM_COPYBACK	0x10
+#define ONFI_OPTCOM_UNIQUE_ID	0x20
+
+
+/* Layout of parameter page is defined in ONFI */
+struct onfi_params {
+	char		signature[4];
+	uint16_t	rev;
+	uint16_t	features;
+	uint16_t	optional_commands;
+	uint8_t		primary_advanced_command;
+	uint8_t		res1;
+	uint16_t	extended_parameter_page_length;
+	uint8_t		parameter_page_count;
+	uint8_t		res2[17];
+	char		manufacturer_name[12];
+	char		device_model[20];
+	uint8_t		manufacturer_id;
+	uint8_t		manufacture_date_yy;
+	uint8_t		manufacture_date_ww;
+	uint8_t		res3[13];
+	uint32_t	bytes_per_page;
+	uint16_t	spare_bytes_per_page;
+	uint32_t	bytes_per_partial_page;
+	uint16_t	spare_bytes_per_partial_page;
+	uint32_t	pages_per_block;
+	uint32_t	blocks_per_lun;
+	uint8_t		luns;
+	uint8_t		address_cycles;
+	uint8_t		bits_per_cell;
+	uint16_t	max_bad_block_per_lun;
+	uint16_t	block_endurance;
+	uint8_t		guaranteed_valid_blocks;
+	uint16_t	valid_block_endurance;
+	uint8_t		programs_per_page;
+	uint8_t		partial_prog_attr;
+	uint8_t		bits_of_ecc;
+	uint8_t		interleaved_addr_bits;
+	uint8_t		interleaved_oper_attr;
+	uint8_t		eznand_support;
+	uint8_t		res4[12];
+	uint8_t		pin_capacitance;
+	uint16_t	asynch_timing_mode_support;
+	uint16_t	asynch_prog_cache_timing_mode_support;
+	uint16_t	t_prog;	/* us, max page program time */
+	uint16_t	t_bers;	/* us, max block erase time */
+	uint16_t	t_r;	/* us, max page read time */
+	uint16_t	t_ccs;	/* ns, min change column setup time */
+	uint16_t	source_synch_timing_mode_support;
+	uint8_t		source_synch_feat;
+	uint16_t	clk_input_capacitance;
+	uint16_t	io_capacitance;
+	uint16_t	input_capacitance;
+	uint8_t		input_capacitance_max;
+	uint8_t		driver_strength_support;
+	uint16_t	t_r_interleaved;
+	uint16_t	t_adl;
+	uint16_t	t_r_eznand;
+	uint8_t		nv_ddr2_features;
+	uint8_t		nv_ddr2_warmup_cycles;
+	uint8_t		res5[4];
+	uint16_t	vendor_rev;
+	uint8_t		vendor_spec[88];
+	uint16_t	crc;
+}__attribute__((packed));
+CTASSERT(sizeof(struct onfi_params) == 256);
+
+struct onfi_chip_params {
+	uint32_t blocks_per_lun;
+	uint32_t pages_per_block;
+	uint32_t bytes_per_page;
+	uint32_t spare_bytes_per_page;
+	uint16_t t_bers;
+	uint16_t t_prog;
+	uint16_t t_r;
+	uint16_t t_ccs;
+	uint16_t features;
+	uint8_t address_cycles;
+	uint8_t luns;
+};
+
+struct nand_ecc_data {
+	int	eccsize;		/* Number of data bytes per ECC step */
+	int	eccmode;
+	int	eccbytes;		/* Number of ECC bytes per step */
+
+	uint16_t	*eccpositions;		/* Positions of ecc bytes */
+	uint8_t	ecccalculated[NAND_MAX_OOBSZ];
+	uint8_t	eccread[NAND_MAX_OOBSZ];
+};
+
+struct ecc_stat {
+	uint32_t ecc_succeded;
+	uint32_t ecc_corrected;
+	uint32_t ecc_failed;
+};
+
+struct page_stat {
+	struct ecc_stat	ecc_stat;
+	uint32_t	page_read;
+	uint32_t	page_raw_read;
+	uint32_t	page_written;
+	uint32_t	page_raw_written;
+};
+
+struct block_stat {
+	uint32_t block_erased;
+};
+
+struct chip_geom {
+	uint32_t	chip_size;
+	uint32_t	block_size;
+	uint32_t	page_size;
+	uint32_t	oob_size;
+
+	uint32_t	luns;
+	uint32_t	blks_per_lun;
+	uint32_t	blks_per_chip;
+	uint32_t	pgs_per_blk;
+
+	uint32_t	pg_mask;
+	uint32_t	blk_mask;
+	uint32_t	lun_mask;
+	uint8_t		blk_shift;
+	uint8_t		lun_shift;
+};
+
+struct nand_chip {
+	device_t		dev;
+	struct nand_id		id;
+	struct chip_geom	chip_geom;
+
+	uint16_t		t_prog;	/* us, max page program time */
+	uint16_t		t_bers;	/* us, max block erase time */
+	uint16_t		t_r;	/* us, max page read time */
+	uint16_t		t_ccs;	/* ns, min change column setup time */
+	uint8_t			num;
+	uint8_t			flags;
+
+	struct page_stat	*pg_stat;
+	struct block_stat	*blk_stat;
+	struct nand_softc	*nand;
+	struct nand_bbt		*bbt;
+	struct nand_ops		*ops;
+	struct cdev		*cdev;
+
+	struct disk		*ndisk;
+	struct disk		*rdisk;
+	struct bio_queue_head	bioq;	/* bio queue */
+	struct mtx		qlock;	/* bioq lock */
+	struct taskqueue	*tq;	/* private task queue for i/o request */
+	struct task		iotask;	/* i/o processing */
+
+};
+
+struct nand_softc {
+	uint8_t			flags;
+
+	char			*chip_cdev_name;
+	struct nand_ecc_data	ecc;
+};
+
+/* NAND ops */
+int nand_erase_blocks(struct nand_chip *chip, off_t offset, size_t len);
+int nand_prog_pages(struct nand_chip *chip, uint32_t offset, uint8_t *buf,
+    uint32_t len);
+int nand_read_pages(struct nand_chip *chip, uint32_t offset, void *buf,
+    uint32_t len);
+int nand_read_pages_raw(struct nand_chip *chip, uint32_t offset, void *buf,
+    uint32_t len);
+int nand_prog_pages_raw(struct nand_chip *chip, uint32_t offset, void *buf,
+    uint32_t len);
+int nand_read_oob(struct nand_chip *chip, uint32_t page, void *buf,
+    uint32_t len);
+int nand_prog_oob(struct nand_chip *chip, uint32_t page, void *buf,
+    uint32_t len);
+
+int nand_select_cs(device_t dev, uint8_t cs);
+
+int nand_read_parameter(struct nand_softc *nand, struct onfi_params *param);
+int nand_synch_reset(struct nand_softc *nand);
+int nand_chng_read_col(device_t dev, uint32_t col, void *buf, size_t len);
+int nand_chng_write_col(device_t dev, uint32_t col, void *buf, size_t len);
+int nand_get_feature(device_t dev, uint8_t feat, void* buf);
+int nand_set_feature(device_t dev, uint8_t feat, void* buf);
+
+
+int nand_erase_block_intlv(device_t dev, uint32_t block);
+int nand_copyback_read(device_t dev, uint32_t page, uint32_t col,
+    void *buf, size_t len);
+int nand_copyback_prog(device_t dev, uint32_t page, uint32_t col,
+    void *buf, size_t len);
+int nand_copyback_prog_intlv(device_t dev, uint32_t page);
+int nand_prog_cache(device_t dev, uint32_t page, uint32_t col,
+    void *buf, size_t len, uint8_t end);
+int nand_prog_intlv(device_t dev, uint32_t page, uint32_t col,
+    void *buf, size_t len);
+int nand_read_cache(device_t dev, uint32_t page, uint32_t col,
+    void *buf, size_t len, uint8_t end);
+
+int nand_write_ecc(struct nand_softc *nand, uint32_t page, uint8_t *data);
+int nand_read_ecc(struct nand_softc *nand, uint32_t page, uint8_t *data);
+
+int nand_softecc_get(device_t dev, uint8_t *buf, int pagesize, uint8_t *ecc);
+int nand_softecc_correct(device_t dev, uint8_t *buf, int pagesize,
+    uint8_t *readecc, uint8_t *calcecc);
+
+/* Chip initialization */
+void nand_init(struct nand_softc *nand, device_t dev, int ecc_mode,
+    int ecc_bytes, int ecc_size, uint16_t* eccposition, char* cdev_name);
+void nand_detach(struct nand_softc *nand);
+struct nand_params *nand_get_params(struct nand_id *id);
+
+void nand_onfi_set_params(struct nand_chip *chip, struct onfi_chip_params *params);
+void nand_set_params(struct nand_chip *chip, struct nand_params *params);
+int  nand_init_stat(struct nand_chip *chip);
+void nand_destroy_stat(struct nand_chip *chip);
+
+/* BBT */
+int nand_init_bbt(struct nand_chip *chip);
+void nand_destroy_bbt(struct nand_chip *chip);
+int nand_update_bbt(struct nand_chip *chip);
+int nand_mark_bad_block(struct nand_chip* chip, uint32_t block_num);
+int nand_check_bad_block(struct nand_chip* chip, uint32_t block_num);
+
+/* cdev creation/removal */
+int  nand_make_dev(struct nand_chip* chip);
+void nand_destroy_dev(struct nand_chip *chip);
+
+int  create_geom_disk(struct nand_chip* chip);
+int  create_geom_raw_disk(struct nand_chip *chip);
+void destroy_geom_disk(struct nand_chip *chip);
+void destroy_geom_raw_disk(struct nand_chip *chip);
+
+int init_chip_geom(struct chip_geom* cg, uint32_t luns, uint32_t blks_per_lun,
+    uint32_t pgs_per_blk, uint32_t pg_size, uint32_t oob_size);
+int nand_row_to_blkpg(struct chip_geom *cg, uint32_t row, uint32_t *lun,
+    uint32_t *blk, uint32_t *pg);
+int page_to_row(struct chip_geom *cg, uint32_t page, uint32_t *row);
+int nand_check_page_boundary(struct nand_chip *chip, uint32_t page);
+void nand_get_chip_param(struct nand_chip *chip, struct chip_param_io *param);
+
+#endif /* _DEV_NAND_H_ */


Property changes on: trunk/sys/dev/nand/nand.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
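
For reference, nand.h above exposes the chip-geometry helpers (init_chip_geom, page_to_row, nand_row_to_blkpg). Below is a small illustrative sketch, not part of this commit, of the row-address round trip; the geometry numbers and the function name are example values only.

/*
 * Illustrative sketch only (not part of this commit): row address layout
 * for an example geometry of 1 LUN, 2048 blocks/LUN, 64 pages/block,
 * 2048-byte pages, 64-byte OOB.
 */
static void
example_geom_roundtrip(void)
{
	struct chip_geom cg;
	uint32_t row, lun, blk, pg;

	init_chip_geom(&cg, 1, 2048, 64, 2048, 64);

	page_to_row(&cg, 130, &row);
	nand_row_to_blkpg(&cg, row, &lun, &blk, &pg);

	/* Expect lun == 0, blk == 2, pg == 2 (page 130 = block 2, page 2). */
	printf("lun %u blk %u pg %u\n", lun, blk, pg);
}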
Added: trunk/sys/dev/nand/nand_bbt.c
===================================================================
--- trunk/sys/dev/nand/nand_bbt.c	                        (rev 0)
+++ trunk/sys/dev/nand/nand_bbt.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,274 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/nand/nand_bbt.c 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+
+#include <dev/nand/nand.h>
+
+#include "nand_if.h"
+
+#define BBT_PRIMARY_PATTERN	0x01020304
+#define BBT_SECONDARY_PATTERN	0x05060708
+
+enum bbt_place {
+	BBT_NONE,
+	BBT_PRIMARY,
+	BBT_SECONDARY
+};
+
+struct nand_bbt {
+	struct nand_chip	*chip;
+	uint32_t		primary_map;
+	uint32_t		secondary_map;
+	enum bbt_place		active;
+	struct bbt_header	*hdr;
+	uint32_t		tab_len;
+	uint32_t		*table;
+};
+
+struct bbt_header {
+	uint32_t pattern;
+	int32_t seq_nr;
+};
+
+static int nand_bbt_save(struct nand_bbt *);
+static int nand_bbt_load_hdr(struct nand_bbt *, struct bbt_header *, int8_t);
+static int nand_bbt_load_table(struct nand_bbt *);
+static int nand_bbt_prescan(struct nand_bbt *);
+
+int
+nand_init_bbt(struct nand_chip *chip)
+{
+	struct chip_geom *cg;
+	struct nand_bbt *bbt;
+	int err;
+
+	cg = &chip->chip_geom;
+
+	bbt = malloc(sizeof(struct nand_bbt), M_NAND, M_ZERO | M_WAITOK);
+	if (!bbt) {
+		device_printf(chip->dev,
+		    "Cannot allocate memory for bad block struct");
+		return (ENOMEM);
+	}
+
+	bbt->chip = chip;
+	bbt->active = BBT_NONE;
+	bbt->primary_map = cg->chip_size - cg->block_size;
+	bbt->secondary_map = cg->chip_size - 2 * cg->block_size;
+	bbt->tab_len = cg->blks_per_chip * sizeof(uint32_t);
+	bbt->hdr = malloc(sizeof(struct bbt_header) + bbt->tab_len, M_NAND,
+	    M_WAITOK);
+	if (!bbt->hdr) {
+		device_printf(chip->dev, "Cannot allocate %d bytes for BB "
+		    "Table", bbt->tab_len);
+		free(bbt, M_NAND);
+		return (ENOMEM);
+	}
+	bbt->hdr->seq_nr = 0;
+	bbt->table = (uint32_t *)((uint8_t *)bbt->hdr +
+	    sizeof(struct bbt_header));
+
+	err = nand_bbt_load_table(bbt);
+	if (err) {
+		free(bbt->table, M_NAND);
+		free(bbt, M_NAND);
+		return (err);
+	}
+
+	chip->bbt = bbt;
+	if (bbt->active == BBT_NONE) {
+		bbt->active = BBT_PRIMARY;
+		memset(bbt->table, 0xff, bbt->tab_len);
+		nand_bbt_prescan(bbt);
+		nand_bbt_save(bbt);
+	} else
+		device_printf(chip->dev, "Found BBT table for chip\n");
+
+	return (0);
+}
+
+void
+nand_destroy_bbt(struct nand_chip *chip)
+{
+
+	if (chip->bbt) {
+		nand_bbt_save(chip->bbt);
+
+		free(chip->bbt->hdr, M_NAND);
+		free(chip->bbt, M_NAND);
+		chip->bbt = NULL;
+	}
+}
+
+int
+nand_update_bbt(struct nand_chip *chip)
+{
+
+	nand_bbt_save(chip->bbt);
+
+	return (0);
+}
+
+static int
+nand_bbt_save(struct nand_bbt *bbt)
+{
+	enum bbt_place next;
+	uint32_t addr;
+	int32_t err;
+
+	if (bbt->active == BBT_PRIMARY) {
+		addr = bbt->secondary_map;
+		bbt->hdr->pattern = BBT_SECONDARY_PATTERN;
+		next = BBT_SECONDARY;
+	} else {
+		addr = bbt->primary_map;
+		bbt->hdr->pattern = BBT_PRIMARY_PATTERN;
+		next = BBT_PRIMARY;
+	}
+
+	err = nand_erase_blocks(bbt->chip, addr,
+	    bbt->chip->chip_geom.block_size);
+	if (err)
+		return (err);
+
+	bbt->hdr->seq_nr++;
+
+	err = nand_prog_pages_raw(bbt->chip, addr, bbt->hdr,
+	    bbt->tab_len + sizeof(struct bbt_header));
+	if (err)
+		return (err);
+
+	bbt->active = next;
+	return (0);
+}
+
+static int
+nand_bbt_load_hdr(struct nand_bbt *bbt, struct bbt_header *hdr, int8_t primary)
+{
+	uint32_t addr;
+
+	if (primary)
+		addr = bbt->primary_map;
+	else
+		addr = bbt->secondary_map;
+
+	return (nand_read_pages_raw(bbt->chip, addr, hdr,
+	    sizeof(struct bbt_header)));
+}
+
+static int
+nand_bbt_load_table(struct nand_bbt *bbt)
+{
+	struct bbt_header hdr1, hdr2;
+	uint32_t address = 0;
+	int err = 0;
+
+	bzero(&hdr1, sizeof(hdr1));
+	bzero(&hdr2, sizeof(hdr2));
+
+	nand_bbt_load_hdr(bbt, &hdr1, 1);
+	if (hdr1.pattern == BBT_PRIMARY_PATTERN) {
+		bbt->active = BBT_PRIMARY;
+		address = bbt->primary_map;
+	} else
+		bzero(&hdr1, sizeof(hdr1));
+
+
+	nand_bbt_load_hdr(bbt, &hdr2, 0);
+	if ((hdr2.pattern == BBT_SECONDARY_PATTERN) &&
+	    (hdr2.seq_nr > hdr1.seq_nr)) {
+		bbt->active = BBT_SECONDARY;
+		address = bbt->secondary_map;
+	} else
+		bzero(&hdr2, sizeof(hdr2));
+
+	if (bbt->active != BBT_NONE)
+		err = nand_read_pages_raw(bbt->chip, address, bbt->hdr,
+		    bbt->tab_len + sizeof(struct bbt_header));
+
+	return (err);
+}
+
+static int
+nand_bbt_prescan(struct nand_bbt *bbt)
+{
+	int32_t i;
+	uint8_t bad;
+	bool printed_hash = 0;
+
+	device_printf(bbt->chip->dev, "No BBT found. Prescanning chip...\n");
+	for (i = 0; i < bbt->chip->chip_geom.blks_per_chip; i++) {
+		if (NAND_IS_BLK_BAD(bbt->chip->dev, i, &bad))
+			return (ENXIO);
+
+		if (bad) {
+			device_printf(bbt->chip->dev, "Bad block(%d)\n", i);
+			bbt->table[i] = 0x0FFFFFFF;
+		}
+		if (!(i % 100)) {
+			printf("#");
+			printed_hash = 1;
+		}
+	}
+
+	if (printed_hash)
+		printf("\n");
+
+	return (0);
+}
+
+int
+nand_check_bad_block(struct nand_chip *chip, uint32_t block_number)
+{
+
+	if (!chip || !chip->bbt)
+		return (0);
+
+	if ((chip->bbt->table[block_number] & 0xF0000000) == 0)
+		return (1);
+
+	return (0);
+}
+
+int
+nand_mark_bad_block(struct nand_chip *chip, uint32_t block_number)
+{
+
+	chip->bbt->table[block_number] = 0x0FFFFFFF;
+
+	return (0);
+}


Property changes on: trunk/sys/dev/nand/nand_bbt.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
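
For reference, nand_bbt.c above keeps one 32-bit entry per block and marks bad blocks with 0x0FFFFFFF. A short illustrative sketch, not part of this commit, of how a consumer might walk the table through nand_check_bad_block(); the function name is hypothetical.

/*
 * Illustrative sketch only (not part of this commit): list every block
 * the BBT has marked bad.
 */
static void
example_list_bad_blocks(struct nand_chip *chip)
{
	uint32_t blk;

	for (blk = 0; blk < chip->chip_geom.blks_per_chip; blk++)
		if (nand_check_bad_block(chip, blk))
			printf("block %u marked bad\n", blk);
}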
Added: trunk/sys/dev/nand/nand_cdev.c
===================================================================
--- trunk/sys/dev/nand/nand_cdev.c	                        (rev 0)
+++ trunk/sys/dev/nand/nand_cdev.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,453 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nand_cdev.c 258554 2013-11-25 15:34:57Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/uio.h>
+#include <sys/bio.h>
+
+#include <dev/nand/nand.h>
+#include <dev/nand/nandbus.h>
+#include <dev/nand/nand_dev.h>
+#include "nand_if.h"
+#include "nandbus_if.h"
+
+static int nand_page_stat(struct nand_chip *, struct page_stat_io *);
+static int nand_block_stat(struct nand_chip *, struct block_stat_io *);
+
+static d_ioctl_t nand_ioctl;
+static d_open_t nand_open;
+static d_strategy_t nand_strategy;
+
+static struct cdevsw nand_cdevsw = {
+	.d_version	= D_VERSION,
+	.d_name		= "nand",
+	.d_open		= nand_open,
+	.d_read		= physread,
+	.d_write	= physwrite,
+	.d_ioctl	= nand_ioctl,
+	.d_strategy =	nand_strategy,
+};
+
+static int
+offset_to_page(struct chip_geom *cg, uint32_t offset)
+{
+
+	return (offset / cg->page_size);
+}
+
+static int
+offset_to_page_off(struct chip_geom *cg, uint32_t offset)
+{
+
+	return (offset % cg->page_size);
+}
+
+int
+nand_make_dev(struct nand_chip *chip)
+{
+	struct nandbus_ivar *ivar;
+	device_t parent, nandbus;
+	int parent_unit, unit;
+	char *name;
+
+	ivar = device_get_ivars(chip->dev);
+	nandbus = device_get_parent(chip->dev);
+
+	if (ivar->chip_cdev_name) {
+		name = ivar->chip_cdev_name;
+
+		/*
+		 * If we got a distinct name for the chip device, we can
+		 * enumerate it based on the controller number.
+		 */
+		parent = device_get_parent(nandbus);
+	} else {
+		name = "nand";
+		parent = nandbus;
+	}
+
+	parent_unit = device_get_unit(parent);
+	unit = parent_unit * 4 + chip->num;
+	chip->cdev = make_dev(&nand_cdevsw, unit, UID_ROOT, GID_WHEEL,
+	    0666, "%s%d.%d", name, parent_unit, chip->num);
+
+	if (chip->cdev == NULL)
+		return (ENXIO);
+
+	if (bootverbose)
+		device_printf(chip->dev, "Created cdev %s%d.%d for chip "
+		    "[0x%0x, 0x%0x]\n", name, parent_unit, chip->num,
+		    ivar->man_id, ivar->dev_id);
+
+	chip->cdev->si_drv1 = chip;
+
+	return (0);
+}
+
+void
+nand_destroy_dev(struct nand_chip *chip)
+{
+
+	if (chip->cdev)
+		destroy_dev(chip->cdev);
+}
+
+static int
+nand_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+
+	return (0);
+}
+
+static int
+nand_read(struct nand_chip *chip, uint32_t offset, void *buf, uint32_t len)
+{
+	struct chip_geom *cg;
+	device_t nandbus;
+	int start_page, count, off, err = 0;
+	uint8_t *ptr, *tmp;
+
+	nand_debug(NDBG_CDEV, "Read from chip%d [%p] at %d\n", chip->num,
+	    chip, offset);
+
+	nandbus = device_get_parent(chip->dev);
+	NANDBUS_LOCK(nandbus);
+	NANDBUS_SELECT_CS(device_get_parent(chip->dev), chip->num);
+
+	cg = &chip->chip_geom;
+	start_page = offset_to_page(cg, offset);
+	off = offset_to_page_off(cg, offset);
+	count = (len > cg->page_size - off) ? cg->page_size - off : len;
+
+	ptr = (uint8_t *)buf;
+	while (len > 0) {
+		if (len < cg->page_size) {
+			tmp = malloc(cg->page_size, M_NAND, M_WAITOK);
+			if (!tmp) {
+				err = ENOMEM;
+				break;
+			}
+			err = NAND_READ_PAGE(chip->dev, start_page,
+			    tmp, cg->page_size, 0);
+			if (err) {
+				free(tmp, M_NAND);
+				break;
+			}
+			bcopy(tmp + off, ptr, count);
+			free(tmp, M_NAND);
+		} else {
+			err = NAND_READ_PAGE(chip->dev, start_page,
+			    ptr, cg->page_size, 0);
+			if (err)
+				break;
+		}
+
+		len -= count;
+		start_page++;
+		ptr += count;
+		count = (len > cg->page_size) ? cg->page_size : len;
+		off = 0;
+	}
+
+	NANDBUS_UNLOCK(nandbus);
+	return (err);
+}
+
+static int
+nand_write(struct nand_chip *chip, uint32_t offset, void* buf, uint32_t len)
+{
+	struct chip_geom *cg;
+	device_t nandbus;
+	int off, start_page, err = 0;
+	uint8_t *ptr;
+
+	nand_debug(NDBG_CDEV, "Write to chip %d [%p] at %d\n", chip->num,
+	    chip, offset);
+
+	nandbus = device_get_parent(chip->dev);
+	NANDBUS_LOCK(nandbus);
+	NANDBUS_SELECT_CS(device_get_parent(chip->dev), chip->num);
+
+	cg = &chip->chip_geom;
+	start_page = offset_to_page(cg, offset);
+	off = offset_to_page_off(cg, offset);
+
+	if (off != 0 || (len % cg->page_size) != 0) {
+		printf("Not aligned write start [0x%08x] size [0x%08x]\n",
+		    off, len);
+		NANDBUS_UNLOCK(nandbus);
+		return (EINVAL);
+	}
+
+	ptr = (uint8_t *)buf;
+	while (len > 0) {
+		err = NAND_PROGRAM_PAGE(chip->dev, start_page, ptr,
+		    cg->page_size, 0);
+		if (err)
+			break;
+
+		len -= cg->page_size;
+		start_page++;
+		ptr += cg->page_size;
+	}
+
+	NANDBUS_UNLOCK(nandbus);
+	return (err);
+}
+
+static void
+nand_strategy(struct bio *bp)
+{
+	struct nand_chip *chip;
+	struct cdev *dev;
+	int err = 0;
+
+	dev = bp->bio_dev;
+	chip = dev->si_drv1;
+
+	nand_debug(NDBG_CDEV, "Strategy %s on chip %d [%p]\n",
+	    (bp->bio_cmd & BIO_READ) == BIO_READ ? "READ" : "WRITE",
+	    chip->num, chip);
+
+	if ((bp->bio_cmd & BIO_READ) == BIO_READ) {
+		err = nand_read(chip,
+		    bp->bio_offset & 0xffffffff,
+		    bp->bio_data, bp->bio_bcount);
+	} else {
+		err = nand_write(chip,
+		    bp->bio_offset & 0xffffffff,
+		    bp->bio_data, bp->bio_bcount);
+	}
+
+	if (err == 0)
+		bp->bio_resid = 0;
+	else {
+		bp->bio_error = EIO;
+		bp->bio_flags |= BIO_ERROR;
+		bp->bio_resid = bp->bio_bcount;
+	}
+
+	biodone(bp);
+}
+
+static int
+nand_oob_access(struct nand_chip *chip, uint32_t page, uint32_t offset,
+    uint32_t len, uint8_t *data, uint8_t write)
+{
+	struct chip_geom *cg;
+	uint8_t *buf = NULL;
+	int ret = 0;
+
+	cg = &chip->chip_geom;
+
+	buf = malloc(cg->oob_size, M_NAND, M_WAITOK);
+	if (!buf)
+		return (ENOMEM);
+
+	memset(buf, 0xff, cg->oob_size);
+
+	if (!write) {
+		ret = nand_read_oob(chip, page, buf, cg->oob_size);
+		copyout(buf, data, len);
+	} else {
+		copyin(data, buf, len);
+		ret = nand_prog_oob(chip, page, buf, cg->oob_size);
+	}
+
+	free(buf, M_NAND);
+
+	return (ret);
+}
+
+static int
+nand_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
+    struct thread *td)
+{
+	struct nand_chip *chip;
+	struct chip_geom  *cg;
+	struct nand_oob_rw *oob_rw = NULL;
+	struct nand_raw_rw *raw_rw = NULL;
+	device_t nandbus;
+	size_t bufsize = 0, len = 0;
+	size_t raw_size;
+	off_t off;
+	uint8_t *buf = NULL;
+	int ret = 0;
+	uint8_t status;
+
+	chip = (struct nand_chip *)dev->si_drv1;
+	cg = &chip->chip_geom;
+	nandbus = device_get_parent(chip->dev);
+
+	if ((cmd == NAND_IO_RAW_READ) || (cmd == NAND_IO_RAW_PROG)) {
+		raw_rw = (struct nand_raw_rw *)data;
+		raw_size =  cg->pgs_per_blk * (cg->page_size + cg->oob_size);
+
+		/* Check if len is not bigger than chip size */
+		/* Check that len does not exceed the raw size of one block */
+			return (EFBIG);
+
+		/*
+		 * Do not ask for too much memory; for large transfers,
+		 * read/write in 16-page chunks.
+		 */
+		bufsize = 16 * (cg->page_size + cg->oob_size);
+		if (raw_rw->len < bufsize)
+			bufsize = raw_rw->len;
+
+		buf = malloc(bufsize, M_NAND, M_WAITOK);
+		len = raw_rw->len;
+		off = 0;
+	}
+	switch(cmd) {
+	case NAND_IO_ERASE:
+		ret = nand_erase_blocks(chip, ((off_t *)data)[0],
+		    ((off_t *)data)[1]);
+		break;
+
+	case NAND_IO_OOB_READ:
+		oob_rw = (struct nand_oob_rw *)data;
+		ret = nand_oob_access(chip, oob_rw->page, 0,
+		    oob_rw->len, oob_rw->data, 0);
+		break;
+
+	case NAND_IO_OOB_PROG:
+		oob_rw = (struct nand_oob_rw *)data;
+		ret = nand_oob_access(chip, oob_rw->page, 0,
+		    oob_rw->len, oob_rw->data, 1);
+		break;
+
+	case NAND_IO_GET_STATUS:
+		NANDBUS_LOCK(nandbus);
+		ret = NANDBUS_GET_STATUS(nandbus, &status);
+		if (ret == 0)
+			*(uint8_t *)data = status;
+		NANDBUS_UNLOCK(nandbus);
+		break;
+
+	case NAND_IO_RAW_PROG:
+		while (len > 0) {
+			if (len < bufsize)
+				bufsize = len;
+			ret = copyin(raw_rw->data + off, buf, bufsize);
+			if (ret)
+				break;
+			ret = nand_prog_pages_raw(chip, raw_rw->off + off, buf,
+			    bufsize);
+			if (ret)
+				break;
+			len -= bufsize;
+			off += bufsize;
+		}
+		break;
+
+	case NAND_IO_RAW_READ:
+		while (len > 0) {
+			if (len < bufsize)
+				bufsize = len;
+
+			ret = nand_read_pages_raw(chip, raw_rw->off + off, buf,
+			    bufsize);
+			if (ret)
+				break;
+
+			ret = copyout(buf, raw_rw->data + off, bufsize);
+			if (ret)
+				break;
+			len -= bufsize;
+			off += bufsize;
+		}
+		break;
+
+	case NAND_IO_PAGE_STAT:
+		ret = nand_page_stat(chip, (struct page_stat_io *)data);
+		break;
+
+	case NAND_IO_BLOCK_STAT:
+		ret = nand_block_stat(chip, (struct block_stat_io *)data);
+		break;
+
+	case NAND_IO_GET_CHIP_PARAM:
+		nand_get_chip_param(chip, (struct chip_param_io *)data);
+		break;
+
+	default:
+		printf("Unknown nand_ioctl request\n");
+		ret = EIO;
+	}
+
+	if (buf)
+		free(buf, M_NAND);
+
+	return (ret);
+}
+
+static int
+nand_page_stat(struct nand_chip *chip, struct page_stat_io *page_stat)
+{
+	struct chip_geom *cg;
+	struct page_stat *stat;
+	int num_pages;
+
+	cg = &chip->chip_geom;
+	num_pages = cg->pgs_per_blk * cg->blks_per_lun * cg->luns;
+	if (page_stat->page_num >= num_pages)
+		return (EINVAL);
+
+	stat = &chip->pg_stat[page_stat->page_num];
+	page_stat->page_read = stat->page_read;
+	page_stat->page_written = stat->page_written;
+	page_stat->page_raw_read = stat->page_raw_read;
+	page_stat->page_raw_written = stat->page_raw_written;
+	page_stat->ecc_succeded = stat->ecc_stat.ecc_succeded;
+	page_stat->ecc_corrected = stat->ecc_stat.ecc_corrected;
+	page_stat->ecc_failed = stat->ecc_stat.ecc_failed;
+
+	return (0);
+}
+
+static int
+nand_block_stat(struct nand_chip *chip, struct block_stat_io *block_stat)
+{
+	struct chip_geom *cg;
+	uint32_t block_num = block_stat->block_num;
+
+	cg = &chip->chip_geom;
+	if (block_num >= cg->blks_per_lun * cg->luns)
+		return (EINVAL);
+
+	block_stat->block_erased = chip->blk_stat[block_num].block_erased;
+
+	return (0);
+}


Property changes on: trunk/sys/dev/nand/nand_cdev.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/nand/nand_dev.h
===================================================================
--- trunk/sys/dev/nand/nand_dev.h	                        (rev 0)
+++ trunk/sys/dev/nand/nand_dev.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,91 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/nand/nand_dev.h 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#ifndef _DEV_NAND_CDEV_H_
+#define _DEV_NAND_CDEV_H_
+
+#include <sys/ioccom.h>
+#include <sys/param.h>
+
+struct nand_raw_rw {
+	off_t	off;
+	off_t	len;
+	uint8_t	*data;
+};
+
+struct nand_oob_rw {
+	uint32_t	page;
+	off_t		len;
+	uint8_t		*data;
+};
+
+#define NAND_IOCTL_GROUP	'N'
+#define NAND_IO_ERASE		_IOWR(NAND_IOCTL_GROUP, 0x0, off_t[2])
+
+#define NAND_IO_OOB_READ	_IOWR(NAND_IOCTL_GROUP, 0x1, struct nand_oob_rw)
+
+#define NAND_IO_OOB_PROG	_IOWR(NAND_IOCTL_GROUP, 0x2, struct nand_oob_rw)
+
+#define NAND_IO_RAW_READ	_IOWR(NAND_IOCTL_GROUP, 0x3, struct nand_raw_rw)
+
+#define NAND_IO_RAW_PROG	_IOWR(NAND_IOCTL_GROUP, 0x4, struct nand_raw_rw)
+
+#define NAND_IO_GET_STATUS	_IOWR(NAND_IOCTL_GROUP, 0x5, uint8_t)
+
+struct page_stat_io {
+	uint32_t	page_num;
+	uint32_t	page_read;
+	uint32_t	page_written;
+	uint32_t	page_raw_read;
+	uint32_t	page_raw_written;
+	uint32_t	ecc_succeded;
+	uint32_t	ecc_corrected;
+	uint32_t	ecc_failed;
+};
+#define NAND_IO_PAGE_STAT	_IOWR(NAND_IOCTL_GROUP, 0x6, \
+    struct page_stat_io)
+
+struct block_stat_io {
+	uint32_t	block_num;
+	uint32_t	block_erased;
+};
+#define NAND_IO_BLOCK_STAT	_IOWR(NAND_IOCTL_GROUP, 0x7, \
+    struct block_stat_io)
+
+struct chip_param_io {
+	uint32_t	page_size;
+	uint32_t	oob_size;
+
+	uint32_t	blocks;
+	uint32_t	pages_per_block;
+};
+#define NAND_IO_GET_CHIP_PARAM	_IOWR(NAND_IOCTL_GROUP, 0x8, \
+    struct chip_param_io)
+
+#endif /* _DEV_NAND_CDEV_H_ */


Property changes on: trunk/sys/dev/nand/nand_dev.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
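The header just added (nand_dev.h) is the ioctl interface that the NAND character and GEOM devices export to userland. As a minimal illustrative sketch (not part of this changeset; the device node name is an assumption), a consumer could query chip geometry and the status register like this:

#include <sys/ioctl.h>
#include <dev/nand/nand_dev.h>
#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct chip_param_io cp;
	uint8_t status;
	int fd;

	/* Node name is illustrative; use the node created on the target. */
	fd = open("/dev/gnand0", O_RDWR);
	if (fd < 0)
		err(1, "open");
	if (ioctl(fd, NAND_IO_GET_CHIP_PARAM, &cp) < 0)
		err(1, "NAND_IO_GET_CHIP_PARAM");
	printf("page %u, oob %u, %u blocks, %u pages/block\n",
	    cp.page_size, cp.oob_size, cp.blocks, cp.pages_per_block);
	if (ioctl(fd, NAND_IO_GET_STATUS, &status) < 0)
		err(1, "NAND_IO_GET_STATUS");
	printf("status: 0x%02x\n", status);
	close(fd);
	return (0);
}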
Added: trunk/sys/dev/nand/nand_ecc_pos.h
===================================================================
--- trunk/sys/dev/nand/nand_ecc_pos.h	                        (rev 0)
+++ trunk/sys/dev/nand/nand_ecc_pos.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,57 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/nand/nand_ecc_pos.h 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#ifndef _DEV_NAND_ECC_POS_H_
+#define _DEV_NAND_ECC_POS_H_
+
+static uint16_t default_software_ecc_positions_16[] = {2, 0, 1, 7, 4, 6};
+
+static uint16_t default_software_ecc_positions_64[] = {
+
+	42, 40, 41, 45, 43, 44, 48, 46,
+	47, 51, 49, 50, 54, 52, 53, 57,
+	55, 56, 60, 58, 59, 63, 61, 62
+};
+
+static uint16_t default_software_ecc_positions_128[] = {
+	8, 9, 10, 11, 12, 13,
+	18, 19, 20, 21, 22, 23,
+	28, 29, 30, 31, 32, 33,
+	38, 39, 40, 41, 42, 43,
+	48, 49, 50, 51, 52, 53,
+	58, 59, 60, 61, 62, 63,
+	68, 69, 70, 71, 72, 73,
+	78, 79, 80, 81, 82, 83,
+	88, 89, 90, 91, 92, 93,
+	98, 99, 100, 101, 102, 103,
+	108, 109, 110, 111, 112, 113,
+	118, 119, 120, 121, 122, 123,
+};
+#endif /* _DEV_NAND_ECC_POS_H_ */
+


Property changes on: trunk/sys/dev/nand/nand_ecc_pos.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
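nand_ecc_pos.h above only lists the byte offsets within the OOB (spare) area where software-computed ECC is stored, for what are presumably 16-, 64- and 128-byte spare layouts. As a small sketch under the assumption that the consumer computes a few ECC bytes per data step (the header itself does not spell this out), the tables would be used to scatter and gather ECC bytes like this:

/* Sketch: scatter computed ECC into the OOB at the positions listed above. */
static void
ecc_scatter(uint8_t *oob, const uint8_t *ecc, const uint16_t *pos, int n)
{
	int i;

	for (i = 0; i < n; i++)
		oob[pos[i]] = ecc[i];
}

/* Sketch: gather stored ECC back out of the OOB for verification. */
static void
ecc_gather(uint8_t *ecc, const uint8_t *oob, const uint16_t *pos, int n)
{
	int i;

	for (i = 0; i < n; i++)
		ecc[i] = oob[pos[i]];
}

For example, ecc_scatter(oob, ecc, default_software_ecc_positions_64, 24) would place 24 ECC bytes into a 64-byte spare area.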
Added: trunk/sys/dev/nand/nand_generic.c
===================================================================
--- trunk/sys/dev/nand/nand_generic.c	                        (rev 0)
+++ trunk/sys/dev/nand/nand_generic.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,1375 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Generic NAND driver */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nand_generic.c 285599 2015-07-15 11:58:30Z brueffer $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/time.h>
+#include <sys/malloc.h>
+
+#include <dev/nand/nand.h>
+#include <dev/nand/nandbus.h>
+#include "nfc_if.h"
+#include "nand_if.h"
+#include "nandbus_if.h"
+
+
+static int onfi_nand_probe(device_t dev);
+static int large_nand_probe(device_t dev);
+static int small_nand_probe(device_t dev);
+static int generic_nand_attach(device_t dev);
+static int generic_nand_detach(device_t dev);
+
+static int generic_erase_block(device_t, uint32_t);
+static int generic_erase_block_intlv(device_t, uint32_t);
+static int generic_read_page(device_t, uint32_t, void *, uint32_t, uint32_t);
+static int generic_read_oob(device_t, uint32_t, void *, uint32_t, uint32_t);
+static int generic_program_page(device_t, uint32_t, void *, uint32_t, uint32_t);
+static int generic_program_page_intlv(device_t, uint32_t, void *, uint32_t,
+    uint32_t);
+static int generic_program_oob(device_t, uint32_t, void *, uint32_t, uint32_t);
+static int generic_is_blk_bad(device_t, uint32_t, uint8_t *);
+static int generic_get_ecc(device_t, void *, void *, int *);
+static int generic_correct_ecc(device_t, void *, void *, void *);
+
+static int small_read_page(device_t, uint32_t, void *, uint32_t, uint32_t);
+static int small_read_oob(device_t, uint32_t, void *, uint32_t, uint32_t);
+static int small_program_page(device_t, uint32_t, void *, uint32_t, uint32_t);
+static int small_program_oob(device_t, uint32_t, void *, uint32_t, uint32_t);
+
+static int onfi_is_blk_bad(device_t, uint32_t, uint8_t *);
+static int onfi_read_parameter(struct nand_chip *, struct onfi_chip_params *);
+
+static int nand_send_address(device_t, int32_t, int32_t, int8_t);
+
+static device_method_t onand_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,			onfi_nand_probe),
+	DEVMETHOD(device_attach,		generic_nand_attach),
+	DEVMETHOD(device_detach,		generic_nand_detach),
+
+	DEVMETHOD(nand_read_page,		generic_read_page),
+	DEVMETHOD(nand_program_page,		generic_program_page),
+	DEVMETHOD(nand_program_page_intlv,	generic_program_page_intlv),
+	DEVMETHOD(nand_read_oob,		generic_read_oob),
+	DEVMETHOD(nand_program_oob,		generic_program_oob),
+	DEVMETHOD(nand_erase_block,		generic_erase_block),
+	DEVMETHOD(nand_erase_block_intlv,	generic_erase_block_intlv),
+
+	DEVMETHOD(nand_is_blk_bad,		onfi_is_blk_bad),
+	DEVMETHOD(nand_get_ecc,			generic_get_ecc),
+	DEVMETHOD(nand_correct_ecc,		generic_correct_ecc),
+	{ 0, 0 }
+};
+
+static device_method_t lnand_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		large_nand_probe),
+	DEVMETHOD(device_attach,	generic_nand_attach),
+	DEVMETHOD(device_detach,	generic_nand_detach),
+
+	DEVMETHOD(nand_read_page,	generic_read_page),
+	DEVMETHOD(nand_program_page,	generic_program_page),
+	DEVMETHOD(nand_read_oob,	generic_read_oob),
+	DEVMETHOD(nand_program_oob,	generic_program_oob),
+	DEVMETHOD(nand_erase_block,	generic_erase_block),
+
+	DEVMETHOD(nand_is_blk_bad,	generic_is_blk_bad),
+	DEVMETHOD(nand_get_ecc,		generic_get_ecc),
+	DEVMETHOD(nand_correct_ecc,	generic_correct_ecc),
+	{ 0, 0 }
+};
+
+static device_method_t snand_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		small_nand_probe),
+	DEVMETHOD(device_attach,	generic_nand_attach),
+	DEVMETHOD(device_detach,	generic_nand_detach),
+
+	DEVMETHOD(nand_read_page,	small_read_page),
+	DEVMETHOD(nand_program_page,	small_program_page),
+	DEVMETHOD(nand_read_oob,	small_read_oob),
+	DEVMETHOD(nand_program_oob,	small_program_oob),
+	DEVMETHOD(nand_erase_block,	generic_erase_block),
+
+	DEVMETHOD(nand_is_blk_bad,	generic_is_blk_bad),
+	DEVMETHOD(nand_get_ecc,		generic_get_ecc),
+	DEVMETHOD(nand_correct_ecc,	generic_correct_ecc),
+	{ 0, 0 }
+};
+
+devclass_t onand_devclass;
+devclass_t lnand_devclass;
+devclass_t snand_devclass;
+
+driver_t onand_driver = {
+	"onand",
+	onand_methods,
+	sizeof(struct nand_chip)
+};
+
+driver_t lnand_driver = {
+	"lnand",
+	lnand_methods,
+	sizeof(struct nand_chip)
+};
+
+driver_t snand_driver = {
+	"snand",
+	snand_methods,
+	sizeof(struct nand_chip)
+};
+
+DRIVER_MODULE(onand, nandbus, onand_driver, onand_devclass, 0, 0);
+DRIVER_MODULE(lnand, nandbus, lnand_driver, lnand_devclass, 0, 0);
+DRIVER_MODULE(snand, nandbus, snand_driver, snand_devclass, 0, 0);
+
+static int
+onfi_nand_probe(device_t dev)
+{
+	struct nandbus_ivar *ivar;
+
+	ivar = device_get_ivars(dev);
+	if (ivar && ivar->is_onfi) {
+		device_set_desc(dev, "ONFI compliant NAND");
+		return (BUS_PROBE_DEFAULT);
+	}
+
+	return (ENODEV);
+}
+
+static int
+large_nand_probe(device_t dev)
+{
+	struct nandbus_ivar *ivar;
+
+	ivar = device_get_ivars(dev);
+	if (ivar && !ivar->is_onfi && ivar->params->page_size >= 512) {
+		device_set_desc(dev, ivar->params->name);
+		return (BUS_PROBE_DEFAULT);
+	}
+
+	return (ENODEV);
+}
+
+static int
+small_nand_probe(device_t dev)
+{
+	struct nandbus_ivar *ivar;
+
+	ivar = device_get_ivars(dev);
+	if (ivar && !ivar->is_onfi && ivar->params->page_size == 512) {
+		device_set_desc(dev, ivar->params->name);
+		return (BUS_PROBE_DEFAULT);
+	}
+
+	return (ENODEV);
+}
+
+static int
+generic_nand_attach(device_t dev)
+{
+	struct nand_chip *chip;
+	struct nandbus_ivar *ivar;
+	struct onfi_chip_params *onfi_chip_params;
+	device_t nandbus, nfc;
+	int err;
+
+	chip = device_get_softc(dev);
+	chip->dev = dev;
+
+	ivar = device_get_ivars(dev);
+	chip->id.man_id = ivar->man_id;
+	chip->id.dev_id = ivar->dev_id;
+	chip->num = ivar->cs;
+
+	/* TODO remove when HW ECC supported */
+	nandbus = device_get_parent(dev);
+	nfc = device_get_parent(nandbus);
+
+	chip->nand = device_get_softc(nfc);
+
+	if (ivar->is_onfi) {
+		onfi_chip_params = malloc(sizeof(struct onfi_chip_params),
+		    M_NAND, M_WAITOK | M_ZERO);
+		if (onfi_chip_params == NULL)
+			return (ENOMEM);
+
+		if (onfi_read_parameter(chip, onfi_chip_params)) {
+			nand_debug(NDBG_GEN,"Could not read parameter page!\n");
+			free(onfi_chip_params, M_NAND);
+			return (ENXIO);
+		}
+
+		nand_onfi_set_params(chip, onfi_chip_params);
+		/* Set proper column and row cycles */
+		ivar->cols = (onfi_chip_params->address_cycles >> 4) & 0xf;
+		ivar->rows = onfi_chip_params->address_cycles & 0xf;
+		free(onfi_chip_params, M_NAND);
+
+	} else {
+		nand_set_params(chip, ivar->params);
+	}
+
+	err = nand_init_stat(chip);
+	if (err) {
+		generic_nand_detach(dev);
+		return (err);
+	}
+
+	err = nand_init_bbt(chip);
+	if (err) {
+		generic_nand_detach(dev);
+		return (err);
+	}
+
+	err = nand_make_dev(chip);
+	if (err) {
+		generic_nand_detach(dev);
+		return (err);
+	}
+
+	err = create_geom_disk(chip);
+	if (err) {
+		generic_nand_detach(dev);
+		return (err);
+	}
+
+	return (0);
+}
+
+static int
+generic_nand_detach(device_t dev)
+{
+	struct nand_chip *chip;
+
+	chip = device_get_softc(dev);
+
+	nand_destroy_bbt(chip);
+	destroy_geom_disk(chip);
+	nand_destroy_dev(chip);
+	nand_destroy_stat(chip);
+
+	return (0);
+}
+
+static int
+can_write(device_t nandbus)
+{
+	uint8_t status;
+
+	if (NANDBUS_WAIT_READY(nandbus, &status))
+		return (0);
+
+	if (!(status & NAND_STATUS_WP)) {
+		nand_debug(NDBG_GEN,"Chip is write-protected");
+		return (0);
+	}
+
+	return (1);
+}
+
+static int
+check_fail(device_t nandbus)
+{
+	uint8_t status;
+
+	NANDBUS_WAIT_READY(nandbus, &status);
+	if (status & NAND_STATUS_FAIL) {
+		nand_debug(NDBG_GEN,"Status failed %x", status);
+		return (ENXIO);
+	}
+
+	return (0);
+}
+
+static uint16_t
+onfi_crc(const void *buf, size_t buflen)
+{
+	int i, j;
+	uint16_t crc;
+	const uint8_t *bufptr;
+
+	bufptr = buf;
+	crc = 0x4f4e;
+	for (j = 0; j < buflen; j++) {
+		crc ^= *bufptr++ << 8;
+		for (i = 0; i < 8; i++)
+			if (crc & 0x8000)
+				crc = (crc << 1) ^ 0x8005;
+			else
+				crc <<= 1;
+	}
+	return (crc);
+}
+
+static int
+onfi_read_parameter(struct nand_chip *chip, struct onfi_chip_params *chip_params)
+{
+	device_t nandbus;
+	struct onfi_params params;
+	int found, sigcount, trycopy;
+
+	nand_debug(NDBG_GEN,"read parameter");
+
+	nandbus = device_get_parent(chip->dev);
+
+	NANDBUS_SELECT_CS(nandbus, chip->num);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_READ_PARAMETER))
+		return (ENXIO);
+
+	if (nand_send_address(chip->dev, -1, -1, PAGE_PARAMETER_DEF))
+		return (ENXIO);
+
+	if (NANDBUS_START_COMMAND(nandbus))
+		return (ENXIO);
+
+	/*
+	 * XXX Bogus DELAY, we really need a nandbus_wait_ready() here, but it's
+	 * not accessible from here (static to nandbus).
+	 */
+	DELAY(1000);
+
+	/*
+	 * The ONFI spec mandates a minimum of three copies of the parameter
+	 * data, so loop up to 3 times trying to find good data.  Each copy is
+	 * validated by a signature of "ONFI" and a crc. There is a very strange
+	 * rule that the signature is valid if any 2 of the 4 bytes are correct.
+	 */
+	for (found = 0, trycopy = 0; !found && trycopy < 3; trycopy++) {
+		NANDBUS_READ_BUFFER(nandbus, &params, sizeof(struct onfi_params));
+		sigcount  = params.signature[0] == 'O';
+		sigcount += params.signature[1] == 'N';
+		sigcount += params.signature[2] == 'F';
+		sigcount += params.signature[3] == 'I';
+		if (sigcount < 2)
+			continue;
+		if (onfi_crc(&params, 254) != params.crc)
+			continue;
+		found = 1;
+	}
+	if (!found)
+		return (ENXIO);
+
+	chip_params->luns = params.luns;
+	chip_params->blocks_per_lun = le32dec(&params.blocks_per_lun);
+	chip_params->pages_per_block = le32dec(&params.pages_per_block);
+	chip_params->bytes_per_page = le32dec(&params.bytes_per_page);
+	chip_params->spare_bytes_per_page = le16dec(&params.spare_bytes_per_page);
+	chip_params->t_bers = le16dec(&params.t_bers);
+	chip_params->t_prog = le16dec(&params.t_prog);
+	chip_params->t_r = le16dec(&params.t_r);
+	chip_params->t_ccs = le16dec(&params.t_ccs);
+	chip_params->features = le16dec(&params.features);
+	chip_params->address_cycles = params.address_cycles;
+
+	return (0);
+}
+
+static int
+send_read_page(device_t nand, uint8_t start_command, uint8_t end_command,
+    uint32_t row, uint32_t column)
+{
+	device_t nandbus = device_get_parent(nand);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, start_command))
+		return (ENXIO);
+
+	if (nand_send_address(nand, row, column, -1))
+		return (ENXIO);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, end_command))
+		return (ENXIO);
+
+	if (NANDBUS_START_COMMAND(nandbus))
+		return (ENXIO);
+
+	return (0);
+}
+
+static int
+generic_read_page(device_t nand, uint32_t page, void *buf, uint32_t len,
+    uint32_t offset)
+{
+	struct nand_chip *chip;
+	struct page_stat *pg_stat;
+	device_t nandbus;
+	uint32_t row;
+
+	nand_debug(NDBG_GEN,"%p raw read page %x[%x] at %x", nand, page, len, offset);
+	chip = device_get_softc(nand);
+	nandbus = device_get_parent(nand);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	if (send_read_page(nand, NAND_CMD_READ, NAND_CMD_READ_END, row,
+	    offset))
+		return (ENXIO);
+
+	DELAY(chip->t_r);
+
+	NANDBUS_READ_BUFFER(nandbus, buf, len);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	pg_stat = &(chip->pg_stat[page]);
+	pg_stat->page_raw_read++;
+
+	return (0);
+}
+
+static int
+generic_read_oob(device_t nand, uint32_t page, void* buf, uint32_t len,
+    uint32_t offset)
+{
+	struct nand_chip *chip;
+	device_t nandbus;
+	uint32_t row;
+
+	nand_debug(NDBG_GEN,"%p raw read oob %x[%x] at %x", nand, page, len, offset);
+	chip = device_get_softc(nand);
+	nandbus = device_get_parent(nand);
+
+	if (nand_check_page_boundary(chip, page)) {
+		nand_debug(NDBG_GEN,"page boundary check failed: %08x\n", page);
+		return (ENXIO);
+	}
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	offset += chip->chip_geom.page_size;
+
+	if (send_read_page(nand, NAND_CMD_READ, NAND_CMD_READ_END, row,
+	    offset))
+		return (ENXIO);
+
+	DELAY(chip->t_r);
+
+	NANDBUS_READ_BUFFER(nandbus, buf, len);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	return (0);
+}
+
+static int
+send_start_program_page(device_t nand, uint32_t row, uint32_t column)
+{
+	device_t nandbus = device_get_parent(nand);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_PROG))
+		return (ENXIO);
+
+	if (nand_send_address(nand, row, column, -1))
+		return (ENXIO);
+
+	return (0);
+}
+
+static int
+send_end_program_page(device_t nandbus, uint8_t end_command)
+{
+
+	if (NANDBUS_SEND_COMMAND(nandbus, end_command))
+		return (ENXIO);
+
+	if (NANDBUS_START_COMMAND(nandbus))
+		return (ENXIO);
+
+	return (0);
+}
+
+static int
+generic_program_page(device_t nand, uint32_t page, void *buf, uint32_t len,
+    uint32_t offset)
+{
+	struct nand_chip *chip;
+	struct page_stat *pg_stat;
+	device_t nandbus;
+	uint32_t row;
+
+	nand_debug(NDBG_GEN,"%p raw prog page %x[%x] at %x", nand, page, len,
+	    offset);
+	chip = device_get_softc(nand);
+	nandbus = device_get_parent(nand);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	if (!can_write(nandbus))
+		return (ENXIO);
+
+	if (send_start_program_page(nand, row, offset))
+		return (ENXIO);
+
+	NANDBUS_WRITE_BUFFER(nandbus, buf, len);
+
+	if (send_end_program_page(nandbus, NAND_CMD_PROG_END))
+		return (ENXIO);
+
+	DELAY(chip->t_prog);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	pg_stat = &(chip->pg_stat[page]);
+	pg_stat->page_raw_written++;
+
+	return (0);
+}
+
+static int
+generic_program_page_intlv(device_t nand, uint32_t page, void *buf,
+    uint32_t len, uint32_t offset)
+{
+	struct nand_chip *chip;
+	struct page_stat *pg_stat;
+	device_t nandbus;
+	uint32_t row;
+
+	nand_debug(NDBG_GEN,"%p raw prog page %x[%x] at %x", nand, page, len, offset);
+	chip = device_get_softc(nand);
+	nandbus = device_get_parent(nand);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	if (!can_write(nandbus))
+		return (ENXIO);
+
+	if (send_start_program_page(nand, row, offset))
+		return (ENXIO);
+
+	NANDBUS_WRITE_BUFFER(nandbus, buf, len);
+
+	if (send_end_program_page(nandbus, NAND_CMD_PROG_INTLV))
+		return (ENXIO);
+
+	DELAY(chip->t_prog);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	pg_stat = &(chip->pg_stat[page]);
+	pg_stat->page_raw_written++;
+
+	return (0);
+}
+
+static int
+generic_program_oob(device_t nand, uint32_t page, void* buf, uint32_t len,
+    uint32_t offset)
+{
+	struct nand_chip *chip;
+	device_t nandbus;
+	uint32_t row;
+
+	nand_debug(NDBG_GEN,"%p raw prog oob %x[%x] at %x", nand, page, len,
+	    offset);
+	chip = device_get_softc(nand);
+	nandbus = device_get_parent(nand);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+	offset += chip->chip_geom.page_size;
+
+	if (!can_write(nandbus))
+		return (ENXIO);
+
+	if (send_start_program_page(nand, row, offset))
+		return (ENXIO);
+
+	NANDBUS_WRITE_BUFFER(nandbus, buf, len);
+
+	if (send_end_program_page(nandbus, NAND_CMD_PROG_END))
+		return (ENXIO);
+
+	DELAY(chip->t_prog);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	return (0);
+}
+
+static int
+send_erase_block(device_t nand, uint32_t row, uint8_t second_command)
+{
+	device_t nandbus = device_get_parent(nand);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_ERASE))
+		return (ENXIO);
+
+	if (nand_send_address(nand, row, -1, -1))
+		return (ENXIO);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, second_command))
+		return (ENXIO);
+
+	if (NANDBUS_START_COMMAND(nandbus))
+		return (ENXIO);
+
+	return (0);
+}
+
+static int
+generic_erase_block(device_t nand, uint32_t block)
+{
+	struct block_stat *blk_stat;
+	struct nand_chip *chip;
+	device_t nandbus;
+	int row;
+
+	nand_debug(NDBG_GEN,"%p erase block  %x", nand, block);
+	nandbus = device_get_parent(nand);
+	chip = device_get_softc(nand);
+
+	if (block >= (chip->chip_geom.blks_per_lun * chip->chip_geom.luns))
+		return (ENXIO);
+
+	row = (block << chip->chip_geom.blk_shift) &
+	    chip->chip_geom.blk_mask;
+
+	nand_debug(NDBG_GEN,"%p erase block  row %x", nand, row);
+
+	if (!can_write(nandbus))
+		return (ENXIO);
+
+	send_erase_block(nand, row, NAND_CMD_ERASE_END);
+
+	DELAY(chip->t_bers);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	blk_stat = &(chip->blk_stat[block]);
+	blk_stat->block_erased++;
+
+	return (0);
+}
+
+static int
+generic_erase_block_intlv(device_t nand, uint32_t block)
+{
+	struct block_stat *blk_stat;
+	struct nand_chip *chip;
+	device_t nandbus;
+	int row;
+
+	nand_debug(NDBG_GEN,"%p erase block  %x", nand, block);
+	nandbus = device_get_parent(nand);
+	chip = device_get_softc(nand);
+
+	if (block >= (chip->chip_geom.blks_per_lun * chip->chip_geom.luns))
+		return (ENXIO);
+
+	row = (block << chip->chip_geom.blk_shift) &
+	    chip->chip_geom.blk_mask;
+
+	if (!can_write(nandbus))
+		return (ENXIO);
+
+	send_erase_block(nand, row, NAND_CMD_ERASE_INTLV);
+
+	DELAY(chip->t_bers);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	blk_stat = &(chip->blk_stat[block]);
+	blk_stat->block_erased++;
+
+	return (0);
+
+}
+
+static int
+onfi_is_blk_bad(device_t device, uint32_t block_number, uint8_t *bad)
+{
+	struct nand_chip *chip;
+	int page_number, i, j, err;
+	uint8_t *oob;
+
+	chip = device_get_softc(device);
+
+	oob = malloc(chip->chip_geom.oob_size, M_NAND, M_WAITOK);
+	if (!oob) {
+		device_printf(device, "%s: cannot allocate oob\n", __func__);
+		return (ENOMEM);
+	}
+
+	page_number = block_number * chip->chip_geom.pgs_per_blk;
+	*bad = 0;
+	/* Check OOB of first and last page */
+	for (i = 0; i < 2; i++, page_number += chip->chip_geom.pgs_per_blk - 1) {
+		err = generic_read_oob(device, page_number, oob,
+		    chip->chip_geom.oob_size, 0);
+		if (err) {
+			device_printf(device, "%s: cannot read OOB\n",
+			    __func__);
+			free(oob, M_NAND);
+			return (err);
+		}
+
+		for (j = 0; j < chip->chip_geom.oob_size; j++) {
+			if (!oob[j]) {
+				*bad = 1;
+				free(oob, M_NAND);
+				return (0);
+			}
+		}
+	}
+
+	free(oob, M_NAND);
+
+	return (0);
+}
+
+static int
+send_small_read_page(device_t nand, uint8_t start_command,
+    uint32_t row, uint32_t column)
+{
+	device_t nandbus = device_get_parent(nand);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, start_command))
+		return (ENXIO);
+
+	if (nand_send_address(nand, row, column, -1))
+		return (ENXIO);
+
+	if (NANDBUS_START_COMMAND(nandbus))
+		return (ENXIO);
+
+	return (0);
+}
+
+
+static int
+small_read_page(device_t nand, uint32_t page, void *buf, uint32_t len,
+    uint32_t offset)
+{
+	struct nand_chip *chip;
+	struct page_stat *pg_stat;
+	device_t nandbus;
+	uint32_t row;
+
+	nand_debug(NDBG_GEN,"%p small read page %x[%x] at %x", nand, page, len, offset);
+	chip = device_get_softc(nand);
+	nandbus = device_get_parent(nand);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	if (offset < 256) {
+		if (send_small_read_page(nand, NAND_CMD_SMALLA, row, offset))
+			return (ENXIO);
+	} else {
+		offset -= 256;
+		if (send_small_read_page(nand, NAND_CMD_SMALLB, row, offset))
+			return (ENXIO);
+	}
+
+	DELAY(chip->t_r);
+
+	NANDBUS_READ_BUFFER(nandbus, buf, len);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	pg_stat = &(chip->pg_stat[page]);
+	pg_stat->page_raw_read++;
+
+	return (0);
+}
+
+static int
+small_read_oob(device_t nand, uint32_t page, void *buf, uint32_t len,
+    uint32_t offset)
+{
+	struct nand_chip *chip;
+	struct page_stat *pg_stat;
+	device_t nandbus;
+	uint32_t row;
+
+	nand_debug(NDBG_GEN,"%p small read oob %x[%x] at %x", nand, page, len, offset);
+	chip = device_get_softc(nand);
+	nandbus = device_get_parent(nand);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	if (send_small_read_page(nand, NAND_CMD_SMALLOOB, row, 0))
+		return (ENXIO);
+
+	DELAY(chip->t_r);
+
+	NANDBUS_READ_BUFFER(nandbus, buf, len);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	pg_stat = &(chip->pg_stat[page]);
+	pg_stat->page_raw_read++;
+
+	return (0);
+}
+
+static int
+small_program_page(device_t nand, uint32_t page, void* buf, uint32_t len,
+    uint32_t offset)
+{
+	struct nand_chip *chip;
+	device_t nandbus;
+	uint32_t row;
+
+	nand_debug(NDBG_GEN,"%p small prog page %x[%x] at %x", nand, page, len, offset);
+	chip = device_get_softc(nand);
+	nandbus = device_get_parent(nand);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	if (!can_write(nandbus))
+		return (ENXIO);
+
+	if (offset < 256) {
+		if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_SMALLA))
+			return (ENXIO);
+	} else {
+		if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_SMALLB))
+			return (ENXIO);
+	}
+
+	if (send_start_program_page(nand, row, offset))
+		return (ENXIO);
+
+	NANDBUS_WRITE_BUFFER(nandbus, buf, len);
+
+	if (send_end_program_page(nandbus, NAND_CMD_PROG_END))
+		return (ENXIO);
+
+	DELAY(chip->t_prog);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	return (0);
+}
+
+static int
+small_program_oob(device_t nand, uint32_t page, void* buf, uint32_t len,
+    uint32_t offset)
+{
+	struct nand_chip *chip;
+	device_t nandbus;
+	uint32_t row;
+
+	nand_debug(NDBG_GEN,"%p small prog oob %x[%x] at %x", nand, page, len, offset);
+	chip = device_get_softc(nand);
+	nandbus = device_get_parent(nand);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	if (!can_write(nandbus))
+		return (ENXIO);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_SMALLOOB))
+		return (ENXIO);
+
+	if (send_start_program_page(nand, row, offset))
+		return (ENXIO);
+
+	NANDBUS_WRITE_BUFFER(nandbus, buf, len);
+
+	if (send_end_program_page(nandbus, NAND_CMD_PROG_END))
+		return (ENXIO);
+
+	DELAY(chip->t_prog);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	return (0);
+}
+
+int
+nand_send_address(device_t nand, int32_t row, int32_t col, int8_t id)
+{
+	struct nandbus_ivar *ivar;
+	device_t nandbus;
+	uint8_t addr;
+	int err = 0;
+	int i;
+
+	nandbus = device_get_parent(nand);
+	ivar = device_get_ivars(nand);
+
+	if (id != -1) {
+		nand_debug(NDBG_GEN,"send_address: send id %02x", id);
+		err = NANDBUS_SEND_ADDRESS(nandbus, id);
+	}
+
+	if (!err && col != -1) {
+		for (i = 0; i < ivar->cols; i++, col >>= 8) {
+			addr = (uint8_t)(col & 0xff);
+			nand_debug(NDBG_GEN,"send_address: send address column "
+			    "%02x", addr);
+			err = NANDBUS_SEND_ADDRESS(nandbus, addr);
+			if (err)
+				break;
+		}
+	}
+
+	if (!err && row != -1) {
+		for (i = 0; i < ivar->rows; i++, row >>= 8) {
+			addr = (uint8_t)(row & 0xff);
+			nand_debug(NDBG_GEN,"send_address: send address row "
+			    "%02x", addr);
+			err = NANDBUS_SEND_ADDRESS(nandbus, addr);
+			if (err)
+				break;
+		}
+	}
+
+	return (err);
+}
+
+static int
+generic_is_blk_bad(device_t dev, uint32_t block, uint8_t *bad)
+{
+	struct nand_chip *chip;
+	int page_number, err, i;
+	uint8_t *oob;
+
+	chip = device_get_softc(dev);
+
+	oob = malloc(chip->chip_geom.oob_size, M_NAND, M_WAITOK);
+	if (!oob) {
+		device_printf(dev, "%s: cannot allocate OOB\n", __func__);
+		return (ENOMEM);
+	}
+
+	page_number = block * chip->chip_geom.pgs_per_blk;
+	*bad = 0;
+
+	/* Check OOB of first and second page */
+	for (i = 0; i < 2; i++) {
+		err = NAND_READ_OOB(dev, page_number + i, oob,
+		    chip->chip_geom.oob_size, 0);
+		if (err) {
+			device_printf(dev, "%s: cannot read OOB\n",
+			    __func__);
+			free(oob, M_NAND);
+			return (err);
+		}
+
+		if (!oob[0]) {
+			*bad = 1;
+			free(oob, M_NAND);
+			return (0);
+		}
+	}
+
+	free(oob, M_NAND);
+
+	return (0);
+}
+
+static int
+generic_get_ecc(device_t dev, void *buf, void *ecc, int *needwrite)
+{
+	struct nand_chip *chip = device_get_softc(dev);
+	struct chip_geom *cg = &chip->chip_geom;
+
+	return (NANDBUS_GET_ECC(device_get_parent(dev), buf, cg->page_size,
+	    ecc, needwrite));
+}
+
+static int
+generic_correct_ecc(device_t dev, void *buf, void *readecc, void *calcecc)
+{
+	struct nand_chip *chip = device_get_softc(dev);
+	struct chip_geom *cg = &chip->chip_geom;
+
+	return (NANDBUS_CORRECT_ECC(device_get_parent(dev), buf,
+	    cg->page_size, readecc, calcecc));
+}
+
+
+#if 0
+int
+nand_chng_read_col(device_t nand, uint32_t col, void *buf, size_t len)
+{
+	struct nand_chip *chip;
+	device_t nandbus;
+
+	chip = device_get_softc(nand);
+	nandbus = device_get_parent(nand);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_CHNG_READ_COL))
+		return (ENXIO);
+
+	if (NANDBUS_SEND_ADDRESS(nandbus, -1, col, -1))
+		return (ENXIO);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_CHNG_READ_COL_END))
+		return (ENXIO);
+
+	if (NANDBUS_START_COMMAND(nandbus))
+		return (ENXIO);
+
+	if (buf != NULL && len > 0)
+		NANDBUS_READ_BUFFER(nandbus, buf, len);
+
+	return (0);
+}
+
+int
+nand_chng_write_col(device_t dev, uint32_t col, void *buf,
+    size_t len)
+{
+	struct nand_chip *chip;
+	device_t nandbus;
+
+	chip = device_get_softc(dev);
+	nandbus = device_get_parent(dev);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_CHNG_WRITE_COL))
+		return (ENXIO);
+
+	if (NANDBUS_SEND_ADDRESS(nandbus, -1, col, -1))
+		return (ENXIO);
+
+	if (buf != NULL && len > 0)
+		NANDBUS_WRITE_BUFFER(nandbus, buf, len);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_CHNG_READ_COL_END))
+		return (ENXIO);
+
+	if (NANDBUS_START_COMMAND(nandbus))
+		return (ENXIO);
+
+	return (0);
+}
+
+int
+nand_copyback_read(device_t dev, uint32_t page, uint32_t col,
+    void *buf, size_t len)
+{
+	struct nand_chip *chip;
+	struct page_stat *pg_stat;
+	device_t nandbus;
+	uint32_t row;
+
+	nand_debug(NDBG_GEN," raw read page %x[%x] at %x", page, col, len);
+	chip = device_get_softc(dev);
+	nandbus = device_get_parent(dev);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	if (send_read_page(dev, NAND_CMD_READ, NAND_CMD_READ_CPBK, row, 0))
+		return (ENXIO);
+
+	DELAY(chip->t_r);
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	if (buf != NULL && len > 0)
+		NANDBUS_READ_BUFFER(nandbus, buf, len);
+
+	pg_stat = &(chip->pg_stat[page]);
+	pg_stat->page_raw_read++;
+
+	return (0);
+}
+
+int
+nand_copyback_prog(device_t dev, uint32_t page, uint32_t col,
+    void *buf, size_t len)
+{
+	struct nand_chip *chip;
+	struct page_stat *pg_stat;
+	device_t nandbus;
+	uint32_t row;
+
+	nand_debug(NDBG_GEN,"copyback prog page %x[%x]",  page, len);
+	chip = device_get_softc(dev);
+	nandbus = device_get_parent(dev);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	if (!can_write(nandbus))
+		return (ENXIO);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_CHNG_WRITE_COL))
+		return (ENXIO);
+
+	if (NANDBUS_SEND_ADDRESS(nandbus, row, col, -1))
+		return (ENXIO);
+
+	if (buf != NULL && len > 0)
+		NANDBUS_WRITE_BUFFER(nandbus, buf, len);
+
+	if (send_end_program_page(nandbus, NAND_CMD_PROG_END))
+		return (ENXIO);
+
+	DELAY(chip->t_prog);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	pg_stat = &(chip->pg_stat[page]);
+	pg_stat->page_raw_written++;
+
+	return (0);
+}
+
+int
+nand_copyback_prog_intlv(device_t dev, uint32_t page)
+{
+	struct nand_chip *chip;
+	struct page_stat *pg_stat;
+	device_t nandbus;
+	uint32_t row;
+
+	nand_debug(NDBG_GEN,"cache prog page %x", page);
+	chip = device_get_softc(dev);
+	nandbus = device_get_parent(dev);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	if (!can_write(nandbus))
+		return (ENXIO);
+
+	if (send_start_program_page(dev, row, 0))
+		return (ENXIO);
+
+	if (send_end_program_page(nandbus, NAND_CMD_PROG_INTLV))
+		return (ENXIO);
+
+	DELAY(chip->t_prog);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	pg_stat = &(chip->pg_stat[page]);
+	pg_stat->page_raw_written++;
+
+	return (0);
+}
+
+int
+nand_prog_cache(device_t dev, uint32_t page, uint32_t col,
+    void *buf, size_t len, uint8_t end)
+{
+	struct nand_chip *chip;
+	struct page_stat *pg_stat;
+	device_t nandbus;
+	uint32_t row;
+	uint8_t command;
+
+	nand_debug(NDBG_GEN,"cache prog page %x[%x]",  page, len);
+	chip = device_get_softc(dev);
+	nandbus = device_get_parent(dev);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	if (!can_write(nandbus))
+		return (ENXIO);
+
+	if (send_start_program_page(dev, row, 0))
+		return (ENXIO);
+
+	NANDBUS_WRITE_BUFFER(nandbus, buf, len);
+
+	if (end)
+		command = NAND_CMD_PROG_END;
+	else
+		command = NAND_CMD_PROG_CACHE;
+
+	if (send_end_program_page(nandbus, command))
+		return (ENXIO);
+
+	DELAY(chip->t_prog);
+
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	pg_stat = &(chip->pg_stat[page]);
+	pg_stat->page_raw_written++;
+
+	return (0);
+}
+
+int
+nand_read_cache(device_t dev, uint32_t page, uint32_t col,
+    void *buf, size_t len, uint8_t end)
+{
+	struct nand_chip *chip;
+	struct page_stat *pg_stat;
+	device_t nandbus;
+	uint32_t row;
+	uint8_t command;
+
+	nand_debug(NDBG_GEN,"cache read page %x[%x] ", page, len);
+	chip = device_get_softc(dev);
+	nandbus = device_get_parent(dev);
+
+	if (nand_check_page_boundary(chip, page))
+		return (ENXIO);
+
+	page_to_row(&chip->chip_geom, page, &row);
+
+	if (page != -1) {
+		if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_READ))
+			return (ENXIO);
+
+		if (NANDBUS_SEND_ADDRESS(nandbus, row, col, -1))
+			return (ENXIO);
+	}
+
+	if (end)
+		command = NAND_CMD_READ_CACHE_END;
+	else
+		command = NAND_CMD_READ_CACHE;
+
+	if (NANDBUS_SEND_COMMAND(nandbus, command))
+		return (ENXIO);
+
+	if (NANDBUS_START_COMMAND(nandbus))
+		return (ENXIO);
+
+	DELAY(chip->t_r);
+	if (check_fail(nandbus))
+		return (ENXIO);
+
+	if (buf != NULL && len > 0)
+		NANDBUS_READ_BUFFER(nandbus, buf, len);
+
+	pg_stat = &(chip->pg_stat[page]);
+	pg_stat->page_raw_read++;
+
+	return (0);
+}
+
+int
+nand_get_feature(device_t dev, uint8_t feat, void *buf)
+{
+	struct nand_chip *chip;
+	device_t nandbus;
+
+	nand_debug(NDBG_GEN,"nand get feature");
+
+	chip = device_get_softc(dev);
+	nandbus = device_get_parent(dev);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_GET_FEATURE))
+		return (ENXIO);
+
+	if (NANDBUS_SEND_ADDRESS(nandbus, -1, -1, feat))
+		return (ENXIO);
+
+	if (NANDBUS_START_COMMAND(nandbus))
+		return (ENXIO);
+
+	DELAY(chip->t_r);
+	NANDBUS_READ_BUFFER(nandbus, buf, 4);
+
+	return (0);
+}
+
+int
+nand_set_feature(device_t dev, uint8_t feat, void *buf)
+{
+	struct nand_chip *chip;
+	device_t nandbus;
+
+	nand_debug(NDBG_GEN,"nand set feature");
+
+	chip = device_get_softc(dev);
+	nandbus = device_get_parent(dev);
+
+	if (NANDBUS_SEND_COMMAND(nandbus, NAND_CMD_SET_FEATURE))
+		return (ENXIO);
+
+	if (NANDBUS_SEND_ADDRESS(nandbus, -1, -1, feat))
+		return (ENXIO);
+
+	NANDBUS_WRITE_BUFFER(nandbus, buf, 4);
+
+	if (NANDBUS_START_COMMAND(nandbus))
+		return (ENXIO);
+
+	return (0);
+}
+#endif


Property changes on: trunk/sys/dev/nand/nand_generic.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
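One detail worth calling out from nand_generic.c above: onfi_read_parameter() accepts a parameter-page copy when at least two of the four "ONFI" signature bytes match and the CRC over the first 254 bytes equals the stored value. A self-contained restatement of that check, using the same polynomial and seed as onfi_crc() (the little-endian CRC location in bytes 254-255 follows the ONFI parameter-page layout), looks like this:

#include <stddef.h>
#include <stdint.h>

static uint16_t
onfi_crc16(const uint8_t *buf, size_t len)
{
	uint16_t crc = 0x4f4e;	/* ONFI seed */
	size_t i;
	int bit;

	for (i = 0; i < len; i++) {
		crc ^= (uint16_t)buf[i] << 8;
		for (bit = 0; bit < 8; bit++)
			crc = (crc & 0x8000) ? (crc << 1) ^ 0x8005 : crc << 1;
	}
	return (crc);
}

/* Return non-zero if one 256-byte parameter-page copy is acceptable. */
static int
onfi_page_copy_valid(const uint8_t *page)
{
	int sig;

	sig  = page[0] == 'O';
	sig += page[1] == 'N';
	sig += page[2] == 'F';
	sig += page[3] == 'I';
	if (sig < 2)
		return (0);
	return (onfi_crc16(page, 254) ==
	    (uint16_t)(page[254] | page[255] << 8));
}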
Added: trunk/sys/dev/nand/nand_geom.c
===================================================================
--- trunk/sys/dev/nand/nand_geom.c	                        (rev 0)
+++ trunk/sys/dev/nand/nand_geom.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,466 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nand_geom.c 313525 2017-02-10 05:35:30Z ngie $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/uio.h>
+#include <sys/bio.h>
+#include <geom/geom.h>
+#include <geom/geom_disk.h>
+
+#include <dev/nand/nand.h>
+#include <dev/nand/nandbus.h>
+#include <dev/nand/nand_dev.h>
+#include "nand_if.h"
+#include "nandbus_if.h"
+
+#define	BIO_NAND_STD	((void *)1)
+#define	BIO_NAND_RAW	((void *)2)
+
+static disk_ioctl_t nand_ioctl;
+static disk_getattr_t nand_getattr;
+static disk_strategy_t nand_strategy;
+static disk_strategy_t nand_strategy_raw;
+
+static int
+nand_read(struct nand_chip *chip, uint32_t offset, void *buf, uint32_t len)
+{
+
+	nand_debug(NDBG_GEOM, "Read from chip %d [%p] at %d", chip->num, chip,
+	    offset);
+
+	return (nand_read_pages(chip, offset, buf, len));
+}
+
+static int
+nand_write(struct nand_chip *chip, uint32_t offset, void* buf, uint32_t len)
+{
+
+	nand_debug(NDBG_GEOM, "Write to chip %d [%p] at %d", chip->num, chip,
+	    offset);
+
+	return (nand_prog_pages(chip, offset, buf, len));
+}
+
+static int
+nand_read_raw(struct nand_chip *chip, uint32_t offset, void *buf, uint32_t len)
+{
+	nand_debug(NDBG_GEOM, "Raw read from chip %d [%p] at %d", chip->num,
+	    chip, offset);
+
+	return (nand_read_pages_raw(chip, offset, buf, len));
+}
+
+static int
+nand_write_raw(struct nand_chip *chip, uint32_t offset, void *buf, uint32_t len)
+{
+
+	nand_debug(NDBG_GEOM, "Raw write to chip %d [%p] at %d", chip->num,
+	    chip, offset);
+
+	return (nand_prog_pages_raw(chip, offset, buf, len));
+}
+
+static void
+nand_strategy(struct bio *bp)
+{
+	struct nand_chip *chip;
+
+	chip = (struct nand_chip *)bp->bio_disk->d_drv1;
+
+	bp->bio_driver1 = BIO_NAND_STD;
+
+	nand_debug(NDBG_GEOM, "Strategy %s on chip %d [%p]",
+	    (bp->bio_cmd & BIO_READ) == BIO_READ ? "READ" :
+	    ((bp->bio_cmd & BIO_WRITE) == BIO_WRITE ? "WRITE" :
+	    ((bp->bio_cmd & BIO_DELETE) == BIO_DELETE ? "DELETE" : "UNKNOWN")),
+	    chip->num, chip);
+
+	mtx_lock(&chip->qlock);
+	bioq_insert_tail(&chip->bioq, bp);
+	mtx_unlock(&chip->qlock);
+	taskqueue_enqueue(chip->tq, &chip->iotask);
+}
+
+static void
+nand_strategy_raw(struct bio *bp)
+{
+	struct nand_chip *chip;
+
+	chip = (struct nand_chip *)bp->bio_disk->d_drv1;
+
+	/* Inform taskqueue that it's a raw access */
+	bp->bio_driver1 = BIO_NAND_RAW;
+
+	nand_debug(NDBG_GEOM, "Strategy %s on chip %d [%p]",
+	    (bp->bio_cmd & BIO_READ) == BIO_READ ? "READ" :
+	    ((bp->bio_cmd & BIO_WRITE) == BIO_WRITE ? "WRITE" :
+	    ((bp->bio_cmd & BIO_DELETE) == BIO_DELETE ? "DELETE" : "UNKNOWN")),
+	    chip->num, chip);
+
+	mtx_lock(&chip->qlock);
+	bioq_insert_tail(&chip->bioq, bp);
+	mtx_unlock(&chip->qlock);
+	taskqueue_enqueue(chip->tq, &chip->iotask);
+}
+
+static int
+nand_oob_access(struct nand_chip *chip, uint32_t page, uint32_t offset,
+    uint32_t len, uint8_t *data, uint8_t write)
+{
+	struct chip_geom *cg;
+	int ret = 0;
+
+	cg = &chip->chip_geom;
+
+	if (!write)
+		ret = nand_read_oob(chip, page, data, cg->oob_size);
+	else
+		ret = nand_prog_oob(chip, page, data, cg->oob_size);
+
+	return (ret);
+}
+
+static int
+nand_getattr(struct bio *bp)
+{
+	struct nand_chip *chip;
+	struct chip_geom *cg;
+	device_t dev;
+	int val;
+
+	if (bp->bio_disk == NULL || bp->bio_disk->d_drv1 == NULL)
+		return (ENXIO);
+
+	chip = (struct nand_chip *)bp->bio_disk->d_drv1;
+	cg = &(chip->chip_geom);
+
+	dev = device_get_parent(chip->dev);
+	dev = device_get_parent(dev);
+
+	if (strcmp(bp->bio_attribute, "NAND::device") == 0) {
+		if (bp->bio_length != sizeof(dev))
+			return (EFAULT);
+		bcopy(&dev, bp->bio_data, sizeof(dev));
+	} else {
+		if (strcmp(bp->bio_attribute, "NAND::oobsize") == 0)
+			val = cg->oob_size;
+		else if (strcmp(bp->bio_attribute, "NAND::pagesize") == 0)
+			val = cg->page_size;
+		else if (strcmp(bp->bio_attribute, "NAND::blocksize") == 0)
+			val = cg->block_size;
+		else
+			return (-1);
+		if (bp->bio_length != sizeof(val))
+			return (EFAULT);
+		bcopy(&val, bp->bio_data, sizeof(val));
+	}
+	bp->bio_completed = bp->bio_length;
+	return (0);
+}
+
+static int
+nand_ioctl(struct disk *ndisk, u_long cmd, void *data, int fflag,
+    struct thread *td)
+{
+	struct nand_chip *chip;
+	struct chip_geom  *cg;
+	struct nand_oob_rw *oob_rw = NULL;
+	struct nand_raw_rw *raw_rw = NULL;
+	device_t nandbus;
+	size_t bufsize = 0, len = 0;
+	size_t raw_size;
+	off_t off;
+	uint8_t *buf = NULL;
+	int ret = 0;
+	uint8_t status;
+
+	chip = (struct nand_chip *)ndisk->d_drv1;
+	cg = &chip->chip_geom;
+	nandbus = device_get_parent(chip->dev);
+
+	if ((cmd == NAND_IO_RAW_READ) || (cmd == NAND_IO_RAW_PROG)) {
+		raw_rw = (struct nand_raw_rw *)data;
+		raw_size = cg->pgs_per_blk * (cg->page_size + cg->oob_size);
+
+		/* Limit a single request to one block of raw data (data + OOB) */
+		if (raw_rw->len > raw_size)
+			return (EFBIG);
+
+		/*
+		 * Do not ask for too much memory; for large transfers,
+		 * read/write in 16-page chunks.
+		 */
+		bufsize = 16 * (cg->page_size + cg->oob_size);
+		if (raw_rw->len < bufsize)
+			bufsize = raw_rw->len;
+
+		buf = malloc(bufsize, M_NAND, M_WAITOK);
+		len = raw_rw->len;
+		off = 0;
+	}
+
+	switch (cmd) {
+	case NAND_IO_ERASE:
+		ret = nand_erase_blocks(chip, ((off_t *)data)[0],
+		    ((off_t *)data)[1]);
+		break;
+
+	case NAND_IO_OOB_READ:
+		oob_rw = (struct nand_oob_rw *)data;
+		ret = nand_oob_access(chip, oob_rw->page, 0,
+		    oob_rw->len, oob_rw->data, 0);
+		break;
+
+	case NAND_IO_OOB_PROG:
+		oob_rw = (struct nand_oob_rw *)data;
+		ret = nand_oob_access(chip, oob_rw->page, 0,
+		    oob_rw->len, oob_rw->data, 1);
+		break;
+
+	case NAND_IO_GET_STATUS:
+		NANDBUS_LOCK(nandbus);
+		ret = NANDBUS_GET_STATUS(nandbus, &status);
+		if (ret == 0)
+			*(uint8_t *)data = status;
+		NANDBUS_UNLOCK(nandbus);
+		break;
+
+	case NAND_IO_RAW_PROG:
+		while (len > 0) {
+			if (len < bufsize)
+				bufsize = len;
+
+			ret = copyin(raw_rw->data + off, buf, bufsize);
+			if (ret)
+				break;
+			ret = nand_prog_pages_raw(chip, raw_rw->off + off, buf,
+			    bufsize);
+			if (ret)
+				break;
+			len -= bufsize;
+			off += bufsize;
+		}
+		break;
+
+	case NAND_IO_RAW_READ:
+		while (len > 0) {
+			if (len < bufsize)
+				bufsize = len;
+
+			ret = nand_read_pages_raw(chip, raw_rw->off + off, buf,
+			    bufsize);
+			if (ret)
+				break;
+
+			ret = copyout(buf, raw_rw->data + off, bufsize);
+			if (ret)
+				break;
+			len -= bufsize;
+			off += bufsize;
+		}
+		break;
+
+	case NAND_IO_GET_CHIP_PARAM:
+		nand_get_chip_param(chip, (struct chip_param_io *)data);
+		break;
+
+	default:
+		printf("Unknown nand_ioctl request\n");
+		ret = EIO;
+	}
+
+	if (buf)
+		free(buf, M_NAND);
+
+	return (ret);
+}
+
+static void
+nand_io_proc(void *arg, int pending)
+{
+	struct nand_chip *chip = arg;
+	struct bio *bp;
+	int err = 0;
+
+	for (;;) {
+		mtx_lock(&chip->qlock);
+		bp = bioq_takefirst(&chip->bioq);
+		mtx_unlock(&chip->qlock);
+		if (bp == NULL)
+			break;
+
+		if (bp->bio_driver1 == BIO_NAND_STD) {
+			if ((bp->bio_cmd & BIO_READ) == BIO_READ) {
+				err = nand_read(chip,
+				    bp->bio_offset & 0xffffffff,
+				    bp->bio_data, bp->bio_bcount);
+			} else if ((bp->bio_cmd & BIO_WRITE) == BIO_WRITE) {
+				err = nand_write(chip,
+				    bp->bio_offset & 0xffffffff,
+				    bp->bio_data, bp->bio_bcount);
+			}
+		} else if (bp->bio_driver1 == BIO_NAND_RAW) {
+			if ((bp->bio_cmd & BIO_READ) == BIO_READ) {
+				err = nand_read_raw(chip,
+				    bp->bio_offset & 0xffffffff,
+				    bp->bio_data, bp->bio_bcount);
+			} else if ((bp->bio_cmd & BIO_WRITE) == BIO_WRITE) {
+				err = nand_write_raw(chip,
+				    bp->bio_offset & 0xffffffff,
+				    bp->bio_data, bp->bio_bcount);
+			}
+		} else
+			panic("Unknown access type in bio->bio_driver1\n");
+
+		if ((bp->bio_cmd & BIO_DELETE) == BIO_DELETE) {
+			nand_debug(NDBG_GEOM, "Delete on chip%d offset %lld "
+			    "length %ld\n", chip->num, bp->bio_offset,
+			    bp->bio_bcount);
+			err = nand_erase_blocks(chip,
+			    bp->bio_offset & 0xffffffff,
+			    bp->bio_bcount);
+		}
+
+		if (err == 0 || err == ECC_CORRECTABLE)
+			bp->bio_resid = 0;
+		else {
+			nand_debug(NDBG_GEOM,"nand_[read|write|erase_blocks] "
+			    "error: %d\n", err);
+
+			bp->bio_error = EIO;
+			bp->bio_flags |= BIO_ERROR;
+			bp->bio_resid = bp->bio_bcount;
+		}
+		biodone(bp);
+	}
+}
+
+int
+create_geom_disk(struct nand_chip *chip)
+{
+	struct disk *ndisk, *rdisk;
+
+	/* Create the disk device */
+	ndisk = disk_alloc();
+	ndisk->d_strategy = nand_strategy;
+	ndisk->d_ioctl = nand_ioctl;
+	ndisk->d_getattr = nand_getattr;
+	ndisk->d_name = "gnand";
+	ndisk->d_drv1 = chip;
+	ndisk->d_maxsize = chip->chip_geom.block_size;
+	ndisk->d_sectorsize = chip->chip_geom.page_size;
+	ndisk->d_mediasize = chip->chip_geom.chip_size;
+	ndisk->d_unit = chip->num +
+	    10 * device_get_unit(device_get_parent(chip->dev));
+
+	/*
+	 * When using a BBT, make the last two blocks of the device
+	 * unavailable to the user (they are used to store the bad-block table).
+	 */
+	if (chip->bbt != NULL)
+		ndisk->d_mediasize -= (2 * chip->chip_geom.block_size);
+
+	ndisk->d_flags = DISKFLAG_CANDELETE;
+
+	snprintf(ndisk->d_ident, sizeof(ndisk->d_ident),
+	    "nand: Man:0x%02x Dev:0x%02x", chip->id.man_id, chip->id.dev_id);
+	ndisk->d_rotation_rate = DISK_RR_NON_ROTATING;
+
+	disk_create(ndisk, DISK_VERSION);
+
+	/* Create the RAW disk device */
+	rdisk = disk_alloc();
+	rdisk->d_strategy = nand_strategy_raw;
+	rdisk->d_ioctl = nand_ioctl;
+	rdisk->d_getattr = nand_getattr;
+	rdisk->d_name = "gnand.raw";
+	rdisk->d_drv1 = chip;
+	rdisk->d_maxsize = chip->chip_geom.block_size;
+	rdisk->d_sectorsize = chip->chip_geom.page_size;
+	rdisk->d_mediasize = chip->chip_geom.chip_size;
+	rdisk->d_unit = chip->num +
+	    10 * device_get_unit(device_get_parent(chip->dev));
+
+	rdisk->d_flags = DISKFLAG_CANDELETE;
+
+	snprintf(rdisk->d_ident, sizeof(rdisk->d_ident),
+	    "nand_raw: Man:0x%02x Dev:0x%02x", chip->id.man_id,
+	    chip->id.dev_id);
+	rdisk->d_rotation_rate = DISK_RR_NON_ROTATING;
+
+	disk_create(rdisk, DISK_VERSION);
+
+	chip->ndisk = ndisk;
+	chip->rdisk = rdisk;
+
+	mtx_init(&chip->qlock, "NAND I/O lock", NULL, MTX_DEF);
+	bioq_init(&chip->bioq);
+
+	TASK_INIT(&chip->iotask, 0, nand_io_proc, chip);
+	chip->tq = taskqueue_create("nand_taskq", M_WAITOK,
+	    taskqueue_thread_enqueue, &chip->tq);
+	taskqueue_start_threads(&chip->tq, 1, PI_DISK, "nand taskq");
+
+	if (bootverbose)
+		device_printf(chip->dev, "Created gnand%d for chip [0x%0x, "
+		    "0x%0x]\n", ndisk->d_unit, chip->id.man_id,
+		    chip->id.dev_id);
+
+	return (0);
+}
+
+void
+destroy_geom_disk(struct nand_chip *chip)
+{
+	struct bio *bp;
+
+	taskqueue_free(chip->tq);
+	disk_destroy(chip->ndisk);
+	disk_destroy(chip->rdisk);
+
+	mtx_lock(&chip->qlock);
+	for (;;) {
+		bp = bioq_takefirst(&chip->bioq);
+		if (bp == NULL)
+			break;
+		bp->bio_error = EIO;
+		bp->bio_flags |= BIO_ERROR;
+		bp->bio_resid = bp->bio_bcount;
+
+		biodone(bp);
+	}
+	mtx_unlock(&chip->qlock);
+
+	mtx_destroy(&chip->qlock);
+}


Property changes on: trunk/sys/dev/nand/nand_geom.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
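nand_ioctl() in nand_geom.c above caps a single NAND_IO_RAW_READ/NAND_IO_RAW_PROG request at one block's worth of raw data (page data plus OOB) and internally copies it in 16-page chunks. A userland reader of a larger raw region therefore has to loop in block-sized requests; the sketch below assumes the off field is a raw byte offset (page size plus OOB size per page), which is an interpretation rather than something stated in the diff, and it reuses the headers from the earlier ioctl sketch.

/* Sketch: read 'total' raw bytes starting at 'start' in per-block requests. */
static int
raw_read_region(int fd, const struct chip_param_io *cp, off_t start,
    uint8_t *dst, off_t total)
{
	struct nand_raw_rw rw;
	off_t blk_raw, done;

	blk_raw = (off_t)cp->pages_per_block * (cp->page_size + cp->oob_size);
	for (done = 0; done < total; done += rw.len) {
		rw.off = start + done;
		rw.len = (total - done > blk_raw) ? blk_raw : total - done;
		rw.data = dst + done;
		if (ioctl(fd, NAND_IO_RAW_READ, &rw) < 0)
			return (-1);
	}
	return (0);
}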
Added: trunk/sys/dev/nand/nand_id.c
===================================================================
--- trunk/sys/dev/nand/nand_id.c	                        (rev 0)
+++ trunk/sys/dev/nand/nand_id.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,67 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nand_id.c 266065 2014-05-14 18:16:32Z ian $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <dev/nand/nand.h>
+
+struct nand_params nand_ids[] = {
+	{ { NAND_MAN_SAMSUNG, 0x75 }, "Samsung K9F5608U0B",
+	    0x20, 0x200, 0x10, 0x20, 0 },
+	{ { NAND_MAN_SAMSUNG, 0xd3 }, "Samsung NAND 1GiB 3,3V 8-bit",
+	    0x400, 0x800, 0x40, 0x40, 0 },
+	{ { NAND_MAN_SAMSUNG, 0xdc }, "Samsung NAND 512MiB 3,3V 8-bit",
+	    0x200, 0x800, 0x40, 0x40, 0 },
+	{ { NAND_MAN_SAMSUNG, 0xda }, "Samsung NAND 256MiB 3,3V 8-bit",
+	    0x100, 0x800, 0x40, 0x40, 0 },
+	{ { NAND_MAN_HYNIX, 0x76 }, "Hynix NAND 64MiB 3,3V 8-bit",
+	    0x40, 0x200, 0x10, 0x20, 0 },
+	{ { NAND_MAN_HYNIX, 0xdc }, "Hynix NAND 512MiB 3,3V 8-bit",
+	    0x200, 0x800, 0x40, 0x40, 0 },
+	{ { NAND_MAN_HYNIX, 0x79 }, "NAND 128MB 3,3V 8-bit",
+	    0x80, 0x200, 0x10, 0x20, 0 },
+	{ { NAND_MAN_STMICRO, 0xf1 }, "STMicro 128MB 3,3V 8-bit",
+	    0x80, 2048, 64, 0x40, 0 },
+	{ { NAND_MAN_MICRON, 0xcc }, "Micron NAND 512MiB 3,3V 16-bit",
+	    0x200, 2048, 64, 0x40, 0 },
+};
+
+struct nand_params *nand_get_params(struct nand_id *id)
+{
+	int i;
+
+	for (i = 0; i < sizeof(nand_ids) / sizeof(nand_ids[0]); i++)
+		if (nand_ids[i].id.man_id == id->man_id &&
+		    nand_ids[i].id.dev_id == id->dev_id)
+			return (&nand_ids[i]);
+
+	return (NULL);
+}


Property changes on: trunk/sys/dev/nand/nand_id.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
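nand_ids[] in nand_id.c above maps the READ ID (manufacturer, device) pair to static parameters for chips that do not advertise an ONFI parameter page; judging from the Samsung K9F5608U0B entry, the numeric columns appear to be chip size in MB, page size, OOB size and pages per block (an inference from the values, not stated in the file). Resolving a chip after a READ ID cycle is then a single lookup, roughly:

/* Sketch: look up static parameters for a detected non-ONFI chip. */
struct nand_id id = { NAND_MAN_SAMSUNG, 0xdc };
struct nand_params *p = nand_get_params(&id);

if (p != NULL)
	printf("%s, %u-byte pages\n", p->name, p->page_size);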
Added: trunk/sys/dev/nand/nand_if.m
===================================================================
--- trunk/sys/dev/nand/nand_if.m	                        (rev 0)
+++ trunk/sys/dev/nand/nand_if.m	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,169 @@
+/* $MidnightBSD$ */
+#-
+# Copyright (C) 2009-2012 Semihalf
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD: stable/10/sys/dev/nand/nand_if.m 235537 2012-05-17 10:11:18Z gber $
+
+# NAND chip interface description
+#
+
+#include <sys/bus.h>
+#include <dev/nand/nand.h>
+
+INTERFACE nand;
+
+CODE {
+	static int nand_method_not_supported(device_t dev)
+	{
+		return (ENOENT);
+	}
+};
+
+# Read NAND page
+#
+# Return values:
+# 0: Success
+#
+METHOD int read_page {
+	device_t dev;
+	uint32_t page;
+	void* buf;
+	uint32_t len;
+	uint32_t offset;
+};
+
+# Program NAND page
+#
+# Return values:
+# 0: Success
+#
+METHOD int program_page {
+	device_t dev;
+	uint32_t page;
+	void* buf;
+	uint32_t len;
+	uint32_t offset;
+};
+
+# Program NAND page interleaved
+#
+# Return values:
+# 0: Success
+#
+METHOD int program_page_intlv {
+	device_t dev;
+	uint32_t page;
+	void* buf;
+	uint32_t len;
+	uint32_t offset;
+} DEFAULT nand_method_not_supported;
+
+# Read NAND oob
+#
+# Return values:
+# 0: Success
+#
+METHOD int read_oob {
+	device_t dev;
+	uint32_t page;
+	void* buf;
+	uint32_t len;
+	uint32_t offset;
+};
+
+# Program NAND oob
+#
+# Return values:
+# 0: Success
+#
+METHOD int program_oob {
+	device_t dev;
+	uint32_t page;
+	void* buf;
+	uint32_t len;
+	uint32_t offset;
+};
+
+# Erase NAND block
+#
+# Return values:
+# 0: Success
+#
+METHOD int erase_block {
+	device_t dev;
+	uint32_t block;
+};
+
+# Erase NAND block interleaved
+#
+# Return values:
+# 0: Success
+#
+METHOD int erase_block_intlv {
+	device_t dev;
+	uint32_t block;
+} DEFAULT nand_method_not_supported;
+
+# NAND get status
+#
+# Return values:
+# 0: Success
+#
+METHOD int get_status {
+	device_t dev;
+	uint8_t *status;
+};
+
+# NAND check if block is bad
+#
+# Return values:
+# 0: Success
+#
+METHOD int is_blk_bad {
+	device_t dev;
+	uint32_t block_number;
+	uint8_t  *bad;
+};
+
+# NAND get ECC
+#
+#
+METHOD int get_ecc {
+	device_t dev;
+	void *buf;
+	void *ecc;
+	int *needwrite;
+};
+
+# NAND correct ECC
+#
+#
+METHOD int correct_ecc {
+	device_t dev;
+	void *buf;
+	void *readecc;
+	void *calcecc;
+};
+


Property changes on: trunk/sys/dev/nand/nand_if.m
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
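nand_if.m above is turned into nand_if.h/nand_if.c at build time, giving the NAND mid-layer NAND_*() wrappers (NAND_READ_PAGE(), NAND_IS_BLK_BAD(), and so on) that dispatch to whichever chip driver attached; methods declared with DEFAULT nand_method_not_supported return ENOENT when a driver does not provide them. A minimal kernel-side caller, as a hypothetical helper rather than anything in this changeset, might look like:

/*
 * Sketch: read page 0 of block 0 after checking its bad-block mark.
 * Requires <sys/param.h>, <sys/bus.h> and "nand_if.h" in a kernel context.
 */
static int
read_first_page(device_t chipdev, void *buf, uint32_t pagesize)
{
	uint8_t bad;
	int err;

	err = NAND_IS_BLK_BAD(chipdev, 0, &bad);
	if (err != 0)
		return (err);
	if (bad)
		return (ENXIO);
	return (NAND_READ_PAGE(chipdev, 0, buf, pagesize, 0));
}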
Added: trunk/sys/dev/nand/nandbus.c
===================================================================
--- trunk/sys/dev/nand/nandbus.c	                        (rev 0)
+++ trunk/sys/dev/nand/nandbus.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,541 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nandbus.c 266065 2014-05-14 18:16:32Z ian $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/proc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/condvar.h>
+
+#include <dev/nand/nand.h>
+#include <dev/nand/nandbus.h>
+#include "nand_if.h"
+#include "nandbus_if.h"
+#include "nfc_if.h"
+
+#define NAND_NCS 4
+
+static int nandbus_probe(device_t dev);
+static int nandbus_attach(device_t dev);
+static int nandbus_detach(device_t dev);
+
+static int nandbus_child_location_str(device_t, device_t, char *, size_t);
+static int nandbus_child_pnpinfo_str(device_t, device_t, char *, size_t);
+
+static int nandbus_get_status(device_t, uint8_t *);
+static void nandbus_read_buffer(device_t, void *, uint32_t);
+static int nandbus_select_cs(device_t, uint8_t);
+static int nandbus_send_command(device_t, uint8_t);
+static int nandbus_send_address(device_t, uint8_t);
+static int nandbus_start_command(device_t);
+static int nandbus_wait_ready(device_t, uint8_t *);
+static void nandbus_write_buffer(device_t, void *, uint32_t);
+static int nandbus_get_ecc(device_t, void *, uint32_t, void *, int *);
+static int nandbus_correct_ecc(device_t, void *, int, void *, void *);
+static void nandbus_lock(device_t);
+static void nandbus_unlock(device_t);
+
+static int nand_readid(device_t, uint8_t *, uint8_t *);
+static int nand_probe_onfi(device_t, uint8_t *);
+static int nand_reset(device_t);
+
+struct nandbus_softc {
+	device_t dev;
+	struct cv nandbus_cv;
+	struct mtx nandbus_mtx;
+	uint8_t busy;
+};
+
+static device_method_t nandbus_methods[] = {
+	/* device interface */
+	DEVMETHOD(device_probe,		nandbus_probe),
+	DEVMETHOD(device_attach,	nandbus_attach),
+	DEVMETHOD(device_detach,	nandbus_detach),
+	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
+
+	/* bus interface */
+	DEVMETHOD(bus_print_child,	bus_generic_print_child),
+	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
+	DEVMETHOD(bus_child_pnpinfo_str, nandbus_child_pnpinfo_str),
+	DEVMETHOD(bus_child_location_str, nandbus_child_location_str),
+
+	/* nandbus interface */
+	DEVMETHOD(nandbus_get_status,	nandbus_get_status),
+	DEVMETHOD(nandbus_read_buffer,	nandbus_read_buffer),
+	DEVMETHOD(nandbus_select_cs,	nandbus_select_cs),
+	DEVMETHOD(nandbus_send_command,	nandbus_send_command),
+	DEVMETHOD(nandbus_send_address,	nandbus_send_address),
+	DEVMETHOD(nandbus_start_command,nandbus_start_command),
+	DEVMETHOD(nandbus_wait_ready,	nandbus_wait_ready),
+	DEVMETHOD(nandbus_write_buffer,	nandbus_write_buffer),
+	DEVMETHOD(nandbus_get_ecc,	nandbus_get_ecc),
+	DEVMETHOD(nandbus_correct_ecc,	nandbus_correct_ecc),
+	DEVMETHOD(nandbus_lock,		nandbus_lock),
+	DEVMETHOD(nandbus_unlock,	nandbus_unlock),
+	{ 0, 0 }
+};
+
+devclass_t nandbus_devclass;
+
+driver_t nandbus_driver = {
+	"nandbus",
+	nandbus_methods,
+	sizeof(struct nandbus_softc)
+};
+
+DRIVER_MODULE(nandbus, nand, nandbus_driver, nandbus_devclass, 0, 0);
+
+int
+nandbus_create(device_t nfc)
+{
+	device_t child;
+
+	child = device_add_child(nfc, "nandbus", -1);
+	if (!child)
+		return (ENODEV);
+
+	bus_generic_attach(nfc);
+
+	return(0);
+}
+
+void
+nandbus_destroy(device_t nfc)
+{
+	device_t *children;
+	int nchildren, i;
+
+	mtx_lock(&Giant);
+	/* Detach & delete all children */
+	if (!device_get_children(nfc, &children, &nchildren)) {
+		for (i = 0; i < nchildren; i++)
+			device_delete_child(nfc, children[i]);
+
+		free(children, M_TEMP);
+	}
+	mtx_unlock(&Giant);
+}
+
+static int
+nandbus_probe(device_t dev)
+{
+
+	device_set_desc(dev, "NAND bus");
+
+	return (0);
+}
+
+static int
+nandbus_attach(device_t dev)
+{
+	device_t child, nfc;
+	struct nand_id chip_id;
+	struct nandbus_softc *sc;
+	struct nandbus_ivar *ivar;
+	struct nand_softc *nfc_sc;
+	struct nand_params *chip_params;
+	uint8_t cs, onfi;
+
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+
+	nfc = device_get_parent(dev);
+	nfc_sc = device_get_softc(nfc);
+
+	mtx_init(&sc->nandbus_mtx, "nandbus lock", NULL, MTX_DEF);
+	cv_init(&sc->nandbus_cv, "nandbus cv");
+
+	/* Check each possible CS for existing nand devices */
+	for (cs = 0; cs < NAND_NCS; cs++) {
+		nand_debug(NDBG_BUS,"probe chip select %x", cs);
+
+		/* Select & reset chip */
+		if (nandbus_select_cs(dev, cs))
+			break;
+
+		if (nand_reset(dev))
+			continue;
+
+		/* Read manufacturer and device id */
+		if (nand_readid(dev, &chip_id.man_id, &chip_id.dev_id))
+			continue;
+
+		if (chip_id.man_id == 0xff)
+			continue;
+
+		/*
+		 * First try to get info from the table.  If that fails, see if
+		 * the chip can provide ONFI info.  We check the table first to
+		 * allow table entries to override info from chips that are
+		 * known to provide bad ONFI data.
+		 */
+		onfi = 0;
+		chip_params = nand_get_params(&chip_id);
+		if (chip_params == NULL) {
+			nand_probe_onfi(dev, &onfi);
+		}
+
+		/*
+		 * At this point it appears there is a chip at this chipselect,
+		 * so if we can't work with it, whine about it.
+		 */
+		if (chip_params == NULL && onfi == 0) {
+			if (bootverbose || (nand_debug_flag & NDBG_BUS))
+				printf("Chip params not found, chipsel: %d "
+				    "(manuf: 0x%0x, chipid: 0x%0x, onfi: %d)\n",
+				    cs, chip_id.man_id, chip_id.dev_id, onfi);
+			continue;
+		}
+
+		ivar = malloc(sizeof(struct nandbus_ivar),
+		    M_NAND, M_WAITOK);
+
+		if (onfi == 1) {
+			ivar->cs = cs;
+			ivar->cols = 0;
+			ivar->rows = 0;
+			ivar->params = NULL;
+			ivar->man_id = chip_id.man_id;
+			ivar->dev_id = chip_id.dev_id;
+			ivar->is_onfi = onfi;
+			ivar->chip_cdev_name = nfc_sc->chip_cdev_name;
+
+			child = device_add_child(dev, NULL, -1);
+			device_set_ivars(child, ivar);
+			continue;
+		}
+
+		ivar->cs = cs;
+		ivar->cols = 1;
+		ivar->rows = 2;
+		ivar->params = chip_params;
+		ivar->man_id = chip_id.man_id;
+		ivar->dev_id = chip_id.dev_id;
+		ivar->is_onfi = onfi;
+		ivar->chip_cdev_name = nfc_sc->chip_cdev_name;
+
+		/*
+		 * Check what type of device we have.
+		 * Devices bigger than 32MiB have one more row (3).
+		 */
+		if (chip_params->chip_size > 32)
+			ivar->rows++;
+		/* Large page devices have one more col (2) */
+		if (chip_params->chip_size >= 128 &&
+		    chip_params->page_size > 512)
+			ivar->cols++;
+
+		child = device_add_child(dev, NULL, -1);
+		device_set_ivars(child, ivar);
+	}
+
+	bus_generic_attach(dev);
+	return (0);
+}
+
+static int
+nandbus_detach(device_t dev)
+{
+	struct nandbus_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	bus_generic_detach(dev);
+
+	mtx_destroy(&sc->nandbus_mtx);
+	cv_destroy(&sc->nandbus_cv);
+
+	return (0);
+}
+
+static int
+nandbus_child_location_str(device_t bus, device_t child, char *buf,
+    size_t buflen)
+{
+	struct nandbus_ivar *ivar = device_get_ivars(child);
+
+	snprintf(buf, buflen, "at cs#%d", ivar->cs);
+	return (0);
+}
+
+static int
+nandbus_child_pnpinfo_str(device_t bus, device_t child, char *buf,
+    size_t buflen)
+{
+	// XXX man id, model id ????
+	*buf = '\0';
+	return (0);
+}
+
+static int
+nand_readid(device_t bus, uint8_t *man_id, uint8_t *dev_id)
+{
+	device_t nfc;
+
+	if (!bus || !man_id || !dev_id)
+		return (EINVAL);
+
+	nand_debug(NDBG_BUS,"read id");
+
+	nfc = device_get_parent(bus);
+
+	if (NFC_SEND_COMMAND(nfc, NAND_CMD_READ_ID)) {
+		nand_debug(NDBG_BUS,"Error : could not send READ ID command");
+		return (ENXIO);
+	}
+
+	if (NFC_SEND_ADDRESS(nfc, 0)) {
+		nand_debug(NDBG_BUS,"Error : could not send address to chip");
+		return (ENXIO);
+	}
+
+	if (NFC_START_COMMAND(nfc) != 0) {
+		nand_debug(NDBG_BUS,"Error : could not start command");
+		return (ENXIO);
+	}
+
+	DELAY(25);
+
+	*man_id = NFC_READ_BYTE(nfc);
+	*dev_id = NFC_READ_BYTE(nfc);
+
+	nand_debug(NDBG_BUS,"manufacturer id: %x chip id: %x", *man_id,
+	    *dev_id);
+
+	return (0);
+}
+
+static int
+nand_probe_onfi(device_t bus, uint8_t *onfi_compliant)
+{
+	device_t nfc;
+	char onfi_id[] = {'O', 'N', 'F', 'I', '\0'};
+	int i;
+
+	nand_debug(NDBG_BUS,"probing ONFI");
+
+	nfc = device_get_parent(bus);
+
+	if (NFC_SEND_COMMAND(nfc, NAND_CMD_READ_ID)) {
+		nand_debug(NDBG_BUS,"Error : could not send READ ID command");
+		return (ENXIO);
+	}
+
+	if (NFC_SEND_ADDRESS(nfc, ONFI_SIG_ADDR)) {
+		nand_debug(NDBG_BUS,"Error : could not send address to chip");
+		return (ENXIO);
+	}
+
+	if (NFC_START_COMMAND(nfc) != 0) {
+		nand_debug(NDBG_BUS,"Error : could not start command");
+		return (ENXIO);
+	}
+	for (i = 0; onfi_id[i] != '\0'; i++)
+		if (NFC_READ_BYTE(nfc) != onfi_id[i]) {
+			nand_debug(NDBG_BUS,"ONFI non-compliant");
+			*onfi_compliant = 0;
+			return (0);
+		}
+
+	nand_debug(NDBG_BUS,"ONFI compliant");
+	*onfi_compliant = 1;
+
+	return (0);
+}
+
+static int
+nand_reset(device_t bus)
+{
+	device_t nfc;
+	nand_debug(NDBG_BUS,"resetting...");
+
+	nfc = device_get_parent(bus);
+
+	if (NFC_SEND_COMMAND(nfc, NAND_CMD_RESET) != 0) {
+		nand_debug(NDBG_BUS,"Error : could not send RESET command");
+		return (ENXIO);
+	}
+
+	if (NFC_START_COMMAND(nfc) != 0) {
+		nand_debug(NDBG_BUS,"Error : could not start RESET command");
+		return (ENXIO);
+	}
+
+	DELAY(1000);
+
+	return (0);
+}
+
+void
+nandbus_lock(device_t dev)
+{
+	struct nandbus_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	mtx_lock(&sc->nandbus_mtx);
+	if (sc->busy)
+		cv_wait(&sc->nandbus_cv, &sc->nandbus_mtx);
+	sc->busy = 1;
+	mtx_unlock(&sc->nandbus_mtx);
+}
+
+void
+nandbus_unlock(device_t dev)
+{
+	struct nandbus_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	mtx_lock(&sc->nandbus_mtx);
+	sc->busy = 0;
+	cv_signal(&sc->nandbus_cv);
+	mtx_unlock(&sc->nandbus_mtx);
+}
+
+int
+nandbus_select_cs(device_t dev, uint8_t cs)
+{
+
+	return (NFC_SELECT_CS(device_get_parent(dev), cs));
+}
+
+int
+nandbus_send_command(device_t dev, uint8_t command)
+{
+	int err;
+
+	if ((err = NFC_SEND_COMMAND(device_get_parent(dev), command)))
+		nand_debug(NDBG_BUS,"Err: Could not send command %x, err %x",
+		    command, err);
+
+	return (err);
+}
+
+int
+nandbus_send_address(device_t dev, uint8_t address)
+{
+	int err;
+
+	if ((err = NFC_SEND_ADDRESS(device_get_parent(dev), address)))
+		nand_debug(NDBG_BUS,"Err: Could not send address %x, err %x",
+		    address, err);
+
+	return (err);
+}
+
+int
+nandbus_start_command(device_t dev)
+{
+	int err;
+
+	if ((err = NFC_START_COMMAND(device_get_parent(dev))))
+		nand_debug(NDBG_BUS,"Err: Could not start command, err %x",
+		    err);
+
+	return (err);
+}
+
+void
+nandbus_read_buffer(device_t dev, void *buf, uint32_t len)
+{
+
+	NFC_READ_BUF(device_get_parent(dev), buf, len);
+}
+
+void
+nandbus_write_buffer(device_t dev, void *buf, uint32_t len)
+{
+
+	NFC_WRITE_BUF(device_get_parent(dev), buf, len);
+}
+
+int
+nandbus_get_status(device_t dev, uint8_t *status)
+{
+	int err;
+
+	if ((err = NANDBUS_SEND_COMMAND(dev, NAND_CMD_STATUS)))
+		return (err);
+	if ((err = NANDBUS_START_COMMAND(dev)))
+		return (err);
+
+	*status = NFC_READ_BYTE(device_get_parent(dev));
+
+	return (0);
+}
+
+int
+nandbus_wait_ready(device_t dev, uint8_t *status)
+{
+	struct timeval tv, tv2;
+
+	tv2.tv_sec = 0;
+	tv2.tv_usec = 50 * 5000; /* 250ms */
+
+	getmicrotime(&tv);
+	timevaladd(&tv, &tv2);
+
+	do {
+		if (NANDBUS_GET_STATUS(dev, status))
+			return (ENXIO);
+
+		if (*status & NAND_STATUS_RDY)
+			return (0);
+
+		getmicrotime(&tv2);
+	} while (timevalcmp(&tv2, &tv, <=));
+
+	return (EBUSY);
+}
+
+int
+nandbus_get_ecc(device_t dev, void *buf, uint32_t pagesize, void *ecc,
+    int *needwrite)
+{
+
+	return (NFC_GET_ECC(device_get_parent(dev), buf, pagesize, ecc, needwrite));
+}
+
+int
+nandbus_correct_ecc(device_t dev, void *buf, int pagesize, void *readecc,
+    void *calcecc)
+{
+
+	return (NFC_CORRECT_ECC(device_get_parent(dev), buf, pagesize,
+	    readecc, calcecc));
+}
+


Property changes on: trunk/sys/dev/nand/nandbus.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
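
A worked example of the address-geometry heuristic in nandbus_attach() above:
the defaults are 1 column cycle and 2 row cycles; chips larger than 32 MiB get
a third row cycle, and large-page chips (at least 128 MiB with pages bigger
than 512 bytes) get a second column cycle.  The sketch below only restates
that arithmetic for a hypothetical 128 MiB part with 2048-byte pages (values
chosen for illustration):

    /* Sketch: the same row/column computation nandbus_attach() performs. */
    static void
    example_addr_cycles(void)
    {
    	uint32_t chip_size = 128;	/* MiB, illustrative */
    	uint32_t page_size = 2048;	/* bytes, illustrative */
    	uint8_t cols = 1, rows = 2;

    	if (chip_size > 32)		/* > 32 MiB: one extra row cycle */
    		rows++;
    	if (chip_size >= 128 && page_size > 512)
    		cols++;			/* large page: one extra column cycle */

    	printf("%u column + %u row address cycles\n", cols, rows);
    	/* Prints "2 column + 3 row address cycles" for this part. */
    }
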
Added: trunk/sys/dev/nand/nandbus.h
===================================================================
--- trunk/sys/dev/nand/nandbus.h	                        (rev 0)
+++ trunk/sys/dev/nand/nandbus.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,50 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/nand/nandbus.h 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#ifndef _NANDBUS_H_
+#define _NANDBUS_H_
+
+struct nandbus_ivar {
+	uint8_t			 cs;
+	uint8_t 		 cols;
+	uint8_t 		 rows;
+	uint8_t			 man_id;
+	uint8_t			 dev_id;
+	uint8_t 		 is_onfi;
+	char			*chip_cdev_name;
+	struct nand_params	*params;
+};
+
+extern devclass_t	nandbus_devclass;
+extern driver_t		nandbus_driver;
+
+int  nandbus_create(device_t nfc);
+void nandbus_destroy(device_t nfc);
+
+#endif /* _NANDBUS_H_ */


Property changes on: trunk/sys/dev/nand/nandbus.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/nand/nandbus_if.m
===================================================================
--- trunk/sys/dev/nand/nandbus_if.m	                        (rev 0)
+++ trunk/sys/dev/nand/nandbus_if.m	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,101 @@
+/* $MidnightBSD$ */
+#-
+# Copyright (C) 2009-2012 Semihalf
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD: stable/10/sys/dev/nand/nandbus_if.m 235537 2012-05-17 10:11:18Z gber $
+
+# NAND bus interface description
+#
+
+#include <sys/bus.h>
+#include <dev/nand/nand.h>
+
+INTERFACE nandbus;
+
+METHOD int get_status {
+	device_t	dev;
+	uint8_t	*	status;
+};
+
+METHOD void read_buffer {
+	device_t	dev;
+	void *		buf;
+	uint32_t	len;
+};
+
+METHOD int select_cs {
+	device_t	dev;
+	uint8_t		cs;
+};
+
+METHOD int send_command {
+	device_t	dev;
+	uint8_t		command;
+};
+
+METHOD int send_address {
+	device_t	dev;
+	uint8_t		address;
+};
+
+METHOD int start_command {
+	device_t	dev;
+};
+
+METHOD int wait_ready {
+	device_t 	dev;
+	uint8_t *	status;
+};
+
+METHOD void write_buffer {
+	device_t	dev;
+	void *		buf;
+	uint32_t	len;
+};
+
+METHOD int get_ecc {
+	device_t	dev;
+	void *		buf;
+	uint32_t	pagesize;
+	void *		ecc;
+	int *		needwrite;
+};
+
+METHOD int correct_ecc {
+	device_t	dev;
+	void *		buf;
+	int		pagesize;
+	void *		readecc;
+	void *		calcecc;
+};
+
+METHOD void lock {
+	device_t	dev;
+};
+
+METHOD void unlock {
+	device_t	dev;
+};
+	


Property changes on: trunk/sys/dev/nand/nandbus_if.m
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
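
The nandbus methods above are the primitives a NAND chip driver strings
together for each operation.  As a rough sketch (assuming the generated
NANDBUS_* wrappers from this file and NAND_CMD_READ from <dev/nand/nand.h>;
the second command cycle needed by large-page reads is omitted), a page read
amounts to:

    /* Sketch only: error handling trimmed, chip select and length illustrative. */
    static int
    example_page_read(device_t chipdev, uint8_t *addr, int naddr,
        void *buf, uint32_t len)
    {
    	device_t bus = device_get_parent(chipdev);
    	uint8_t status;
    	int i, err;

    	NANDBUS_LOCK(bus);
    	NANDBUS_SELECT_CS(bus, 0);
    	NANDBUS_SEND_COMMAND(bus, NAND_CMD_READ);
    	for (i = 0; i < naddr; i++)
    		NANDBUS_SEND_ADDRESS(bus, addr[i]);
    	NANDBUS_START_COMMAND(bus);
    	err = NANDBUS_WAIT_READY(bus, &status);	/* polls up to ~250 ms */
    	if (err == 0)
    		NANDBUS_READ_BUFFER(bus, buf, len);
    	NANDBUS_UNLOCK(bus);

    	return (err);
    }
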
Added: trunk/sys/dev/nand/nandsim.c
===================================================================
--- trunk/sys/dev/nand/nandsim.c	                        (rev 0)
+++ trunk/sys/dev/nand/nandsim.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,669 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Simulated NAND controller driver */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nandsim.c 328267 2018-01-23 02:16:06Z emaste $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/malloc.h>
+
+#include <dev/nand/nand.h>
+#include <dev/nand/nandsim.h>
+#include <dev/nand/nandsim_chip.h>
+#include <dev/nand/nandsim_log.h>
+#include <dev/nand/nandsim_swap.h>
+
+struct sim_param sim;
+struct sim_ctrl_conf ctrls[MAX_SIM_DEV];
+
+static struct cdev *nandsim_dev;
+static d_ioctl_t nandsim_ioctl;
+
+static void nandsim_init_sim_param(struct sim_param *);
+static int nandsim_create_ctrl(struct sim_ctrl *);
+static int nandsim_destroy_ctrl(int);
+static int nandsim_ctrl_status(struct sim_ctrl *);
+static int nandsim_create_chip(struct sim_chip *);
+static int nandsim_destroy_chip(struct sim_ctrl_chip *);
+static int nandsim_chip_status(struct sim_chip *);
+static int nandsim_start_ctrl(int);
+static int nandsim_stop_ctrl(int);
+static int nandsim_inject_error(struct sim_error *);
+static int nandsim_get_block_state(struct sim_block_state *);
+static int nandsim_set_block_state(struct sim_block_state *);
+static int nandsim_modify(struct sim_mod *);
+static int nandsim_dump(struct sim_dump *);
+static int nandsim_restore(struct sim_dump *);
+static int nandsim_freeze(struct sim_ctrl_chip *);
+static void nandsim_print_log(struct sim_log *);
+static struct nandsim_chip *get_nandsim_chip(uint8_t, uint8_t);
+
+static struct cdevsw nandsim_cdevsw = {
+	.d_version =    D_VERSION,
+	.d_flags =	D_NEEDGIANT,
+	.d_ioctl =      nandsim_ioctl,
+	.d_name =       "nandsim",
+};
+
+int
+nandsim_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
+    int flags, struct thread *td)
+{
+	int ret = 0;
+
+	switch (cmd) {
+	case NANDSIM_SIM_PARAM:
+		nandsim_init_sim_param((struct sim_param *)data);
+		break;
+	case NANDSIM_CREATE_CTRL:
+		ret = nandsim_create_ctrl((struct sim_ctrl *)data);
+		break;
+	case NANDSIM_DESTROY_CTRL:
+		ret = nandsim_destroy_ctrl(*(int *)data);
+		break;
+	case NANDSIM_STATUS_CTRL:
+		ret = nandsim_ctrl_status((struct sim_ctrl *)data);
+		break;
+	case NANDSIM_CREATE_CHIP:
+		ret = nandsim_create_chip((struct sim_chip *)data);
+		break;
+	case NANDSIM_DESTROY_CHIP:
+		ret = nandsim_destroy_chip((struct sim_ctrl_chip *)data);
+		break;
+	case NANDSIM_STATUS_CHIP:
+		ret = nandsim_chip_status((struct sim_chip *)data);
+		break;
+	case NANDSIM_MODIFY:
+		ret = nandsim_modify((struct sim_mod *)data);
+		break;
+	case NANDSIM_START_CTRL:
+		ret = nandsim_start_ctrl(*(int *)data);
+		break;
+	case NANDSIM_STOP_CTRL:
+		ret = nandsim_stop_ctrl(*(int *)data);
+		break;
+	case NANDSIM_INJECT_ERROR:
+		ret = nandsim_inject_error((struct sim_error *)data);
+		break;
+	case NANDSIM_SET_BLOCK_STATE:
+		ret = nandsim_set_block_state((struct sim_block_state *)data);
+		break;
+	case NANDSIM_GET_BLOCK_STATE:
+		ret = nandsim_get_block_state((struct sim_block_state *)data);
+		break;
+	case NANDSIM_PRINT_LOG:
+		nandsim_print_log((struct sim_log *)data);
+		break;
+	case NANDSIM_DUMP:
+		ret = nandsim_dump((struct sim_dump *)data);
+		break;
+	case NANDSIM_RESTORE:
+		ret = nandsim_restore((struct sim_dump *)data);
+		break;
+	case NANDSIM_FREEZE:
+		ret = nandsim_freeze((struct sim_ctrl_chip *)data);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+	return (ret);
+}
+
+static void
+nandsim_init_sim_param(struct sim_param *param)
+{
+
+	if (!param)
+		return;
+
+	nand_debug(NDBG_SIM,"log level:%d output %d", param->log_level,
+	    param->log_output);
+	nandsim_log_level = param->log_level;
+	nandsim_log_output = param->log_output;
+}
+
+static int
+nandsim_create_ctrl(struct sim_ctrl *ctrl)
+{
+	struct sim_ctrl_conf *sim_ctrl;
+
+	nand_debug(NDBG_SIM,"create controller num:%d cs:%d",ctrl->num,
+	    ctrl->num_cs);
+
+	if (ctrl->num >= MAX_SIM_DEV) {
+		return (EINVAL);
+	}
+
+	sim_ctrl = &ctrls[ctrl->num];
+	if(sim_ctrl->created)
+		return (EEXIST);
+
+	sim_ctrl->num = ctrl->num;
+	sim_ctrl->num_cs = ctrl->num_cs;
+	sim_ctrl->ecc = ctrl->ecc;
+	memcpy(sim_ctrl->ecc_layout, ctrl->ecc_layout,
+	    MAX_ECC_BYTES * sizeof(ctrl->ecc_layout[0]));
+	strlcpy(sim_ctrl->filename, ctrl->filename,
+	    FILENAME_SIZE);
+	sim_ctrl->created = 1;
+
+	return (0);
+}
+
+static int
+nandsim_destroy_ctrl(int ctrl_num)
+{
+
+	nand_debug(NDBG_SIM,"destroy controller num:%d", ctrl_num);
+
+	if (ctrl_num >= MAX_SIM_DEV) {
+		return (EINVAL);
+	}
+
+	if (!ctrls[ctrl_num].created) {
+		return (ENODEV);
+	}
+
+	if (ctrls[ctrl_num].running) {
+		return (EBUSY);
+	}
+
+	memset(&ctrls[ctrl_num], 0, sizeof(ctrls[ctrl_num]));
+
+	return (0);
+}
+
+static int
+nandsim_ctrl_status(struct sim_ctrl *ctrl)
+{
+
+	nand_debug(NDBG_SIM,"status controller num:%d cs:%d",ctrl->num,
+	    ctrl->num_cs);
+
+	if (ctrl->num >= MAX_SIM_DEV) {
+		return (EINVAL);
+	}
+
+	ctrl->num_cs = ctrls[ctrl->num].num_cs;
+	ctrl->ecc = ctrls[ctrl->num].ecc;
+	memcpy(ctrl->ecc_layout, ctrls[ctrl->num].ecc_layout,
+	    MAX_ECC_BYTES * sizeof(ctrl->ecc_layout[0]));
+	strlcpy(ctrl->filename, ctrls[ctrl->num].filename,
+	    FILENAME_SIZE);
+	ctrl->running = ctrls[ctrl->num].running;
+	ctrl->created = ctrls[ctrl->num].created;
+
+	return (0);
+}
+
+static int
+nandsim_create_chip(struct sim_chip *chip)
+{
+	struct sim_chip *sim_chip;
+
+	nand_debug(NDBG_SIM,"create chip num:%d at ctrl:%d", chip->num,
+	    chip->ctrl_num);
+
+	if (chip->ctrl_num >= MAX_SIM_DEV ||
+	    chip->num >= MAX_CTRL_CS) {
+		return (EINVAL);
+	}
+
+	if (ctrls[chip->ctrl_num].chips[chip->num]) {
+		return (EEXIST);
+	}
+
+	sim_chip = malloc(sizeof(*sim_chip), M_NANDSIM,
+	    M_WAITOK);
+	if (sim_chip == NULL) {
+		return (ENOMEM);
+	}
+
+	memcpy(sim_chip, chip, sizeof(*sim_chip));
+	ctrls[chip->ctrl_num].chips[chip->num] = sim_chip;
+	sim_chip->created = 1;
+
+	return (0);
+}
+
+static int
+nandsim_destroy_chip(struct sim_ctrl_chip *chip)
+{
+	struct sim_ctrl_conf *ctrl_conf;
+
+	nand_debug(NDBG_SIM,"destroy chip num:%d at ctrl:%d", chip->chip_num,
+	    chip->ctrl_num);
+
+	if (chip->ctrl_num >= MAX_SIM_DEV ||
+	    chip->chip_num >= MAX_CTRL_CS)
+		return (EINVAL);
+
+	ctrl_conf = &ctrls[chip->ctrl_num];
+
+	if (!ctrl_conf->created || !ctrl_conf->chips[chip->chip_num])
+		return (ENODEV);
+
+	if (ctrl_conf->running)
+		return (EBUSY);
+
+	free(ctrl_conf->chips[chip->chip_num], M_NANDSIM);
+	ctrl_conf->chips[chip->chip_num] = NULL;
+
+	return (0);
+}
+
+static int
+nandsim_chip_status(struct sim_chip *chip)
+{
+	struct sim_ctrl_conf *ctrl_conf;
+
+	nand_debug(NDBG_SIM,"status for chip num:%d at ctrl:%d", chip->num,
+	    chip->ctrl_num);
+
+	if (chip->ctrl_num >= MAX_SIM_DEV &&
+	    chip->num >= MAX_CTRL_CS)
+		return (EINVAL);
+
+	ctrl_conf = &ctrls[chip->ctrl_num];
+	if (!ctrl_conf->chips[chip->num])
+		chip->created = 0;
+	else
+		memcpy(chip, ctrl_conf->chips[chip->num], sizeof(*chip));
+
+	return (0);
+}
+
+static int
+nandsim_start_ctrl(int num)
+{
+	device_t nexus, ndev;
+	devclass_t nexus_devclass;
+	int ret = 0;
+
+	nand_debug(NDBG_SIM,"start ctlr num:%d", num);
+
+	if (num >= MAX_SIM_DEV)
+		return (EINVAL);
+
+	if (!ctrls[num].created)
+		return (ENODEV);
+
+	if (ctrls[num].running)
+		return (EBUSY);
+
+	/* We will add our device as a child of the nexus0 device */
+	if (!(nexus_devclass = devclass_find("nexus")) ||
+	    !(nexus = devclass_get_device(nexus_devclass, 0)))
+		return (EFAULT);
+
+	/*
+	 * Create a newbus device representing this frontend instance
+	 *
+	 * XXX powerpc nexus doesn't implement bus_add_child, so child
+	 * must be added by device_add_child().
+	 */
+#if defined(__powerpc__)
+	ndev = device_add_child(nexus, "nandsim", num);
+#else
+	ndev = BUS_ADD_CHILD(nexus, 0, "nandsim", num);
+#endif
+	if (!ndev)
+		return (EFAULT);
+
+	mtx_lock(&Giant);
+	ret = device_probe_and_attach(ndev);
+	mtx_unlock(&Giant);
+
+	if (ret == 0) {
+		ctrls[num].sim_ctrl_dev = ndev;
+		ctrls[num].running = 1;
+	}
+
+	return (ret);
+}
+
+static int
+nandsim_stop_ctrl(int num)
+{
+	device_t nexus;
+	devclass_t nexus_devclass;
+	int ret = 0;
+
+	nand_debug(NDBG_SIM,"stop controller num:%d", num);
+
+	if (num >= MAX_SIM_DEV) {
+		return (EINVAL);
+	}
+
+	if (!ctrls[num].created || !ctrls[num].running) {
+		return (ENODEV);
+	}
+
+	/* We will add our device as a child of the nexus0 device */
+	if (!(nexus_devclass = devclass_find("nexus")) ||
+	    !(nexus = devclass_get_device(nexus_devclass, 0))) {
+		return (ENODEV);
+	}
+
+	mtx_lock(&Giant);
+	if (ctrls[num].sim_ctrl_dev) {
+		ret = device_delete_child(nexus, ctrls[num].sim_ctrl_dev);
+		ctrls[num].sim_ctrl_dev = NULL;
+	}
+	mtx_unlock(&Giant);
+
+	ctrls[num].running = 0;
+
+	return (ret);
+}
+
+static struct nandsim_chip *
+get_nandsim_chip(uint8_t ctrl_num, uint8_t chip_num)
+{
+	struct nandsim_softc *sc;
+
+	if (!ctrls[ctrl_num].sim_ctrl_dev)
+		return (NULL);
+
+	sc = device_get_softc(ctrls[ctrl_num].sim_ctrl_dev);
+	return (sc->chips[chip_num]);
+}
+
+static void
+nandsim_print_log(struct sim_log *sim_log)
+{
+	struct nandsim_softc *sc;
+	int len1, len2;
+
+	if (!ctrls[sim_log->ctrl_num].sim_ctrl_dev)
+		return;
+
+	sc = device_get_softc(ctrls[sim_log->ctrl_num].sim_ctrl_dev);
+	if (sc->log_buff) {
+		len1 = strlen(&sc->log_buff[sc->log_idx + 1]);
+		if (len1 >= sim_log->len)
+			len1 = sim_log->len;
+		copyout(&sc->log_buff[sc->log_idx + 1], sim_log->log, len1);
+		len2 = strlen(sc->log_buff);
+		if (len2 >= (sim_log->len - len1))
+			len2 = (sim_log->len - len1);
+		copyout(sc->log_buff, &sim_log->log[len1], len2);
+		sim_log->len = len1 + len2;
+	}
+}
+
+static int
+nandsim_inject_error(struct sim_error *error)
+{
+	struct nandsim_chip *chip;
+	struct block_space *bs;
+	struct onfi_params *param;
+	int page, page_size, block, offset;
+
+	nand_debug(NDBG_SIM,"inject error for chip %d at ctrl %d\n",
+	    error->chip_num, error->ctrl_num);
+
+	if (error->ctrl_num >= MAX_SIM_DEV ||
+	    error->chip_num >= MAX_CTRL_CS)
+		return (EINVAL);
+
+	if (!ctrls[error->ctrl_num].created || !ctrls[error->ctrl_num].running)
+		return (ENODEV);
+
+	chip = get_nandsim_chip(error->ctrl_num, error->chip_num);
+	param = &chip->params;
+	page_size = param->bytes_per_page + param->spare_bytes_per_page;
+	block = error->page_num / param->pages_per_block;
+	page = error->page_num % param->pages_per_block;
+
+	bs = get_bs(chip->swap, block, 1);
+	if (!bs)
+		return (EINVAL);
+
+	offset = (page * page_size) + error->column;
+	memset(&bs->blk_ptr[offset], error->pattern, error->len);
+
+	return (0);
+}
+
+static int
+nandsim_set_block_state(struct sim_block_state *bs)
+{
+	struct onfi_params *params;
+	struct nandsim_chip *chip;
+	int blocks;
+
+	nand_debug(NDBG_SIM,"set block state for %d:%d block %d\n",
+	    bs->chip_num, bs->ctrl_num, bs->block_num);
+
+	if (bs->ctrl_num >= MAX_SIM_DEV ||
+	    bs->chip_num >= MAX_CTRL_CS)
+		return (EINVAL);
+
+	chip = get_nandsim_chip(bs->ctrl_num, bs->chip_num);
+	params = &chip->params;
+	blocks = params->luns * params->blocks_per_lun;
+
+	if (bs->block_num > blocks)
+		return (EINVAL);
+
+	chip->blk_state[bs->block_num].is_bad = bs->state;
+
+	if (bs->wearout >= 0)
+		chip->blk_state[bs->block_num].wear_lev = bs->wearout;
+
+	return (0);
+}
+
+static int
+nandsim_get_block_state(struct sim_block_state *bs)
+{
+	struct onfi_params *params;
+	struct nandsim_chip *chip;
+	int blocks;
+
+	if (bs->ctrl_num >= MAX_SIM_DEV ||
+	    bs->chip_num >= MAX_CTRL_CS)
+		return (EINVAL);
+
+	nand_debug(NDBG_SIM,"get block state for %d:%d block %d\n",
+	    bs->chip_num, bs->ctrl_num, bs->block_num);
+
+	chip = get_nandsim_chip(bs->ctrl_num, bs->chip_num);
+	params = &chip->params;
+	blocks = params->luns * params->blocks_per_lun;
+
+	if (bs->block_num > blocks)
+		return (EINVAL);
+
+	bs->state = chip->blk_state[bs->block_num].is_bad;
+	bs->wearout = chip->blk_state[bs->block_num].wear_lev;
+
+	return (0);
+}
+
+static int
+nandsim_dump(struct sim_dump *dump)
+{
+	struct nandsim_chip *chip;
+	struct block_space *bs;
+	int blk_size;
+
+	nand_debug(NDBG_SIM,"dump chip %d %d\n", dump->ctrl_num, dump->chip_num);
+
+	if (dump->ctrl_num >= MAX_SIM_DEV ||
+	    dump->chip_num >= MAX_CTRL_CS)
+		return (EINVAL);
+
+	chip = get_nandsim_chip(dump->ctrl_num, dump->chip_num);
+	blk_size = chip->cg.block_size +
+	    (chip->cg.oob_size * chip->cg.pgs_per_blk);
+
+	bs = get_bs(chip->swap, dump->block_num, 0);
+	if (!bs)
+		return (EINVAL);
+
+	if (dump->len > blk_size)
+		dump->len = blk_size;
+
+	copyout(bs->blk_ptr, dump->data, dump->len);
+
+	return (0);
+}
+
+static int
+nandsim_restore(struct sim_dump *dump)
+{
+	struct nandsim_chip *chip;
+	struct block_space *bs;
+	int blk_size;
+
+	nand_debug(NDBG_SIM,"restore chip %d %d\n", dump->ctrl_num,
+	    dump->chip_num);
+
+	if (dump->ctrl_num >= MAX_SIM_DEV ||
+	    dump->chip_num >= MAX_CTRL_CS)
+		return (EINVAL);
+
+	chip = get_nandsim_chip(dump->ctrl_num, dump->chip_num);
+	blk_size = chip->cg.block_size +
+	    (chip->cg.oob_size * chip->cg.pgs_per_blk);
+
+	bs = get_bs(chip->swap, dump->block_num, 1);
+	if (!bs)
+		return (EINVAL);
+
+	if (dump->len > blk_size)
+		dump->len = blk_size;
+
+
+	copyin(dump->data, bs->blk_ptr, dump->len);
+
+	return (0);
+}
+
+static int
+nandsim_freeze(struct sim_ctrl_chip *ctrl_chip)
+{
+	struct nandsim_chip *chip;
+
+	if (ctrl_chip->ctrl_num >= MAX_SIM_DEV ||
+	    ctrl_chip->chip_num >= MAX_CTRL_CS)
+		return (EINVAL);
+
+	chip = get_nandsim_chip(ctrl_chip->ctrl_num, ctrl_chip->chip_num);
+	nandsim_chip_freeze(chip);
+
+	return (0);
+}
+
+static int
+nandsim_modify(struct sim_mod *mod)
+{
+	struct sim_chip *sim_conf = NULL;
+	struct nandsim_chip *sim_chip = NULL;
+
+	nand_debug(NDBG_SIM,"modify ctlr %d chip %d", mod->ctrl_num,
+	    mod->chip_num);
+
+	if (mod->field != SIM_MOD_LOG_LEVEL) {
+		if (mod->ctrl_num >= MAX_SIM_DEV ||
+		    mod->chip_num >= MAX_CTRL_CS)
+			return (EINVAL);
+
+		sim_conf = ctrls[mod->ctrl_num].chips[mod->chip_num];
+		sim_chip = get_nandsim_chip(mod->ctrl_num, mod->chip_num);
+	}
+
+	switch (mod->field) {
+	case SIM_MOD_LOG_LEVEL:
+		nandsim_log_level = mod->new_value;
+		break;
+	case SIM_MOD_ERASE_TIME:
+		sim_conf->erase_time = sim_chip->erase_delay = mod->new_value;
+		break;
+	case SIM_MOD_PROG_TIME:
+		sim_conf->prog_time = sim_chip->prog_delay = mod->new_value;
+		break;
+	case SIM_MOD_READ_TIME:
+		sim_conf->read_time = sim_chip->read_delay = mod->new_value;
+		break;
+	case SIM_MOD_ERROR_RATIO:
+		sim_conf->error_ratio = mod->new_value;
+		sim_chip->error_ratio = mod->new_value;
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+static int
+nandsim_modevent(module_t mod __unused, int type, void *data __unused)
+{
+	struct sim_ctrl_chip chip_ctrl;
+	int i, j;
+
+	switch (type) {
+	case MOD_LOAD:
+		nandsim_dev = make_dev(&nandsim_cdevsw, 0,
+		    UID_ROOT, GID_WHEEL, 0600, "nandsim.ioctl");
+		break;
+	case MOD_UNLOAD:
+		for (i = 0; i < MAX_SIM_DEV; i++) {
+			nandsim_stop_ctrl(i);
+			chip_ctrl.ctrl_num = i;
+			for (j = 0; j < MAX_CTRL_CS; j++) {
+				chip_ctrl.chip_num = j;
+				nandsim_destroy_chip(&chip_ctrl);
+			}
+			nandsim_destroy_ctrl(i);
+		}
+		destroy_dev(nandsim_dev);
+		break;
+	case MOD_SHUTDOWN:
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+	return (0);
+}
+
+DEV_MODULE(nandsim, nandsim_modevent, NULL);
+MODULE_VERSION(nandsim, 1);
+MODULE_DEPEND(nandsim, nand, 1, 1, 1);
+MODULE_DEPEND(nandsim, alq, 1, 1, 1);


Property changes on: trunk/sys/dev/nand/nandsim.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
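
nandsim.c only exposes the simulator through /dev/nandsim.ioctl; the actual
configuration is driven from userland with the ioctls dispatched above.  A
minimal sketch of that flow (structure fields are from nandsim.h below; the
geometry and id values are made up for illustration, and error checking is
omitted):

    /* Sketch: configure and start one simulated controller with one chip. */
    #include <sys/ioctl.h>
    #include <dev/nand/nandsim.h>
    #include <fcntl.h>
    #include <string.h>

    int
    main(void)
    {
    	struct sim_ctrl ctrl;
    	struct sim_chip chip;
    	int fd, num = 0;

    	fd = open("/dev/nandsim.ioctl", O_RDWR);

    	memset(&ctrl, 0, sizeof(ctrl));
    	ctrl.num = 0;				/* controller 0 */
    	ctrl.num_cs = 1;			/* one chip select */
    	ioctl(fd, NANDSIM_CREATE_CTRL, &ctrl);

    	memset(&chip, 0, sizeof(chip));
    	chip.ctrl_num = 0;
    	chip.num = 0;				/* sits on chip select 0 */
    	chip.manufact_id = 0x2c;		/* illustrative ids */
    	chip.device_id = 0xdc;
    	chip.page_size = 2048;
    	chip.oob_size = 64;
    	chip.pgs_per_blk = 64;
    	chip.blks_per_lun = 4096;
    	chip.luns = 1;
    	ioctl(fd, NANDSIM_CREATE_CHIP, &chip);

    	ioctl(fd, NANDSIM_START_CTRL, &num);	/* attaches nandsim0 */

    	return (0);
    }
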
Added: trunk/sys/dev/nand/nandsim.h
===================================================================
--- trunk/sys/dev/nand/nandsim.h	                        (rev 0)
+++ trunk/sys/dev/nand/nandsim.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,176 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/nand/nandsim.h 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#ifndef _NANDSIM_H_
+#define _NANDSIM_H_
+
+#include <sys/ioccom.h>
+#include <sys/types.h>
+
+#define MAX_SIM_DEV		4
+#define MAX_CTRL_CS		4
+#define MAX_ECC_BYTES		512
+#define MAX_BAD_BLOCKS		512
+#define DEV_MODEL_STR_SIZE	21
+#define MAN_STR_SIZE		13
+#define FILENAME_SIZE		20
+
+#define MAX_CHIPS	(MAX_SIM_DEV*MAX_CTRL_CS)
+
+#define NANDSIM_OUTPUT_NONE	0x0
+#define NANDSIM_OUTPUT_CONSOLE	0x1
+#define NANDSIM_OUTPUT_RAM	0x2
+#define NANDSIM_OUTPUT_FILE	0x3
+
+struct sim_ctrl_chip {
+	uint8_t		ctrl_num;
+	uint8_t		chip_num;
+};
+
+#define NANDSIM_BASE	'A'
+
+struct sim_param {
+	uint8_t	log_level;
+	uint8_t	log_output;
+};
+
+#define NANDSIM_SIM_PARAM	_IOW(NANDSIM_BASE, 1, struct sim_param)
+
+struct sim_ctrl {
+	uint8_t running;
+	uint8_t created;
+	uint8_t	num;
+	uint8_t	num_cs;
+	uint8_t ecc;
+	char	filename[FILENAME_SIZE];
+	uint16_t ecc_layout[MAX_ECC_BYTES];
+};
+#define NANDSIM_CREATE_CTRL	_IOW(NANDSIM_BASE, 2, struct sim_ctrl)
+#define NANDSIM_DESTROY_CTRL	_IOW(NANDSIM_BASE, 3, int)
+
+struct sim_chip {
+	uint8_t		num;
+	uint8_t		ctrl_num;
+	uint8_t		created;
+	uint8_t		device_id;
+	uint8_t		manufact_id;
+	char		device_model[DEV_MODEL_STR_SIZE];
+	char		manufacturer[MAN_STR_SIZE];
+	uint8_t		col_addr_cycles;
+	uint8_t		row_addr_cycles;
+	uint8_t		features;
+	uint8_t		width;
+	uint32_t	page_size;
+	uint32_t	oob_size;
+	uint32_t	pgs_per_blk;
+	uint32_t	blks_per_lun;
+	uint32_t	luns;
+
+	uint32_t	prog_time;
+	uint32_t	erase_time;
+	uint32_t	read_time;
+	uint32_t	ccs_time;
+
+	uint32_t	error_ratio;
+	uint32_t	wear_level;
+	uint32_t	bad_block_map[MAX_BAD_BLOCKS];
+	uint8_t		is_wp;
+};
+
+#define NANDSIM_CREATE_CHIP	_IOW(NANDSIM_BASE, 3, struct sim_chip)
+
+struct sim_chip_destroy {
+	uint8_t ctrl_num;
+	uint8_t chip_num;
+};
+#define NANDSIM_DESTROY_CHIP	_IOW(NANDSIM_BASE, 4, struct sim_chip_destroy)
+
+#define NANDSIM_START_CTRL	_IOW(NANDSIM_BASE, 5, int)
+#define NANDSIM_STOP_CTRL	_IOW(NANDSIM_BASE, 6, int)
+#define NANDSIM_RESTART_CTRL	_IOW(NANDSIM_BASE, 7, int)
+
+#define NANDSIM_STATUS_CTRL	_IOWR(NANDSIM_BASE, 8, struct sim_ctrl)
+#define NANDSIM_STATUS_CHIP	_IOWR(NANDSIM_BASE, 9, struct sim_chip)
+
+struct sim_mod {
+	uint8_t	chip_num;
+	uint8_t	ctrl_num;
+	uint32_t field;
+	uint32_t new_value;
+};
+#define SIM_MOD_LOG_LEVEL	0
+#define SIM_MOD_ERASE_TIME	1
+#define SIM_MOD_PROG_TIME	2
+#define SIM_MOD_READ_TIME	3
+#define SIM_MOD_CCS_TIME	4
+#define SIM_MOD_ERROR_RATIO	5
+
+#define NANDSIM_MODIFY	_IOW(NANDSIM_BASE, 10, struct sim_mod)
+#define NANDSIM_FREEZE	_IOW(NANDSIM_BASE, 11, struct sim_ctrl_chip)
+
+struct sim_error {
+	uint8_t		ctrl_num;
+	uint8_t		chip_num;
+	uint32_t	page_num;
+	uint32_t	column;
+	uint32_t	len;
+	uint32_t	pattern;
+};
+#define NANDSIM_INJECT_ERROR	_IOW(NANDSIM_BASE, 20, struct sim_error)
+
+#define NANDSIM_GOOD_BLOCK	0
+#define NANDSIM_BAD_BLOCK	1
+struct sim_block_state {
+	uint8_t		ctrl_num;
+	uint8_t		chip_num;
+	uint32_t	block_num;
+	int		wearout;
+	uint8_t		state;
+};
+#define NANDSIM_SET_BLOCK_STATE	_IOW(NANDSIM_BASE, 21, struct sim_block_state)
+#define NANDSIM_GET_BLOCK_STATE	_IOWR(NANDSIM_BASE, 22, struct sim_block_state)
+
+struct sim_log {
+	uint8_t		ctrl_num;
+	char*		log;
+	size_t		len;
+};
+#define NANDSIM_PRINT_LOG	_IOWR(NANDSIM_BASE, 23, struct sim_log)
+
+struct sim_dump {
+	uint8_t		ctrl_num;
+	uint8_t		chip_num;
+	uint32_t	block_num;
+	uint32_t	len;
+	void*		data;
+};
+#define NANDSIM_DUMP	_IOWR(NANDSIM_BASE, 24, struct sim_dump)
+#define NANDSIM_RESTORE	_IOWR(NANDSIM_BASE, 25, struct sim_dump)
+
+#endif /* _NANDSIM_H_ */


Property changes on: trunk/sys/dev/nand/nandsim.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
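
The sim_block_state and sim_error structures above are what make the simulator
useful for exercising bad-block and ECC handling: a block can be marked bad
(or given a wear-out budget) and arbitrary byte patterns can be written
straight into a stored page, bypassing the normal program path.  A short
sketch that builds on the previous snippet, reusing its descriptor for
/dev/nandsim.ioctl and with the target block and page picked arbitrarily:

    /* Sketch: fault injection through the nandsim ioctl device. */
    static void
    example_inject(int fd)
    {
    	struct sim_block_state bs;
    	struct sim_error err;

    	memset(&bs, 0, sizeof(bs));
    	bs.ctrl_num = 0;
    	bs.chip_num = 0;
    	bs.block_num = 17;		/* arbitrary block */
    	bs.wearout = -1;		/* negative: leave wear level alone */
    	bs.state = NANDSIM_BAD_BLOCK;
    	ioctl(fd, NANDSIM_SET_BLOCK_STATE, &bs);

    	memset(&err, 0, sizeof(err));
    	err.ctrl_num = 0;
    	err.chip_num = 0;
    	err.page_num = 1024;		/* arbitrary page */
    	err.column = 0;
    	err.len = 4;
    	err.pattern = 0xff;		/* overwrite 4 bytes with 0xff */
    	ioctl(fd, NANDSIM_INJECT_ERROR, &err);
    }
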
Added: trunk/sys/dev/nand/nandsim_chip.c
===================================================================
--- trunk/sys/dev/nand/nandsim_chip.c	                        (rev 0)
+++ trunk/sys/dev/nand/nandsim_chip.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,902 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nandsim_chip.c 314667 2017-03-04 13:03:31Z avg $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/kthread.h>
+#include <sys/unistd.h>
+
+#include <dev/nand/nand.h>
+#include <dev/nand/nandsim_chip.h>
+#include <dev/nand/nandsim_log.h>
+#include <dev/nand/nandsim_swap.h>
+
+MALLOC_DEFINE(M_NANDSIM, "NANDsim", "NANDsim dynamic data");
+
+#define NANDSIM_CHIP_LOCK(chip)		mtx_lock(&(chip)->ns_lock)
+#define	NANDSIM_CHIP_UNLOCK(chip)	mtx_unlock(&(chip)->ns_lock)
+
+static nandsim_evh_t erase_evh;
+static nandsim_evh_t idle_evh;
+static nandsim_evh_t poweron_evh;
+static nandsim_evh_t reset_evh;
+static nandsim_evh_t read_evh;
+static nandsim_evh_t readid_evh;
+static nandsim_evh_t readparam_evh;
+static nandsim_evh_t write_evh;
+
+static void nandsim_loop(void *);
+static void nandsim_undefined(struct nandsim_chip *, uint8_t);
+static void nandsim_bad_address(struct nandsim_chip *, uint8_t *);
+static void nandsim_ignore_address(struct nandsim_chip *, uint8_t);
+static void nandsim_sm_error(struct nandsim_chip *);
+static void nandsim_start_handler(struct nandsim_chip *, nandsim_evh_t);
+
+static void nandsim_callout_eh(void *);
+static int  nandsim_delay(struct nandsim_chip *, int);
+
+static int  nandsim_bbm_init(struct nandsim_chip *, uint32_t, uint32_t *);
+static int  nandsim_blk_state_init(struct nandsim_chip *, uint32_t, uint32_t);
+static void nandsim_blk_state_destroy(struct nandsim_chip *);
+static int  nandchip_is_block_valid(struct nandsim_chip *, int);
+
+static void nandchip_set_status(struct nandsim_chip *, uint8_t);
+static void nandchip_clear_status(struct nandsim_chip *, uint8_t);
+
+struct proc *nandsim_proc;
+
+struct nandsim_chip *
+nandsim_chip_init(struct nandsim_softc* sc, uint8_t chip_num,
+    struct sim_chip *sim_chip)
+{
+	struct nandsim_chip *chip;
+	struct onfi_params *chip_param;
+	char swapfile[20];
+	uint32_t size;
+	int error;
+
+	chip = malloc(sizeof(*chip), M_NANDSIM, M_WAITOK | M_ZERO);
+	if (!chip)
+		return (NULL);
+
+	mtx_init(&chip->ns_lock, "nandsim lock", NULL, MTX_DEF);
+	callout_init(&chip->ns_callout, 1);
+	STAILQ_INIT(&chip->nandsim_events);
+
+	chip->chip_num = chip_num;
+	chip->ctrl_num = sim_chip->ctrl_num;
+	chip->sc = sc;
+
+	if (!sim_chip->is_wp)
+		nandchip_set_status(chip, NAND_STATUS_WP);
+
+	chip_param = &chip->params;
+
+	chip->id.dev_id = sim_chip->device_id;
+	chip->id.man_id = sim_chip->manufact_id;
+
+	chip->error_ratio = sim_chip->error_ratio;
+	chip->wear_level = sim_chip->wear_level;
+	chip->prog_delay = sim_chip->prog_time;
+	chip->erase_delay = sim_chip->erase_time;
+	chip->read_delay = sim_chip->read_time;
+
+	chip_param->t_prog = sim_chip->prog_time;
+	chip_param->t_bers = sim_chip->erase_time;
+	chip_param->t_r = sim_chip->read_time;
+	bcopy("onfi", &chip_param->signature, 4);
+
+	chip_param->manufacturer_id = sim_chip->manufact_id;
+	strncpy(chip_param->manufacturer_name, sim_chip->manufacturer, 12);
+	chip_param->manufacturer_name[11] = 0;
+	strncpy(chip_param->device_model, sim_chip->device_model, 20);
+	chip_param->device_model[19] = 0;
+
+	chip_param->bytes_per_page = sim_chip->page_size;
+	chip_param->spare_bytes_per_page = sim_chip->oob_size;
+	chip_param->pages_per_block = sim_chip->pgs_per_blk;
+	chip_param->blocks_per_lun = sim_chip->blks_per_lun;
+	chip_param->luns = sim_chip->luns;
+
+	init_chip_geom(&chip->cg, chip_param->luns, chip_param->blocks_per_lun,
+	    chip_param->pages_per_block, chip_param->bytes_per_page,
+	    chip_param->spare_bytes_per_page);
+
+	chip_param->address_cycles = sim_chip->row_addr_cycles |
+	    (sim_chip->col_addr_cycles << 4);
+	chip_param->features = sim_chip->features;
+	if (sim_chip->width == 16)
+		chip_param->features |= ONFI_FEAT_16BIT;
+
+	size = chip_param->blocks_per_lun * chip_param->luns;
+
+	error = nandsim_blk_state_init(chip, size, sim_chip->wear_level);
+	if (error) {
+		mtx_destroy(&chip->ns_lock);
+		free(chip, M_NANDSIM);
+		return (NULL);
+	}
+
+	error = nandsim_bbm_init(chip, size, sim_chip->bad_block_map);
+	if (error) {
+		mtx_destroy(&chip->ns_lock);
+		nandsim_blk_state_destroy(chip);
+		free(chip, M_NANDSIM);
+		return (NULL);
+	}
+
+	nandsim_start_handler(chip, poweron_evh);
+
+	nand_debug(NDBG_SIM,"Create thread for chip%d [%8p]", chip->chip_num,
+	    chip);
+	/* Create chip thread */
+	error = kproc_kthread_add(nandsim_loop, chip, &nandsim_proc,
+	    &chip->nandsim_td, RFSTOPPED | RFHIGHPID,
+	    0, "nandsim", "chip");
+	if (error) {
+		mtx_destroy(&chip->ns_lock);
+		nandsim_blk_state_destroy(chip);
+		free(chip, M_NANDSIM);
+		return (NULL);
+	}
+
+	thread_lock(chip->nandsim_td);
+	sched_class(chip->nandsim_td, PRI_REALTIME);
+	sched_add(chip->nandsim_td, SRQ_BORING);
+	thread_unlock(chip->nandsim_td);
+
+	size = (chip_param->bytes_per_page +
+	    chip_param->spare_bytes_per_page) *
+	    chip_param->pages_per_block;
+
+	sprintf(swapfile, "chip%d%d.swp", chip->ctrl_num, chip->chip_num);
+	chip->swap = nandsim_swap_init(swapfile, chip_param->blocks_per_lun *
+	    chip_param->luns, size);
+	if (!chip->swap)
+		nandsim_chip_destroy(chip);
+
+	/* Wait for new thread to enter main loop */
+	tsleep(chip->nandsim_td, PWAIT, "ns_chip", 1 * hz);
+
+	return (chip);
+}
+
+static int
+nandsim_blk_state_init(struct nandsim_chip *chip, uint32_t size,
+    uint32_t wear_lev)
+{
+	int i;
+
+	if (!chip || size == 0)
+		return (-1);
+
+	chip->blk_state = malloc(size * sizeof(struct nandsim_block_state),
+	    M_NANDSIM, M_WAITOK | M_ZERO);
+	if (!chip->blk_state) {
+		return (-1);
+	}
+
+	for (i = 0; i < size; i++) {
+		if (wear_lev)
+			chip->blk_state[i].wear_lev = wear_lev;
+		else
+			chip->blk_state[i].wear_lev = -1;
+	}
+
+	return (0);
+}
+
+static void
+nandsim_blk_state_destroy(struct nandsim_chip *chip)
+{
+
+	if (chip && chip->blk_state)
+		free(chip->blk_state, M_NANDSIM);
+}
+
+static int
+nandsim_bbm_init(struct nandsim_chip *chip, uint32_t size,
+    uint32_t *sim_bbm)
+{
+	uint32_t index;
+	int i;
+
+	if ((chip == NULL) || (size == 0))
+		return (-1);
+
+	if (chip->blk_state == NULL)
+		return (-1);
+
+	if (sim_bbm == NULL)
+		return (0);
+
+	for (i = 0; i < MAX_BAD_BLOCKS; i++) {
+		index = sim_bbm[i];
+
+		if (index == 0xffffffff)
+			break;
+		else if (index > size)
+			return (-1);
+		else
+			chip->blk_state[index].is_bad = 1;
+	}
+
+	return (0);
+}
+
+void
+nandsim_chip_destroy(struct nandsim_chip *chip)
+{
+	struct nandsim_ev *ev;
+
+	ev = create_event(chip, NANDSIM_EV_EXIT, 0);
+	if (ev)
+		send_event(ev);
+}
+
+void
+nandsim_chip_freeze(struct nandsim_chip *chip)
+{
+
+	chip->flags |= NANDSIM_CHIP_FROZEN;
+}
+
+static void
+nandsim_loop(void *arg)
+{
+	struct nandsim_chip *chip = (struct nandsim_chip *)arg;
+	struct nandsim_ev *ev;
+
+	nand_debug(NDBG_SIM,"Start main loop for chip%d [%8p]", chip->chip_num,
+	    chip);
+	for(;;) {
+		NANDSIM_CHIP_LOCK(chip);
+		if (!(chip->flags & NANDSIM_CHIP_ACTIVE)) {
+			chip->flags |= NANDSIM_CHIP_ACTIVE;
+			wakeup(chip->nandsim_td);
+		}
+
+		if (STAILQ_EMPTY(&chip->nandsim_events)) {
+			nand_debug(NDBG_SIM,"Chip%d [%8p] going to sleep",
+			    chip->chip_num, chip);
+			msleep(chip, &chip->ns_lock, PRIBIO, "nandev", 0);
+		}
+
+		ev = STAILQ_FIRST(&chip->nandsim_events);
+		STAILQ_REMOVE_HEAD(&chip->nandsim_events, links);
+		NANDSIM_CHIP_UNLOCK(chip);
+		if (ev->type == NANDSIM_EV_EXIT) {
+			NANDSIM_CHIP_LOCK(chip);
+			destroy_event(ev);
+			wakeup(ev);
+			while (!STAILQ_EMPTY(&chip->nandsim_events)) {
+				ev = STAILQ_FIRST(&chip->nandsim_events);
+				STAILQ_REMOVE_HEAD(&chip->nandsim_events,
+				    links);
+				destroy_event(ev);
+				wakeup(ev);
+			};
+			NANDSIM_CHIP_UNLOCK(chip);
+			nandsim_log(chip, NANDSIM_LOG_SM, "destroyed\n");
+			mtx_destroy(&chip->ns_lock);
+			nandsim_blk_state_destroy(chip);
+			nandsim_swap_destroy(chip->swap);
+			free(chip, M_NANDSIM);
+			nandsim_proc = NULL;
+
+			kthread_exit();
+		}
+
+		if (!(chip->flags & NANDSIM_CHIP_FROZEN)) {
+			nand_debug(NDBG_SIM,"Chip [%x] get event [%x]",
+			    chip->chip_num, ev->type);
+			chip->ev_handler(chip, ev->type, ev->data);
+		}
+
+		wakeup(ev);
+		destroy_event(ev);
+	}
+
+}
+
+struct nandsim_ev *
+create_event(struct nandsim_chip *chip, uint8_t type, uint8_t data_size)
+{
+	struct nandsim_ev *ev;
+
+	ev = malloc(sizeof(*ev), M_NANDSIM, M_NOWAIT | M_ZERO);
+	if (!ev) {
+		nand_debug(NDBG_SIM,"Cannot create event");
+		return (NULL);
+	}
+
+	if (data_size > 0)
+		ev->data = malloc(sizeof(*ev), M_NANDSIM, M_NOWAIT | M_ZERO);
+	ev->type = type;
+	ev->chip = chip;
+
+	return (ev);
+}
+
+void
+destroy_event(struct nandsim_ev *ev)
+{
+
+	if (ev->data)
+		free(ev->data, M_NANDSIM);
+	free(ev, M_NANDSIM);
+}
+
+int
+send_event(struct nandsim_ev *ev)
+{
+	struct nandsim_chip *chip = ev->chip;
+
+	if (!(chip->flags & NANDSIM_CHIP_FROZEN)) {
+		nand_debug(NDBG_SIM,"Chip%d [%p] send event %x",
+		    chip->chip_num, chip, ev->type);
+
+		NANDSIM_CHIP_LOCK(chip);
+		STAILQ_INSERT_TAIL(&chip->nandsim_events, ev, links);
+		NANDSIM_CHIP_UNLOCK(chip);
+
+		wakeup(chip);
+		if ((ev->type != NANDSIM_EV_TIMEOUT) && chip->nandsim_td &&
+		    (curthread != chip->nandsim_td))
+			tsleep(ev, PWAIT, "ns_ev", 5 * hz);
+	}
+
+	return (0);
+}
+
+static void
+nandsim_callout_eh(void *arg)
+{
+	struct nandsim_ev *ev = (struct nandsim_ev *)arg;
+
+	send_event(ev);
+}
+
+static int
+nandsim_delay(struct nandsim_chip *chip, int timeout)
+{
+	struct nandsim_ev *ev;
+	struct timeval delay;
+	int tm;
+
+	nand_debug(NDBG_SIM,"Chip[%d] Set delay: %d", chip->chip_num, timeout);
+
+	ev = create_event(chip, NANDSIM_EV_TIMEOUT, 0);
+	if (!ev)
+		return (-1);
+
+	chip->sm_state = NANDSIM_STATE_TIMEOUT;
+	tm = (timeout/10000) * (hz / 100);
+	if (callout_reset(&chip->ns_callout, tm, nandsim_callout_eh, ev))
+		return (-1);
+
+	delay.tv_sec = chip->read_delay / 1000000;
+	delay.tv_usec = chip->read_delay % 1000000;
+	timevaladd(&chip->delay_tv, &delay);
+
+	return (0);
+}
+
+static void
+nandsim_start_handler(struct nandsim_chip *chip, nandsim_evh_t evh)
+{
+	struct nandsim_ev *ev;
+
+	chip->ev_handler = evh;
+
+	nand_debug(NDBG_SIM,"Start handler %p for chip%d [%p]", evh,
+	    chip->chip_num, chip);
+	ev = create_event(chip, NANDSIM_EV_START, 0);
+	if (!ev)
+		nandsim_sm_error(chip);
+
+	send_event(ev);
+}
+
+static void
+nandchip_set_data(struct nandsim_chip *chip, uint8_t *data, uint32_t len,
+    uint32_t idx)
+{
+
+	nand_debug(NDBG_SIM,"Chip [%x] data %p [%x] at %x", chip->chip_num,
+	    data, len, idx);
+	chip->data.data_ptr = data;
+	chip->data.size = len;
+	chip->data.index = idx;
+}
+
+static int
+nandchip_chip_space(struct nandsim_chip *chip, int32_t row, int32_t column,
+    size_t size, uint8_t writing)
+{
+	struct block_space *blk_space;
+	uint32_t lun, block, page, offset, block_size;
+	int err;
+
+	block_size = chip->cg.block_size +
+	    (chip->cg.oob_size * chip->cg.pgs_per_blk);
+
+	err = nand_row_to_blkpg(&chip->cg, row, &lun, &block, &page);
+	if (err) {
+		nand_debug(NDBG_SIM,"cannot get address\n");
+		return (-1);
+	}
+
+	if (!nandchip_is_block_valid(chip, block)) {
+		nandchip_set_data(chip, NULL, 0, 0);
+		return (-1);
+	}
+
+	blk_space = get_bs(chip->swap, block, writing);
+	if (!blk_space) {
+		nandchip_set_data(chip, NULL, 0, 0);
+		return (-1);
+	}
+
+	if (size > block_size)
+		size = block_size;
+
+	if (size == block_size) {
+		offset = 0;
+		column = 0;
+	} else
+		offset = page * (chip->cg.page_size + chip->cg.oob_size);
+
+	nandchip_set_data(chip, &blk_space->blk_ptr[offset], size, column);
+
+	return (0);
+}
+
+static int
+nandchip_get_addr_byte(struct nandsim_chip *chip, void *data, uint32_t *value)
+{
+	int ncycles = 0;
+	uint8_t byte;
+	uint8_t *buffer;
+
+	buffer = (uint8_t *)value;
+	byte = *((uint8_t *)data);
+
+	KASSERT((chip->sm_state == NANDSIM_STATE_WAIT_ADDR_ROW ||
+	    chip->sm_state == NANDSIM_STATE_WAIT_ADDR_COL),
+	    ("unexpected state"));
+
+	if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_ROW) {
+		ncycles = chip->params.address_cycles & 0xf;
+		buffer[chip->sm_addr_cycle++] = byte;
+	} else if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_COL) {
+		ncycles = (chip->params.address_cycles >> 4) & 0xf;
+		buffer[chip->sm_addr_cycle++] = byte;
+	}
+
+	nand_debug(NDBG_SIM, "Chip [%x] read addr byte: %02x (%d of %d)\n",
+	    chip->chip_num, byte, chip->sm_addr_cycle, ncycles);
+
+	if (chip->sm_addr_cycle == ncycles) {
+		chip->sm_addr_cycle = 0;
+		return (0);
+	}
+
+	return (1);
+}
+
+static int
+nandchip_is_block_valid(struct nandsim_chip *chip, int block_num)
+{
+
+	if (!chip || !chip->blk_state)
+		return (0);
+
+	if (chip->blk_state[block_num].wear_lev == 0 ||
+	    chip->blk_state[block_num].is_bad)
+		return (0);
+
+	return (1);
+}
+
+static void
+nandchip_set_status(struct nandsim_chip *chip, uint8_t flags)
+{
+
+	chip->chip_status |= flags;
+}
+
+static void
+nandchip_clear_status(struct nandsim_chip *chip, uint8_t flags)
+{
+
+	chip->chip_status &= ~flags;
+}
+
+uint8_t
+nandchip_get_status(struct nandsim_chip *chip)
+{
+	return (chip->chip_status);
+}
+
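+/*
+ * Called from the status-read path: once the simulated busy period has
+ * elapsed, mark the chip ready again.
+ */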
+void
+nandsim_chip_timeout(struct nandsim_chip *chip)
+{
+	struct timeval tv;
+
+	getmicrotime(&tv);
+
+	if (chip->sm_state == NANDSIM_STATE_TIMEOUT &&
+	    timevalcmp(&tv, &chip->delay_tv, >=)) {
+		nandchip_set_status(chip, NAND_STATUS_RDY);
+	}
+}
+
+void
+poweron_evh(struct nandsim_chip *chip, uint32_t type, void *data)
+{
+	uint8_t cmd;
+
+	if (type == NANDSIM_EV_START)
+		chip->sm_state = NANDSIM_STATE_IDLE;
+	else if (type == NANDSIM_EV_CMD) {
+		cmd = *(uint8_t *)data;
+		switch(cmd) {
+		case NAND_CMD_RESET:
+			nandsim_log(chip, NANDSIM_LOG_SM, "in RESET state\n");
+			nandsim_start_handler(chip, reset_evh);
+			break;
+		default:
+			nandsim_undefined(chip, type);
+			break;
+		}
+	} else
+		nandsim_undefined(chip, type);
+}
+
+void
+idle_evh(struct nandsim_chip *chip, uint32_t type, void *data)
+{
+	uint8_t cmd;
+
+	if (type == NANDSIM_EV_START) {
+		nandsim_log(chip, NANDSIM_LOG_SM, "in IDLE state\n");
+		chip->sm_state = NANDSIM_STATE_WAIT_CMD;
+	} else if (type == NANDSIM_EV_CMD) {
+		nandchip_clear_status(chip, NAND_STATUS_FAIL);
+		getmicrotime(&chip->delay_tv);
+		cmd = *(uint8_t *)data;
+		switch(cmd) {
+		case NAND_CMD_READ_ID:
+			nandsim_start_handler(chip, readid_evh);
+			break;
+		case NAND_CMD_READ_PARAMETER:
+			nandsim_start_handler(chip, readparam_evh);
+			break;
+		case NAND_CMD_READ:
+			nandsim_start_handler(chip, read_evh);
+			break;
+		case NAND_CMD_PROG:
+			nandsim_start_handler(chip, write_evh);
+			break;
+		case NAND_CMD_ERASE:
+			nandsim_start_handler(chip, erase_evh);
+			break;
+		default:
+			nandsim_undefined(chip, type);
+			break;
+		}
+	} else
+		nandsim_undefined(chip, type);
+}
+
+void
+readid_evh(struct nandsim_chip *chip, uint32_t type, void *data)
+{
+	struct onfi_params *params;
+	uint8_t addr;
+
+	params = &chip->params;
+
+	if (type == NANDSIM_EV_START) {
+		nandsim_log(chip, NANDSIM_LOG_SM, "in READID state\n");
+		chip->sm_state = NANDSIM_STATE_WAIT_ADDR_BYTE;
+	} else if (type == NANDSIM_EV_ADDR) {
+
+		addr = *((uint8_t *)data);
+
+		if (addr == 0x0)
+			nandchip_set_data(chip, (uint8_t *)&chip->id, 2, 0);
+		else if (addr == ONFI_SIG_ADDR)
+			nandchip_set_data(chip, (uint8_t *)&params->signature,
+			    4, 0);
+		else
+			nandsim_bad_address(chip, &addr);
+
+		nandsim_start_handler(chip, idle_evh);
+	} else
+		nandsim_undefined(chip, type);
+}
+
+void
+readparam_evh(struct nandsim_chip *chip, uint32_t type, void *data)
+{
+	struct onfi_params *params;
+	uint8_t addr;
+
+	params = &chip->params;
+
+	if (type == NANDSIM_EV_START) {
+		nandsim_log(chip, NANDSIM_LOG_SM, "in READPARAM state\n");
+		chip->sm_state = NANDSIM_STATE_WAIT_ADDR_BYTE;
+	} else if (type == NANDSIM_EV_ADDR) {
+		addr = *((uint8_t *)data);
+
+		if (addr == 0) {
+			nandchip_set_data(chip, (uint8_t *)params,
+			    sizeof(*params), 0);
+		} else
+			nandsim_bad_address(chip, &addr);
+
+		nandsim_start_handler(chip, idle_evh);
+	} else
+		nandsim_undefined(chip, type);
+}
+
+void
+read_evh(struct nandsim_chip *chip, uint32_t type, void *data)
+{
+	static uint32_t column = 0, row = 0;
+	uint32_t size;
+	uint8_t cmd;
+
+	size = chip->cg.page_size + chip->cg.oob_size;
+
+	switch (type) {
+	case NANDSIM_EV_START:
+		nandsim_log(chip, NANDSIM_LOG_SM, "in READ state\n");
+		chip->sm_state = NANDSIM_STATE_WAIT_ADDR_COL;
+		break;
+	case NANDSIM_EV_ADDR:
+		if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_COL) {
+			if (nandchip_get_addr_byte(chip, data, &column))
+				break;
+
+			chip->sm_state = NANDSIM_STATE_WAIT_ADDR_ROW;
+		} else if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_ROW) {
+			if (nandchip_get_addr_byte(chip, data, &row))
+				break;
+
+			chip->sm_state = NANDSIM_STATE_WAIT_CMD;
+		} else
+			nandsim_ignore_address(chip, *((uint8_t *)data));
+		break;
+	case NANDSIM_EV_CMD:
+		cmd = *(uint8_t *)data;
+		if (chip->sm_state == NANDSIM_STATE_WAIT_CMD &&
+		    cmd == NAND_CMD_READ_END) {
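+			/*
+			 * When a read delay is configured (and the timeout
+			 * can be scheduled) the chip goes busy and the data
+			 * window is set up by the timeout handler; otherwise
+			 * the data is exposed immediately.
+			 */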
+			if (chip->read_delay != 0 &&
+			    nandsim_delay(chip, chip->read_delay) == 0)
+				nandchip_clear_status(chip, NAND_STATUS_RDY);
+			else {
+				nandchip_chip_space(chip, row, column, size, 0);
+				nandchip_set_status(chip, NAND_STATUS_RDY);
+				nandsim_start_handler(chip, idle_evh);
+			}
+		} else
+			nandsim_undefined(chip, type);
+		break;
+	case NANDSIM_EV_TIMEOUT:
+		if (chip->sm_state == NANDSIM_STATE_TIMEOUT) {
+			nandchip_chip_space(chip, row, column, size, 0);
+			nandchip_set_status(chip, NAND_STATUS_RDY);
+			nandsim_start_handler(chip, idle_evh);
+		} else
+			nandsim_undefined(chip, type);
+		break;
+	}
+}
+
+void
+write_evh(struct nandsim_chip *chip, uint32_t type, void *data)
+{
+	static uint32_t column, row;
+	uint32_t size;
+	uint8_t cmd;
+	int err;
+
+	size = chip->cg.page_size + chip->cg.oob_size;
+
+	switch(type) {
+	case NANDSIM_EV_START:
+		nandsim_log(chip, NANDSIM_LOG_SM, "in WRITE state\n");
+		chip->sm_state = NANDSIM_STATE_WAIT_ADDR_COL;
+		break;
+	case NANDSIM_EV_ADDR:
+		if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_COL) {
+			if (nandchip_get_addr_byte(chip, data, &column))
+				break;
+
+			chip->sm_state = NANDSIM_STATE_WAIT_ADDR_ROW;
+		} else if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_ROW) {
+			if (nandchip_get_addr_byte(chip, data, &row))
+				break;
+
+			err = nandchip_chip_space(chip, row, column, size, 1);
+			if (err == -1)
+				nandchip_set_status(chip, NAND_STATUS_FAIL);
+
+			chip->sm_state = NANDSIM_STATE_WAIT_CMD;
+		} else
+			nandsim_ignore_address(chip, *((uint8_t *)data));
+		break;
+	case NANDSIM_EV_CMD:
+		cmd = *(uint8_t *)data;
+		if (chip->sm_state == NANDSIM_STATE_WAIT_CMD &&
+		    cmd == NAND_CMD_PROG_END) {
+			if (chip->prog_delay != 0 &&
+			    nandsim_delay(chip, chip->prog_delay) == 0)
+				nandchip_clear_status(chip, NAND_STATUS_RDY);
+			else {
+				nandchip_set_status(chip, NAND_STATUS_RDY);
+				nandsim_start_handler(chip, idle_evh);
+			}
+		} else
+			nandsim_undefined(chip, type);
+		break;
+	case NANDSIM_EV_TIMEOUT:
+		if (chip->sm_state == NANDSIM_STATE_TIMEOUT) {
+			nandsim_start_handler(chip, idle_evh);
+			nandchip_set_status(chip, NAND_STATUS_RDY);
+		} else
+			nandsim_undefined(chip, type);
+		break;
+	}
+}
+
+void
+erase_evh(struct nandsim_chip *chip, uint32_t type, void *data)
+{
+	static uint32_t row, block_size;
+	uint32_t lun, block, page;
+	int err;
+	uint8_t cmd;
+
+	block_size = chip->cg.block_size +
+	    (chip->cg.oob_size * chip->cg.pgs_per_blk);
+
+	switch (type) {
+	case NANDSIM_EV_START:
+		nandsim_log(chip, NANDSIM_LOG_SM, "in ERASE state\n");
+		chip->sm_state = NANDSIM_STATE_WAIT_ADDR_ROW;
+		break;
+	case NANDSIM_EV_CMD:
+		cmd = *(uint8_t *)data;
+		if (chip->sm_state == NANDSIM_STATE_WAIT_CMD &&
+		    cmd == NAND_CMD_ERASE_END) {
+			if (chip->data.data_ptr != NULL &&
+			    chip->data.size == block_size)
+				memset(chip->data.data_ptr, 0xff, block_size);
+			else
+				nand_debug(NDBG_SIM,"Bad block erase data\n");
+
+			err = nand_row_to_blkpg(&chip->cg, row, &lun,
+			    &block, &page);
+			if (!err) {
+				if (chip->blk_state[block].wear_lev > 0)
+					chip->blk_state[block].wear_lev--;
+			}
+
+			if (chip->erase_delay != 0 &&
+			    nandsim_delay(chip, chip->erase_delay) == 0)
+				nandchip_clear_status(chip, NAND_STATUS_RDY);
+			else {
+				nandchip_set_status(chip, NAND_STATUS_RDY);
+				nandsim_start_handler(chip, idle_evh);
+			}
+		} else
+			nandsim_undefined(chip, type);
+		break;
+	case NANDSIM_EV_ADDR:
+		if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_ROW) {
+			if (nandchip_get_addr_byte(chip, data, &row))
+				break;
+
+			err = nandchip_chip_space(chip, row, 0, block_size, 1);
+			if (err == -1) {
+				nandchip_set_status(chip, NAND_STATUS_FAIL);
+			}
+			chip->sm_state = NANDSIM_STATE_WAIT_CMD;
+		} else
+			nandsim_ignore_address(chip, *((uint8_t *)data));
+		break;
+	case NANDSIM_EV_TIMEOUT:
+		if (chip->sm_state == NANDSIM_STATE_TIMEOUT) {
+			nandchip_set_status(chip, NAND_STATUS_RDY);
+			nandsim_start_handler(chip, idle_evh);
+		} else
+			nandsim_undefined(chip, type);
+		break;
+	}
+}
+
+void
+reset_evh(struct nandsim_chip *chip, uint32_t type, void *data)
+{
+
+	if (type == NANDSIM_EV_START) {
+		nandsim_log(chip, NANDSIM_LOG_SM, "in RESET state\n");
+		chip->sm_state = NANDSIM_STATE_TIMEOUT;
+		nandchip_set_data(chip, NULL, 0, 0);
+		DELAY(500);
+		nandsim_start_handler(chip, idle_evh);
+	} else
+		nandsim_undefined(chip, type);
+}
+
+static void
+nandsim_undefined(struct nandsim_chip *chip, uint8_t type)
+{
+
+	nandsim_log(chip, NANDSIM_LOG_ERR,
+	    "ERR: Chip received ev %x in state %x\n",
+	    type, chip->sm_state);
+	nandsim_start_handler(chip, idle_evh);
+}
+
+static void
+nandsim_bad_address(struct nandsim_chip *chip, uint8_t *addr)
+{
+
+	nandsim_log(chip, NANDSIM_LOG_ERR,
+	    "ERR: Chip received out of range address "
+	    "%02x%02x - %02x%02x%02x\n", addr[0], addr[1], addr[2],
+	    addr[3], addr[4]);
+}
+
+static void
+nandsim_ignore_address(struct nandsim_chip *chip, uint8_t byte)
+{
+	nandsim_log(chip, NANDSIM_LOG_SM, "ignored address byte: %d\n", byte);
+}
+
+static void
+nandsim_sm_error(struct nandsim_chip *chip)
+{
+
+	nandsim_log(chip, NANDSIM_LOG_ERR, "ERR: State machine error."
+	    "Restart required.\n");
+}


Property changes on: trunk/sys/dev/nand/nandsim_chip.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/nand/nandsim_chip.h
===================================================================
--- trunk/sys/dev/nand/nandsim_chip.h	                        (rev 0)
+++ trunk/sys/dev/nand/nandsim_chip.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,160 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/nand/nandsim_chip.h 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#ifndef _NANDSIM_CHIP_H
+#define _NANDSIM_CHIP_H
+
+#include <sys/malloc.h>
+#include <sys/callout.h>
+#include <dev/nand/nand.h>
+#include <dev/nand/nandsim.h>
+#include <dev/nand/nandsim_swap.h>
+
+MALLOC_DECLARE(M_NANDSIM);
+
+#define MAX_CS_NUM	4
+struct nandsim_chip;
+
+typedef void nandsim_evh_t(struct nandsim_chip *chip, uint32_t ev, void *data);
+
+enum addr_type {
+	ADDR_NONE,
+	ADDR_ID,
+	ADDR_ROW,
+	ADDR_ROWCOL
+};
+
+struct nandsim_softc {
+	struct nand_softc	nand_dev;
+	device_t		dev;
+
+	struct nandsim_chip	*chips[MAX_CS_NUM];
+	struct nandsim_chip	*active_chip;
+
+	uint8_t			address_cycle;
+	enum addr_type		address_type;
+	int			log_idx;
+	char			*log_buff;
+	struct alq		*alq;
+};
+
+struct nandsim_ev {
+	STAILQ_ENTRY(nandsim_ev)	links;
+	struct nandsim_chip		*chip;
+	uint8_t		type;
+	void		*data;
+};
+
+struct nandsim_data {
+	uint8_t		*data_ptr;
+	uint32_t	index;
+	uint32_t	size;
+};
+
+struct nandsim_block_state {
+	int32_t		wear_lev;
+	uint8_t		is_bad;
+};
+
+#define NANDSIM_CHIP_ACTIVE	0x1
+#define NANDSIM_CHIP_FROZEN	0x2
+#define NANDSIM_CHIP_GET_STATUS	0x4
+
+struct nandsim_chip {
+	struct nandsim_softc	*sc;
+	struct thread		*nandsim_td;
+
+	STAILQ_HEAD(, nandsim_ev) nandsim_events;
+	nandsim_evh_t		*ev_handler;
+	struct mtx		ns_lock;
+	struct callout		ns_callout;
+
+	struct chip_geom	cg;
+	struct nand_id		id;
+	struct onfi_params	params;
+	struct nandsim_data	data;
+	struct nandsim_block_state *blk_state;
+
+	struct chip_swap	*swap;
+
+	uint32_t	error_ratio;
+	uint32_t	wear_level;
+	uint32_t	sm_state;
+	uint32_t	sm_addr_cycle;
+
+	uint32_t	erase_delay;
+	uint32_t	prog_delay;
+	uint32_t	read_delay;
+	struct timeval	delay_tv;
+
+	uint8_t		flags;
+	uint8_t		chip_status;
+	uint8_t		ctrl_num;
+	uint8_t		chip_num;
+};
+
+struct sim_ctrl_conf {
+	uint8_t		num;
+	uint8_t		num_cs;
+	uint8_t		ecc;
+	uint8_t		running;
+	uint8_t		created;
+	device_t	sim_ctrl_dev;
+	struct sim_chip	*chips[MAX_CTRL_CS];
+	uint16_t	ecc_layout[MAX_ECC_BYTES];
+	char		filename[FILENAME_SIZE];
+};
+
+#define NANDSIM_STATE_IDLE		0x0
+#define NANDSIM_STATE_WAIT_ADDR_BYTE	0x1
+#define NANDSIM_STATE_WAIT_CMD		0x2
+#define NANDSIM_STATE_TIMEOUT		0x3
+#define	NANDSIM_STATE_WAIT_ADDR_ROW	0x4
+#define	NANDSIM_STATE_WAIT_ADDR_COL	0x5
+
+#define NANDSIM_EV_START	0x1
+#define NANDSIM_EV_CMD		0x2
+#define NANDSIM_EV_ADDR		0x3
+#define NANDSIM_EV_TIMEOUT	0x4
+#define NANDSIM_EV_EXIT		0xff
+
+struct nandsim_chip *nandsim_chip_init(struct nandsim_softc *,
+    uint8_t, struct sim_chip *);
+void nandsim_chip_destroy(struct nandsim_chip *);
+void nandsim_chip_freeze(struct nandsim_chip *);
+void nandsim_chip_timeout(struct nandsim_chip *);
+int nandsim_chip_check_bad_block(struct nandsim_chip *, int);
+
+uint8_t nandchip_get_status(struct nandsim_chip *);
+
+void destroy_event(struct nandsim_ev *);
+int send_event(struct nandsim_ev *);
+struct nandsim_ev *create_event(struct nandsim_chip *, uint8_t, uint8_t);
+
+#endif /*  _NANDSIM_CHIP_H */


Property changes on: trunk/sys/dev/nand/nandsim_chip.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/nand/nandsim_ctrl.c
===================================================================
--- trunk/sys/dev/nand/nandsim_ctrl.c	                        (rev 0)
+++ trunk/sys/dev/nand/nandsim_ctrl.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,397 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Simulated NAND controller driver */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nandsim_ctrl.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/time.h>
+
+#include <dev/nand/nand.h>
+#include <dev/nand/nandbus.h>
+#include <dev/nand/nandsim.h>
+#include <dev/nand/nandsim_log.h>
+#include <dev/nand/nandsim_chip.h>
+#include "nfc_if.h"
+
+#define ADDRESS_SIZE	5
+
+extern struct sim_ctrl_conf ctrls[MAX_SIM_DEV];
+
+static void	byte_corrupt(struct nandsim_chip *, uint8_t *);
+
+static int	nandsim_attach(device_t);
+static int	nandsim_detach(device_t);
+static int	nandsim_probe(device_t);
+
+static uint8_t	nandsim_read_byte(device_t);
+static uint16_t	nandsim_read_word(device_t);
+static int	nandsim_select_cs(device_t, uint8_t);
+static void	nandsim_write_byte(device_t, uint8_t);
+static void	nandsim_write_word(device_t, uint16_t);
+static void	nandsim_read_buf(device_t, void *, uint32_t);
+static void	nandsim_write_buf(device_t, void *, uint32_t);
+static int	nandsim_send_command(device_t, uint8_t);
+static int	nandsim_send_address(device_t, uint8_t);
+
+static device_method_t nandsim_methods[] = {
+	DEVMETHOD(device_probe,		nandsim_probe),
+	DEVMETHOD(device_attach,	nandsim_attach),
+	DEVMETHOD(device_detach,	nandsim_detach),
+
+	DEVMETHOD(nfc_select_cs,	nandsim_select_cs),
+	DEVMETHOD(nfc_send_command,	nandsim_send_command),
+	DEVMETHOD(nfc_send_address,	nandsim_send_address),
+	DEVMETHOD(nfc_read_byte,	nandsim_read_byte),
+	DEVMETHOD(nfc_read_word,	nandsim_read_word),
+	DEVMETHOD(nfc_write_byte,	nandsim_write_byte),
+	DEVMETHOD(nfc_read_buf,		nandsim_read_buf),
+	DEVMETHOD(nfc_write_buf,	nandsim_write_buf),
+
+	{ 0, 0 },
+};
+
+static driver_t nandsim_driver = {
+	"nandsim",
+	nandsim_methods,
+	sizeof(struct nandsim_softc),
+};
+
+static devclass_t nandsim_devclass;
+DRIVER_MODULE(nandsim, nexus, nandsim_driver, nandsim_devclass, 0, 0);
+DRIVER_MODULE(nandbus, nandsim, nandbus_driver, nandbus_devclass, 0, 0);
+
+static int
+nandsim_probe(device_t dev)
+{
+
+	device_set_desc(dev, "NAND controller simulator");
+	return (BUS_PROBE_DEFAULT);
+}
+
+static int
+nandsim_attach(device_t dev)
+{
+	struct nandsim_softc *sc;
+	struct sim_ctrl_conf *params;
+	struct sim_chip *chip;
+	uint16_t *eccpos;
+	int i, err;
+
+	sc = device_get_softc(dev);
+	params = &ctrls[device_get_unit(dev)];
+
+	if (strlen(params->filename) == 0)
+		snprintf(params->filename, FILENAME_SIZE, "ctrl%d.log",
+		    params->num);
+
+	nandsim_log_init(sc, params->filename);
+	for (i = 0; i < params->num_cs; i++) {
+		chip = params->chips[i];
+		if (chip && chip->device_id != 0) {
+			sc->chips[i] = nandsim_chip_init(sc, i, chip);
+			if (chip->features & ONFI_FEAT_16BIT)
+				sc->nand_dev.flags |= NAND_16_BIT;
+		}
+	}
+
+	if (params->ecc_layout[0] != 0xffff)
+		eccpos = params->ecc_layout;
+	else
+		eccpos = NULL;
+
+	nand_init(&sc->nand_dev, dev, params->ecc, 0, 0, eccpos, "nandsim");
+
+	err = nandbus_create(dev);
+
+	return (err);
+}
+
+static int
+nandsim_detach(device_t dev)
+{
+	struct nandsim_softc *sc;
+	struct sim_ctrl_conf *params;
+	int i;
+
+	sc = device_get_softc(dev);
+	params = &ctrls[device_get_unit(dev)];
+
+	for (i = 0; i < params->num_cs; i++)
+		if (sc->chips[i] != NULL)
+			nandsim_chip_destroy(sc->chips[i]);
+
+	nandsim_log_close(sc);
+
+	return (0);
+}
+
+static int
+nandsim_select_cs(device_t dev, uint8_t cs)
+{
+	struct nandsim_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	if (cs >= MAX_CS_NUM)
+		return (EINVAL);
+
+	sc->active_chip = sc->chips[cs];
+
+	if (sc->active_chip)
+		nandsim_log(sc->active_chip, NANDSIM_LOG_EV,
+		    "Select cs %d\n", cs);
+
+	return (0);
+}
+
+static int
+nandsim_send_command(device_t dev, uint8_t command)
+{
+	struct nandsim_softc *sc;
+	struct nandsim_chip *chip;
+	struct nandsim_ev *ev;
+
+	sc = device_get_softc(dev);
+	chip = sc->active_chip;
+
+	if (chip == NULL)
+		return (0);
+
+	nandsim_log(chip, NANDSIM_LOG_EV, "Send command %x\n", command);
+
+	switch (command) {
+	case NAND_CMD_READ_ID:
+	case NAND_CMD_READ_PARAMETER:
+		sc->address_type = ADDR_ID;
+		break;
+	case NAND_CMD_ERASE:
+		sc->address_type = ADDR_ROW;
+		break;
+	case NAND_CMD_READ:
+	case NAND_CMD_PROG:
+		sc->address_type = ADDR_ROWCOL;
+		break;
+	default:
+		sc->address_type = ADDR_NONE;
+		break;
+	}
+
+	if (command == NAND_CMD_STATUS)
+		chip->flags |= NANDSIM_CHIP_GET_STATUS;
+	else {
+		ev = create_event(chip, NANDSIM_EV_CMD, 1);
+		*(uint8_t *)ev->data = command;
+		send_event(ev);
+	}
+
+	return (0);
+}
+
+static int
+nandsim_send_address(device_t dev, uint8_t addr)
+{
+	struct nandsim_ev *ev;
+	struct nandsim_softc *sc;
+	struct nandsim_chip *chip;
+
+	sc = device_get_softc(dev);
+	chip = sc->active_chip;
+
+	if (chip == NULL)
+		return (0);
+
+	KASSERT((sc->address_type != ADDR_NONE), ("unexpected address"));
+	nandsim_log(chip, NANDSIM_LOG_EV, "Send addr %x\n", addr);
+
+	ev = create_event(chip, NANDSIM_EV_ADDR, 1);
+
+	*((uint8_t *)(ev->data)) = addr;
+
+	send_event(ev);
+	return (0);
+}
+
+static uint8_t
+nandsim_read_byte(device_t dev)
+{
+	struct nandsim_softc *sc;
+	struct nandsim_chip *chip;
+	uint8_t ret = 0xff;
+
+	sc = device_get_softc(dev);
+	chip = sc->active_chip;
+
+	if (chip && !(chip->flags & NANDSIM_CHIP_FROZEN)) {
+		if (chip->flags & NANDSIM_CHIP_GET_STATUS) {
+			nandsim_chip_timeout(chip);
+			ret = nandchip_get_status(chip);
+			chip->flags &= ~NANDSIM_CHIP_GET_STATUS;
+		} else if (chip->data.index < chip->data.size) {
+			ret = chip->data.data_ptr[chip->data.index++];
+			byte_corrupt(chip, &ret);
+		}
+		nandsim_log(chip, NANDSIM_LOG_DATA, "read %02x\n", ret);
+	}
+
+	return (ret);
+}
+
+static uint16_t
+nandsim_read_word(device_t dev)
+{
+	struct nandsim_softc *sc;
+	struct nandsim_chip *chip;
+	uint16_t *data_ptr;
+	uint16_t ret = 0xffff;
+	uint8_t  *byte_ret = (uint8_t *)&ret;
+
+	sc = device_get_softc(dev);
+	chip = sc->active_chip;
+
+	if (chip && !(chip->flags & NANDSIM_CHIP_FROZEN)) {
+		if (chip->data.index + 1 < chip->data.size) {
+			data_ptr =
+			    (uint16_t *)&(chip->data.data_ptr[chip->data.index]);
+			ret = *data_ptr;
+			chip->data.index += 2;
+			byte_corrupt(chip, byte_ret);
+			byte_corrupt(chip, byte_ret + 1);
+		}
+		nandsim_log(chip, NANDSIM_LOG_DATA, "read %04x\n", ret);
+	}
+
+	return (ret);
+}
+
+static void
+nandsim_write_byte(device_t dev, uint8_t byte)
+{
+	struct nandsim_softc *sc;
+	struct nandsim_chip *chip;
+
+	sc = device_get_softc(dev);
+	chip = sc->active_chip;
+
+	if (chip && !(chip->flags & NANDSIM_CHIP_FROZEN) &&
+	    (chip->data.index < chip->data.size)) {
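+		/* NAND programming can only clear bits, so AND in the new data. */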
+		byte_corrupt(chip, &byte);
+		chip->data.data_ptr[chip->data.index] &= byte;
+		chip->data.index++;
+		nandsim_log(chip, NANDSIM_LOG_DATA, "write %02x\n", byte);
+	}
+}
+
+static void
+nandsim_write_word(device_t dev, uint16_t word)
+{
+	struct nandsim_softc *sc;
+	struct nandsim_chip *chip;
+	uint16_t *data_ptr;
+	uint8_t  *byte_ptr = (uint8_t *)&word;
+
+	sc = device_get_softc(dev);
+	chip = sc->active_chip;
+
+	if (chip && !(chip->flags & NANDSIM_CHIP_FROZEN)) {
+		if ((chip->data.index + 1) < chip->data.size) {
+			byte_corrupt(chip, byte_ptr);
+			byte_corrupt(chip, byte_ptr + 1);
+			data_ptr =
+			    (uint16_t *)&(chip->data.data_ptr[chip->data.index]);
+			*data_ptr &= word;
+			chip->data.index += 2;
+		}
+
+		nandsim_log(chip, NANDSIM_LOG_DATA, "write %04x\n", word);
+	}
+}
+
+static void
+nandsim_read_buf(device_t dev, void *buf, uint32_t len)
+{
+	struct nandsim_softc *sc;
+	uint16_t *buf16 = (uint16_t *)buf;
+	uint8_t *buf8 = (uint8_t *)buf;
+	int i;
+
+	sc = device_get_softc(dev);
+
+	if (sc->nand_dev.flags & NAND_16_BIT) {
+		for (i = 0; i < len / 2; i++)
+			buf16[i] = nandsim_read_word(dev);
+	} else {
+		for (i = 0; i < len; i++)
+			buf8[i] = nandsim_read_byte(dev);
+	}
+}
+
+static void
+nandsim_write_buf(device_t dev, void *buf, uint32_t len)
+{
+	struct nandsim_softc *sc;
+	uint16_t *buf16 = (uint16_t *)buf;
+	uint8_t *buf8 = (uint8_t *)buf;
+	int i;
+
+	sc = device_get_softc(dev);
+
+	if (sc->nand_dev.flags & NAND_16_BIT) {
+		for (i = 0; i < len / 2; i++)
+			nandsim_write_word(dev, buf16[i]);
+	} else {
+		for (i = 0; i < len; i++)
+			nandsim_write_byte(dev, buf8[i]);
+	}
+}
+
+static void
+byte_corrupt(struct nandsim_chip *chip, uint8_t *byte)
+{
+	uint32_t rand;
+	uint8_t bit;
+
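+	/*
+	 * error_ratio is the expected number of corrupted bytes per million
+	 * accesses; a corrupted byte has one randomly chosen bit flipped.
+	 */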
+	rand = random();
+	if ((rand % 1000000) < chip->error_ratio) {
+		bit = rand % 8;
+		if (*byte & (1 << bit))
+			*byte &= ~(1 << bit);
+		else
+			*byte |= (1 << bit);
+	}
+}


Property changes on: trunk/sys/dev/nand/nandsim_ctrl.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/nand/nandsim_log.c
===================================================================
--- trunk/sys/dev/nand/nandsim_log.c	                        (rev 0)
+++ trunk/sys/dev/nand/nandsim_log.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,187 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nandsim_log.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/alq.h>
+#include <sys/time.h>
+
+#include <machine/stdarg.h>
+
+#include <dev/nand/nandsim_log.h>
+
+int  nandsim_log_level;
+int  nandsim_log_output;
+int  log_size = NANDSIM_RAM_LOG_SIZE;
+
+static int  nandsim_entry_size = NANDSIM_ENTRY_SIZE;
+static int  nandsim_entry_count = NANDSIM_ENTRY_COUNT;
+static int  str_index = 0;
+static char string[NANDSIM_ENTRY_SIZE + 1] = {0};
+
+int
+nandsim_log_init(struct nandsim_softc *sc, char *filename)
+{
+	int error = 0;
+
+	if (nandsim_log_output == NANDSIM_OUTPUT_FILE) {
+		error = alq_open(&sc->alq, filename,
+		    curthread->td_ucred, 0644,
+		    nandsim_entry_size, nandsim_entry_count);
+	} else if (nandsim_log_output == NANDSIM_OUTPUT_RAM) {
+		sc->log_buff = malloc(log_size, M_NANDSIM, M_WAITOK | M_ZERO);
+		if (!sc->log_buff)
+			error = ENOMEM;
+	}
+
+	return (error);
+}
+
+void
+nandsim_log_close(struct nandsim_softc *sc)
+{
+
+	if (nandsim_log_output == NANDSIM_OUTPUT_FILE) {
+		memset(&string[str_index], 0, NANDSIM_ENTRY_SIZE - str_index);
+		alq_write(sc->alq, (void *) string, ALQ_NOWAIT);
+		str_index = 0;
+		string[0] = '\0';
+		alq_close(sc->alq);
+	} else if (nandsim_log_output == NANDSIM_OUTPUT_RAM) {
+		free(sc->log_buff, M_NANDSIM);
+		sc->log_buff = NULL;
+	}
+}
+
+void
+nandsim_log(struct nandsim_chip *chip, int level, const char *fmt, ...)
+{
+	char hdr[TIME_STR_SIZE];
+	char tmp[NANDSIM_ENTRY_SIZE];
+	struct nandsim_softc *sc;
+	struct timeval currtime;
+	va_list ap;
+	int hdr_len, len, rest;
+
+	if (nandsim_log_output == NANDSIM_OUTPUT_NONE)
+		return;
+
+	if (chip == NULL)
+		return;
+
+	sc = chip->sc;
+	if (!sc->alq && nandsim_log_output == NANDSIM_OUTPUT_FILE)
+		return;
+
+	if (level <= nandsim_log_level) {
+		microtime(&currtime);
+		hdr_len = sprintf(hdr, "%08jd.%08li [chip:%d, ctrl:%d]: ",
+		    (intmax_t)currtime.tv_sec, currtime.tv_usec,
+		    chip->chip_num, chip->ctrl_num);
+
+		switch(nandsim_log_output) {
+		case NANDSIM_OUTPUT_CONSOLE:
+			printf("%s", hdr);
+			va_start(ap, fmt);
+			vprintf(fmt, ap);
+			va_end(ap);
+			break;
+		case NANDSIM_OUTPUT_RAM:
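+			/*
+			 * The RAM log is a circular buffer; output that does
+			 * not fit at the end wraps around to the beginning.
+			 */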
+			va_start(ap, fmt);
+			len = vsnprintf(tmp, NANDSIM_ENTRY_SIZE - 1, fmt, ap);
+			tmp[NANDSIM_ENTRY_SIZE - 1] = 0;
+			va_end(ap);
+
+			rest = log_size - sc->log_idx - 1;
+			if (rest >= hdr_len) {
+				bcopy(hdr, &sc->log_buff[sc->log_idx],
+				    hdr_len);
+				sc->log_idx += hdr_len;
+				sc->log_buff[sc->log_idx] = 0;
+			} else {
+				bcopy(hdr, &sc->log_buff[sc->log_idx], rest);
+				bcopy(&hdr[rest], sc->log_buff,
+				    hdr_len - rest);
+				sc->log_idx = hdr_len - rest;
+				sc->log_buff[sc->log_idx] = 0;
+			}
+
+			rest = log_size - sc->log_idx - 1;
+			if (rest >= len) {
+				bcopy(tmp, &sc->log_buff[sc->log_idx], len);
+				sc->log_idx += len;
+				sc->log_buff[sc->log_idx] = 0;
+			} else {
+				bcopy(tmp, &sc->log_buff[sc->log_idx], rest);
+				bcopy(&tmp[rest], sc->log_buff, len - rest);
+				sc->log_idx = len - rest;
+				sc->log_buff[sc->log_idx] = 0;
+			}
+
+			break;
+
+		case NANDSIM_OUTPUT_FILE:
+			va_start(ap, fmt);
+			len = vsnprintf(tmp, NANDSIM_ENTRY_SIZE - 1, fmt, ap);
+			tmp[NANDSIM_ENTRY_SIZE - 1] = 0;
+			va_end(ap);
+
+			rest = NANDSIM_ENTRY_SIZE - str_index;
+			if (rest >= hdr_len) {
+				strcat(string, hdr);
+				str_index += hdr_len;
+			} else {
+				strlcat(string, hdr, NANDSIM_ENTRY_SIZE + 1);
+				alq_write(sc->alq, (void *) string,
+				    ALQ_NOWAIT);
+				strcpy(string, &hdr[rest]);
+				str_index = hdr_len - rest;
+			}
+			rest = NANDSIM_ENTRY_SIZE - str_index;
+			if (rest >= len) {
+				strcat(string, tmp);
+				str_index += len;
+			} else {
+				strlcat(string, tmp, NANDSIM_ENTRY_SIZE + 1);
+				alq_write(sc->alq, (void *) string,
+				    ALQ_NOWAIT);
+				strcpy(string, &tmp[rest]);
+				str_index = len - rest;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+}


Property changes on: trunk/sys/dev/nand/nandsim_log.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/nand/nandsim_log.h
===================================================================
--- trunk/sys/dev/nand/nandsim_log.h	                        (rev 0)
+++ trunk/sys/dev/nand/nandsim_log.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,53 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/nand/nandsim_log.h 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#ifndef _NANDSIM_LOG_H
+#define _NANDSIM_LOG_H
+
+#include <dev/nand/nandsim_chip.h>
+
+#define NANDSIM_ENTRY_SIZE	128
+#define NANDSIM_ENTRY_COUNT	1024
+#define NANDSIM_RAM_LOG_SIZE	16384
+#define TIME_STR_SIZE		40
+
+#define NANDSIM_LOG_ERR		1
+#define NANDSIM_LOG_SM		5
+#define NANDSIM_LOG_EV		10
+#define NANDSIM_LOG_DATA	15
+
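+/* A message is emitted when its level is <= nandsim_log_level. */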
+extern int nandsim_log_level;
+extern int nandsim_log_output;
+
+int nandsim_log_init(struct nandsim_softc *, char *);
+void nandsim_log_close(struct nandsim_softc *);
+void nandsim_log(struct nandsim_chip *, int, const char *, ...);
+
+#endif /*  _NANDSIM_LOG_H */
+


Property changes on: trunk/sys/dev/nand/nandsim_log.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/nand/nandsim_swap.c
===================================================================
--- trunk/sys/dev/nand/nandsim_swap.c	                        (rev 0)
+++ trunk/sys/dev/nand/nandsim_swap.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,382 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nandsim_swap.c 241896 2012-10-22 17:50:54Z kib $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+#include <sys/fcntl.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/lock.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <dev/nand/nandsim_chip.h>
+#include <dev/nand/nandsim_swap.h>
+
+static int  init_block_state(struct chip_swap *);
+static void destroy_block_state(struct chip_swap *);
+
+static int  create_buffers(struct chip_swap *);
+static void destroy_buffers(struct chip_swap *);
+
+static int  swap_file_open(struct chip_swap *, const char *);
+static void swap_file_close(struct chip_swap *);
+static int  swap_file_write(struct chip_swap *, struct block_state *);
+static int  swap_file_read(struct chip_swap *, struct block_state *);
+
+#define	CHIP_SWAP_CMODE		0600
+#define	CHIP_SWAP_BLOCKSPACES	2
+
+static int
+init_block_state(struct chip_swap *swap)
+{
+	struct block_state *blk_state;
+	int i;
+
+	if (swap == NULL)
+		return (-1);
+
+	blk_state = malloc(swap->nof_blks * sizeof(struct block_state),
+	    M_NANDSIM, M_WAITOK | M_ZERO);
+
+	for (i = 0; i < swap->nof_blks; i++)
+		blk_state[i].offset = 0xffffffff;
+
+	swap->blk_state = blk_state;
+
+	return (0);
+}
+
+static void
+destroy_block_state(struct chip_swap *swap)
+{
+
+	if (swap == NULL)
+		return;
+
+	if (swap->blk_state != NULL)
+		free(swap->blk_state, M_NANDSIM);
+}
+
+static int
+create_buffers(struct chip_swap *swap)
+{
+	struct block_space *block_space;
+	void *block;
+	int i;
+
+	for (i = 0; i < CHIP_SWAP_BLOCKSPACES; i++) {
+		block_space = malloc(sizeof(*block_space), M_NANDSIM, M_WAITOK);
+		block = malloc(swap->blk_size, M_NANDSIM, M_WAITOK);
+		block_space->blk_ptr = block;
+		SLIST_INSERT_HEAD(&swap->free_bs, block_space, free_link);
+		nand_debug(NDBG_SIM,"created blk_space %p[%p]\n", block_space,
+		    block);
+	}
+
+	if (i == 0)
+		return (-1);
+
+	return (0);
+}
+
+static void
+destroy_buffers(struct chip_swap *swap)
+{
+	struct block_space *blk_space;
+
+	if (swap == NULL)
+		return;
+
+	blk_space = SLIST_FIRST(&swap->free_bs);
+	while (blk_space) {
+		SLIST_REMOVE_HEAD(&swap->free_bs, free_link);
+		nand_debug(NDBG_SIM,"destroyed blk_space %p[%p]\n",
+		    blk_space, blk_space->blk_ptr);
+		free(blk_space->blk_ptr, M_NANDSIM);
+		free(blk_space, M_NANDSIM);
+		blk_space = SLIST_FIRST(&swap->free_bs);
+	}
+
+	blk_space = STAILQ_FIRST(&swap->used_bs);
+	while (blk_space) {
+		STAILQ_REMOVE_HEAD(&swap->used_bs, used_link);
+		nand_debug(NDBG_SIM,"destroyed blk_space %p[%p]\n",
+		    blk_space, blk_space->blk_ptr);
+		free(blk_space->blk_ptr, M_NANDSIM);
+		free(blk_space, M_NANDSIM);
+		blk_space = STAILQ_FIRST(&swap->used_bs);
+	}
+}
+
+static int
+swap_file_open(struct chip_swap *swap, const char *swap_file)
+{
+	struct nameidata nd;
+	int flags, error;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, swap_file,
+	    curthread);
+
+	flags = FWRITE | FREAD | O_NOFOLLOW | O_CREAT | O_TRUNC;
+
+	error = vn_open(&nd, &flags, CHIP_SWAP_CMODE, NULL);
+	if (error) {
+		nand_debug(NDBG_SIM,"Cannot create swap file %s", swap_file);
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		return (error);
+	}
+
+	swap->swap_cred = crhold(curthread->td_ucred);
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+
+	/* Leave the vnode unlocked but keep our reference to it. */
+	VOP_UNLOCK(nd.ni_vp, 0);
+
+	swap->swap_vp = nd.ni_vp;
+
+	return (0);
+}
+
+static void
+swap_file_close(struct chip_swap *swap)
+{
+
+	if (swap == NULL)
+		return;
+
+	if (swap->swap_vp == NULL)
+		return;
+
+	vn_close(swap->swap_vp, FWRITE, swap->swap_cred, curthread);
+	crfree(swap->swap_cred);
+}
+
+static int
+swap_file_write(struct chip_swap *swap, struct block_state *blk_state)
+{
+	struct block_space *blk_space;
+	struct thread *td;
+	struct mount *mp;
+	struct vnode *vp;
+	struct uio auio;
+	struct iovec aiov;
+
+	if (swap == NULL || blk_state == NULL)
+		return (-1);
+
+	blk_space = blk_state->blk_sp;
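+	/* Assign the block a slot in the swap file on its first write-out. */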
+	if (blk_state->offset == -1) {
+		blk_state->offset = swap->swap_offset;
+		swap->swap_offset += swap->blk_size;
+	}
+
+	nand_debug(NDBG_SIM,"saving %p[%p] at %x\n",
+	    blk_space, blk_space->blk_ptr, blk_state->offset);
+
+	bzero(&aiov, sizeof(aiov));
+	bzero(&auio, sizeof(auio));
+
+	aiov.iov_base = blk_space->blk_ptr;
+	aiov.iov_len = swap->blk_size;
+	td = curthread;
+	vp = swap->swap_vp;
+
+	auio.uio_iov = &aiov;
+	auio.uio_offset = blk_state->offset;
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = swap->blk_size;
+	auio.uio_td = td;
+
+	vn_start_write(vp, &mp, V_WAIT);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+	VOP_WRITE(vp, &auio, IO_UNIT, swap->swap_cred);
+	VOP_UNLOCK(vp, 0);
+	vn_finished_write(mp);
+
+	return (0);
+}
+
+static int
+swap_file_read(struct chip_swap *swap, struct block_state *blk_state)
+{
+	struct block_space *blk_space;
+	struct thread *td;
+	struct vnode *vp;
+	struct uio auio;
+	struct iovec aiov;
+
+	if (swap == NULL || blk_state == NULL)
+		return (-1);
+
+	blk_space = blk_state->blk_sp;
+
+	nand_debug(NDBG_SIM,"restore %p[%p] at %x\n",
+	    blk_space, blk_space->blk_ptr, blk_state->offset);
+
+	bzero(&aiov, sizeof(aiov));
+	bzero(&auio, sizeof(auio));
+
+	aiov.iov_base = blk_space->blk_ptr;
+	aiov.iov_len = swap->blk_size;
+	td = curthread;
+	vp = swap->swap_vp;
+
+	auio.uio_iov = &aiov;
+	auio.uio_offset = blk_state->offset;
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_rw = UIO_READ;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = swap->blk_size;
+	auio.uio_td = td;
+
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+	VOP_READ(vp, &auio, 0, swap->swap_cred);
+	VOP_UNLOCK(vp, 0);
+
+	return (0);
+}
+
+struct chip_swap *
+nandsim_swap_init(const char *swap_file, uint32_t nof_blks, uint32_t blk_size)
+{
+	struct chip_swap *swap;
+	int err = 0;
+
+	if ((swap_file == NULL) || (nof_blks == 0) || (blk_size == 0))
+		return (NULL);
+
+	swap = malloc(sizeof(*swap), M_NANDSIM, M_WAITOK | M_ZERO);
+
+	SLIST_INIT(&swap->free_bs);
+	STAILQ_INIT(&swap->used_bs);
+	swap->blk_size = blk_size;
+	swap->nof_blks = nof_blks;
+
+	err = init_block_state(swap);
+	if (err) {
+		nandsim_swap_destroy(swap);
+		return (NULL);
+	}
+
+	err = create_buffers(swap);
+	if (err) {
+		nandsim_swap_destroy(swap);
+		return (NULL);
+	}
+
+	err = swap_file_open(swap, swap_file);
+	if (err) {
+		nandsim_swap_destroy(swap);
+		return (NULL);
+	}
+
+	return (swap);
+}
+
+void
+nandsim_swap_destroy(struct chip_swap *swap)
+{
+
+	if (swap == NULL)
+		return;
+
+	destroy_block_state(swap);
+	destroy_buffers(swap);
+	swap_file_close(swap);
+	free(swap, M_NANDSIM);
+}
+
+struct block_space *
+get_bs(struct chip_swap *swap, uint32_t block, uint8_t writing)
+{
+	struct block_state *blk_state, *old_blk_state = NULL;
+	struct block_space *blk_space;
+
+	if (swap == NULL || (block >= swap->nof_blks))
+		return (NULL);
+
+	blk_state = &swap->blk_state[block];
+	nand_debug(NDBG_SIM,"blk_state %x\n", blk_state->status);
+
+	if (blk_state->status & BLOCK_ALLOCATED) {
+		blk_space = blk_state->blk_sp;
+	} else {
+		blk_space = SLIST_FIRST(&swap->free_bs);
+		if (blk_space) {
+			SLIST_REMOVE_HEAD(&swap->free_bs, free_link);
+			STAILQ_INSERT_TAIL(&swap->used_bs, blk_space,
+			    used_link);
+		} else {
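+			/*
+			 * No free buffer space: recycle the oldest entry on
+			 * the used list, flushing it to the swap file first
+			 * if it is dirty.
+			 */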
+			blk_space = STAILQ_FIRST(&swap->used_bs);
+			old_blk_state = blk_space->blk_state;
+			STAILQ_REMOVE_HEAD(&swap->used_bs, used_link);
+			STAILQ_INSERT_TAIL(&swap->used_bs, blk_space,
+			    used_link);
+			if (old_blk_state->status & BLOCK_DIRTY) {
+				swap_file_write(swap, old_blk_state);
+				old_blk_state->status &= ~BLOCK_DIRTY;
+				old_blk_state->status |= BLOCK_SWAPPED;
+			}
+		}
+	}
+
+	if (blk_space == NULL)
+		return (NULL);
+
+	if (old_blk_state != NULL) {
+		old_blk_state->status &= ~BLOCK_ALLOCATED;
+		old_blk_state->blk_sp = NULL;
+	}
+
+	blk_state->blk_sp = blk_space;
+	blk_space->blk_state = blk_state;
+
+	if (!(blk_state->status & BLOCK_ALLOCATED)) {
+		if (blk_state->status & BLOCK_SWAPPED)
+			swap_file_read(swap, blk_state);
+		else
+			memset(blk_space->blk_ptr, 0xff, swap->blk_size);
+		blk_state->status |= BLOCK_ALLOCATED;
+	}
+
+	if (writing)
+		blk_state->status |= BLOCK_DIRTY;
+
+	nand_debug(NDBG_SIM,"get_bs returned %p[%p] state %x\n", blk_space,
+	    blk_space->blk_ptr, blk_state->status);
+
+	return (blk_space);
+}


Property changes on: trunk/sys/dev/nand/nandsim_swap.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/nand/nandsim_swap.h
===================================================================
--- trunk/sys/dev/nand/nandsim_swap.h	                        (rev 0)
+++ trunk/sys/dev/nand/nandsim_swap.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,65 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/nand/nandsim_swap.h 235537 2012-05-17 10:11:18Z gber $
+ */
+
+#ifndef _NANDSIM_SWAP_CHIP_H_
+#define _NANDSIM_SWAP_CHIP_H_
+
+struct block_space {
+	SLIST_ENTRY(block_space)	free_link;
+	STAILQ_ENTRY(block_space)	used_link;
+	struct block_state		*blk_state;
+	uint8_t				*blk_ptr;
+};
+
+#define	BLOCK_ALLOCATED	0x1
+#define	BLOCK_SWAPPED	0x2
+#define	BLOCK_DIRTY	0x4
+
+struct block_state {
+	struct block_space	*blk_sp;
+	uint32_t		offset;
+	uint8_t			status;
+};
+
+struct chip_swap {
+	struct block_state		*blk_state;
+	SLIST_HEAD(,block_space)	free_bs;
+	STAILQ_HEAD(,block_space)	used_bs;
+	struct ucred			*swap_cred;
+	struct vnode			*swap_vp;
+	uint32_t			swap_offset;
+	uint32_t			blk_size;
+	uint32_t			nof_blks;
+};
+
+struct chip_swap *nandsim_swap_init(const char *, uint32_t, uint32_t);
+void nandsim_swap_destroy(struct chip_swap *);
+struct block_space *get_bs(struct chip_swap *, uint32_t, uint8_t);
+
+#endif /* _NANDSIM_SWAP_CHIP_H_ */


Property changes on: trunk/sys/dev/nand/nandsim_swap.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/nand/nfc_at91.c
===================================================================
--- trunk/sys/dev/nand/nfc_at91.c	                        (rev 0)
+++ trunk/sys/dev/nand/nfc_at91.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,294 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2013 Ian Lepore.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Atmel at91-family integrated NAND controller driver.
+ *
+ * This code relies on the board setup code (in at91/board_whatever.c) having
+ * set up the EBI and SMC registers appropriately for whatever type of nand part
+ * is on the board.
+ */
+
+#include "opt_platform.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nfc_at91.c 266217 2014-05-16 12:43:45Z ian $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/malloc.h>
+#include <sys/rman.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/time.h>
+
+#include <machine/bus.h>
+
+#include <dev/nand/nand.h>
+#include <dev/nand/nandbus.h>
+#include "nfc_if.h"
+
+#include <dev/nand/nfc_at91.h>
+#include <arm/at91/at91_smc.h>
+
+#ifdef FDT
+#include <dev/fdt/fdt_common.h>
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#endif
+
+/*
+ * Data cycles are triggered by access to any address within the EBI CS3 region
+ * that has A21 and A22 clear.  Command cycles are any access with bit A21
+ * asserted.  Address cycles are any access with bit A22 asserted (or vice
+ * versa, depending on the board wiring).  We get these parameters from the
+ * at91_nand_params structure that the board setup code is required to pass to
+ * at91_enable_nand; the board code must also enable the GPIO lines properly
+ * (that step will be moved into at91_enable_nand when the great GPIO pin
+ * renumbering happens).  We use ale (Address Latch Enable) and cle (Command
+ * Latch Enable) to match the hardware names used in NAND.
+ */
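+/*
+ * A minimal sketch (hypothetical names and values) of the board setup code
+ * described above.  The cle/ale offsets encode which address bit drives each
+ * latch, and the width, chip select and GPIO pins depend on how the NAND
+ * part is wired on a given board:
+ *
+ *	static const struct at91_nand_params board_nand = {
+ *		.cle	= 0x00200000,
+ *		.ale	= 0x00400000,
+ *		.width	= 8,
+ *		.cs	= 3,
+ *	};
+ *	at91_enable_nand(&board_nand);
+ */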
+#define	AT91_NAND_DATA		0
+
+struct at91_nand_softc {
+	struct nand_softc	nand_sc;
+	struct resource		*res;
+	struct at91_nand_params *nand_param;
+};
+
+static struct at91_nand_params nand_param;
+
+static int	at91_nand_attach(device_t);
+static int	at91_nand_probe(device_t);
+static uint8_t	at91_nand_read_byte(device_t);
+static void	at91_nand_read_buf(device_t, void *, uint32_t);
+static int	at91_nand_read_rnb(device_t);
+static int	at91_nand_select_cs(device_t, uint8_t);
+static int	at91_nand_send_command(device_t, uint8_t);
+static int	at91_nand_send_address(device_t, uint8_t);
+static void	at91_nand_write_buf(device_t, void *, uint32_t);
+
+void
+at91_enable_nand(const struct at91_nand_params *np)
+{
+	nand_param = *np;
+}
+
+static inline u_int8_t
+dev_read_1(struct at91_nand_softc *sc, bus_size_t offset)
+{
+	return bus_read_1(sc->res, offset);
+}
+
+static inline void
+dev_write_1(struct at91_nand_softc *sc, bus_size_t offset, u_int8_t value)
+{
+	bus_write_1(sc->res, offset, value);
+}
+
+static int
+at91_nand_probe(device_t dev)
+{
+#ifdef FDT
+	if (!ofw_bus_is_compatible(dev, "atmel,at91rm9200-nand"))
+		return (ENXIO);
+#endif
+	device_set_desc(dev, "AT91 Integrated NAND controller");
+	return (BUS_PROBE_DEFAULT);
+}
+
+static int
+at91_nand_attach(device_t dev)
+{
+	struct at91_nand_softc *sc;
+	int err, rid;
+
+	sc = device_get_softc(dev);
+	sc->nand_param = &nand_param;
+	if (sc->nand_param->width != 8 && sc->nand_param->width != 16) {
+		device_printf(dev, "Bad bus width (%d) defaulting to 8 bits\n",
+		    sc->nand_param->width);
+		sc->nand_param->width = 8;
+	}
+	at91_ebi_enable(sc->nand_param->cs);
+
+	rid = 0;
+	sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
+	    RF_ACTIVE);
+	if (sc->res == NULL) {
+		device_printf(dev, "could not allocate resources!\n");
+		return (ENXIO);
+	}
+
+	nand_init(&sc->nand_sc, dev, NAND_ECC_SOFT, 0, 0, NULL, NULL);
+
+	err = nandbus_create(dev);
+
+	return (err);
+}
+
+static int
+at91_nand_send_command(device_t dev, uint8_t command)
+{
+	struct at91_nand_softc *sc;
+
+        nand_debug(NDBG_DRV,"at91_nand_send_command: 0x%02x", command);
+
+	sc = device_get_softc(dev);
+	dev_write_1(sc, sc->nand_param->cle, command);
+	return (0);
+}
+
+static int
+at91_nand_send_address(device_t dev, uint8_t addr)
+{
+	struct at91_nand_softc *sc;
+
+        nand_debug(NDBG_DRV,"at91_nand_send_address: x%02x", addr);
+
+	sc = device_get_softc(dev);
+	dev_write_1(sc, sc->nand_param->ale, addr);
+	return (0);
+}
+
+static uint8_t
+at91_nand_read_byte(device_t dev)
+{
+	struct at91_nand_softc *sc;
+	uint8_t data;
+
+	sc = device_get_softc(dev);
+	data = dev_read_1(sc, AT91_NAND_DATA);
+
+        nand_debug(NDBG_DRV,"at91_nand_read_byte: 0x%02x", data);
+
+	return (data);
+}
+
+
+static void
+at91_nand_dump_buf(const char *op, void* buf, uint32_t len)
+{
+	int i;
+	uint8_t *b = buf;
+
+	printf("at91_nand_%s_buf (hex):", op);
+	for (i = 0; i < len; i++) {
+		if ((i & 0x01f) == 0)
+			printf("\n");
+		printf(" %02x", b[i]);
+	}
+	printf("\n");
+}
+
+static void
+at91_nand_read_buf(device_t dev, void* buf, uint32_t len)
+{
+	struct at91_nand_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	bus_read_multi_1(sc->res, AT91_NAND_DATA, buf, len);
+
+	if (nand_debug_flag & NDBG_DRV)
+		at91_nand_dump_buf("read", buf, len);
+}
+
+static void
+at91_nand_write_buf(device_t dev, void* buf, uint32_t len)
+{
+	struct at91_nand_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	if (nand_debug_flag & NDBG_DRV)
+		at91_nand_dump_buf("write", buf, len);
+
+	bus_write_multi_1(sc->res, AT91_NAND_DATA, buf, len);
+}
+
+static int
+at91_nand_select_cs(device_t dev, uint8_t cs)
+{
+
+	if (cs > 0)
+		return (ENODEV);
+
+	return (0);
+}
+
+static int
+at91_nand_read_rnb(device_t dev)
+{
+#if 0
+	/*
+         * XXX There's no way for this code to know which GPIO pin (if any) is
+         * attached to the chip's RNB line.  Not to worry, nothing calls this;
+         * at higher layers, all the nand code uses status commands.
+         */
+	uint32_t bits;
+
+	bits = at91_pio_gpio_get(AT91RM92_PIOD_BASE, AT91C_PIO_PD15);
+	nand_debug(NDBG_DRV,"at91_nand: read_rnb: %#x", bits);
+	return (bits != 0); /* ready */
+#endif	
+	panic("at91_nand_read_rnb() is not implemented\n");
+	return (0);
+}
+
+static device_method_t at91_nand_methods[] = {
+	DEVMETHOD(device_probe,		at91_nand_probe),
+	DEVMETHOD(device_attach,	at91_nand_attach),
+
+	DEVMETHOD(nfc_send_command,	at91_nand_send_command),
+	DEVMETHOD(nfc_send_address,	at91_nand_send_address),
+	DEVMETHOD(nfc_read_byte,	at91_nand_read_byte),
+	DEVMETHOD(nfc_read_buf,		at91_nand_read_buf),
+	DEVMETHOD(nfc_write_buf,	at91_nand_write_buf),
+	DEVMETHOD(nfc_select_cs,	at91_nand_select_cs),
+	DEVMETHOD(nfc_read_rnb,		at91_nand_read_rnb),
+
+	DEVMETHOD_END
+};
+
+static driver_t at91_nand_driver = {
+	"nand",
+	at91_nand_methods,
+	sizeof(struct at91_nand_softc),
+};
+
+static devclass_t at91_nand_devclass;
+
+#ifdef FDT
+DRIVER_MODULE(at91_nand, simplebus, at91_nand_driver, at91_nand_devclass, 0, 0);
+#else
+DRIVER_MODULE(at91_nand, atmelarm, at91_nand_driver, at91_nand_devclass, 0, 0);
+#endif


Property changes on: trunk/sys/dev/nand/nfc_at91.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/nand/nfc_at91.h
===================================================================
--- trunk/sys/dev/nand/nfc_at91.h	                        (rev 0)
+++ trunk/sys/dev/nand/nfc_at91.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,51 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2014 Warner Losh.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/nand/nfc_at91.h 266087 2014-05-14 20:31:54Z ian $
+ */
+
+/*
+ * Atmel at91-family integrated NAND controller driver.
+ *
+ * Interface to board setup code to set parameters.
+ */
+
+#ifndef	DEV_NAND_NFC_AT91_H
+#define	DEV_NAND_NFC_AT91_H 1
+
+struct at91_nand_params 
+{
+	uint32_t	ale;		/* Address for ALE (address) NAND cycles */
+	uint32_t	cle;		/* Address for CLE (command) NAND cycles */
+	uint32_t	width;		/* 8 or 16 bits (specify in bits) */
+	uint32_t	cs;		/* Chip Select NAND is connected to */
+	uint32_t	rnb_pin;	/* GPIO pin # for Read/notBusy */
+	uint32_t	nce_pin;	/* GPIO pin # for CE (active low) */
+};
+
+void at91_enable_nand(const struct at91_nand_params *);
+
+#endif /* DEV_NAND_NFC_AT91_H */


Property changes on: trunk/sys/dev/nand/nfc_at91.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
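
nfc_at91.h exists so that board setup code can describe the NAND wiring
before the controller attaches: it fills in an at91_nand_params structure
and hands it to at91_enable_nand().  A minimal sketch of such board glue
follows; the addresses, pin numbers and the board_setup_nand() helper are
placeholders for illustration, not values taken from any real board file.

/* Hypothetical board glue: an 8-bit NAND chip behind chip select 3. */
static const struct at91_nand_params board_nand_params = {
	.ale     = 0x00200000,	/* placeholder: address used for ALE cycles */
	.cle     = 0x00400000,	/* placeholder: address used for CLE cycles */
	.width   = 8,		/* 8-bit data bus */
	.cs      = 3,		/* chip select the NAND is connected to */
	.rnb_pin = 13,		/* placeholder GPIO pin sensing Ready/notBusy */
	.nce_pin = 14,		/* placeholder GPIO pin driving nCE (active low) */
};

static void
board_setup_nand(void)
{
	at91_enable_nand(&board_nand_params);
}
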
Added: trunk/sys/dev/nand/nfc_fsl.c
===================================================================
--- trunk/sys/dev/nand/nfc_fsl.c	                        (rev 0)
+++ trunk/sys/dev/nand/nfc_fsl.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,717 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2012 Juniper Networks, Inc.
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * TODO :
+ *
+ *  -- test support for small pages
+ *  -- support for reading ONFI parameters
+ *  -- support for cached and interleaving commands
+ *  -- proper setting of AL bits in FMR
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nfc_fsl.c 238046 2012-07-03 01:00:29Z marcel $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/malloc.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/time.h>
+#include <sys/kdb.h>
+
+#include <machine/bus.h>
+#include <machine/fdt.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include <powerpc/mpc85xx/lbc.h>
+
+#include <dev/nand/nand.h>
+#include <dev/nand/nandbus.h>
+
+#include "nfc_fsl.h"
+
+#include "nfc_if.h"
+
+#define LBC_READ(regname)	lbc_read_reg(dev, (LBC85XX_ ## regname))
+#define LBC_WRITE(regname, val)	lbc_write_reg(dev, (LBC85XX_ ## regname), val)
+
+enum addr_type {
+	ADDR_NONE,
+	ADDR_ID,
+	ADDR_ROW,
+	ADDR_ROWCOL
+};
+
+struct fsl_nfc_fcm {
+	/* Read-only after initialization */
+	uint32_t	reg_fmr;
+
+	/* To be preserved across "start_command" */
+	u_int		buf_ofs;
+	u_int		read_ptr;
+	u_int		status:1;
+
+	/* Command state -- cleared by "start_command" */
+	uint32_t	fcm_startzero;
+	uint32_t	reg_fcr;
+	uint32_t	reg_fir;
+	uint32_t	reg_mdr;
+	uint32_t	reg_fbcr;
+	uint32_t	reg_fbar;
+	uint32_t	reg_fpar;
+	u_int		cmdnr;
+	u_int		opnr;
+	u_int		pg_ofs;
+	enum addr_type	addr_type;
+	u_int		addr_bytes;
+	u_int		row_addr;
+	u_int		column_addr;
+	u_int		data_fir:8;
+	uint32_t	fcm_endzero;
+};
+
+struct fsl_nand_softc {
+	struct nand_softc		nand_dev;
+	device_t			dev;
+	struct resource			*res;
+	int				rid;		/* Resource ID */
+	struct lbc_devinfo		*dinfo;
+	struct fsl_nfc_fcm		fcm;
+	uint8_t				col_cycles;
+	uint8_t				row_cycles;
+	uint16_t			pgsz;		/* Page size */
+};
+
+static int	fsl_nand_attach(device_t dev);
+static int	fsl_nand_probe(device_t dev);
+static int	fsl_nand_detach(device_t dev);
+
+static int	fsl_nfc_select_cs(device_t dev, uint8_t cs);
+static int	fsl_nfc_read_rnb(device_t dev);
+static int	fsl_nfc_send_command(device_t dev, uint8_t command);
+static int	fsl_nfc_send_address(device_t dev, uint8_t address);
+static uint8_t	fsl_nfc_read_byte(device_t dev);
+static int	fsl_nfc_start_command(device_t dev);
+static void	fsl_nfc_read_buf(device_t dev, void *buf, uint32_t len);
+static void	fsl_nfc_write_buf(device_t dev, void *buf, uint32_t len);
+
+static device_method_t fsl_nand_methods[] = {
+	DEVMETHOD(device_probe,		fsl_nand_probe),
+	DEVMETHOD(device_attach,	fsl_nand_attach),
+	DEVMETHOD(device_detach,	fsl_nand_detach),
+
+	DEVMETHOD(nfc_select_cs,	fsl_nfc_select_cs),
+	DEVMETHOD(nfc_read_rnb,		fsl_nfc_read_rnb),
+	DEVMETHOD(nfc_start_command,	fsl_nfc_start_command),
+	DEVMETHOD(nfc_send_command,	fsl_nfc_send_command),
+	DEVMETHOD(nfc_send_address,	fsl_nfc_send_address),
+	DEVMETHOD(nfc_read_byte,	fsl_nfc_read_byte),
+	DEVMETHOD(nfc_read_buf,		fsl_nfc_read_buf),
+	DEVMETHOD(nfc_write_buf,	fsl_nfc_write_buf),
+	{ 0, 0 },
+};
+
+static driver_t fsl_nand_driver = {
+	"nand",
+	fsl_nand_methods,
+	sizeof(struct fsl_nand_softc),
+};
+
+static devclass_t fsl_nand_devclass;
+
+DRIVER_MODULE(fsl_nand, lbc, fsl_nand_driver, fsl_nand_devclass,
+    0, 0);
+
+static int fsl_nand_build_address(device_t dev, uint32_t page, uint32_t column);
+static int fsl_nand_chip_preprobe(device_t dev, struct nand_id *id);
+
+#ifdef NAND_DEBUG_TIMING
+static device_t fcm_devs[8];
+#endif
+
+#define CMD_SHIFT(cmd_num)	(24 - ((cmd_num) * 8))
+#define OP_SHIFT(op_num)	(28 - ((op_num) * 4))
+
+#define FSL_LARGE_PAGE_SIZE	(2112)
+#define FSL_SMALL_PAGE_SIZE	(528)
+
+static void
+fsl_nand_init_regs(struct fsl_nand_softc *sc)
+{
+	uint32_t or_v, br_v;
+	device_t dev;
+
+	dev = sc->dev;
+
+	sc->fcm.reg_fmr = (15 << FMR_CWTO_SHIFT);
+
+	/*
+	 * Setup 4 row cycles and hope that chip ignores superfluous address
+	 * bytes.
+	 */
+	sc->fcm.reg_fmr |= (2 << FMR_AL_SHIFT);
+
+	/* Reprogram BR(x) */
+	br_v = lbc_read_reg(dev, LBC85XX_BR(sc->dinfo->di_bank));
+	br_v &= 0xffff8000;
+	br_v |= 1 << 11;	/* 8-bit port size */
+	br_v |= 0 << 9;		/* No ECC checking and generation */
+	br_v |= 1 << 5;		/* FCM machine */
+	br_v |= 1;		/* Valid */
+	lbc_write_reg(dev, LBC85XX_BR(sc->dinfo->di_bank), br_v);
+
+	/* Reprogram OR(x) */
+	or_v = lbc_read_reg(dev, LBC85XX_OR(sc->dinfo->di_bank));
+	or_v &= 0xfffffc00;
+	or_v |= 0x03AE;		/* Default POR timing */
+	lbc_write_reg(dev, LBC85XX_OR(sc->dinfo->di_bank), or_v);
+
+	if (or_v & OR_FCM_PAGESIZE) {
+		sc->pgsz = FSL_LARGE_PAGE_SIZE;
+		sc->col_cycles = 2;
+		nand_debug(NDBG_DRV, "%s: large page NAND device at #%d",
+		    device_get_nameunit(dev), sc->dinfo->di_bank);
+	} else {
+		sc->pgsz = FSL_SMALL_PAGE_SIZE;
+		sc->col_cycles = 1;
+		nand_debug(NDBG_DRV, "%s: small page NAND device at #%d",
+		    device_get_nameunit(dev), sc->dinfo->di_bank);
+	}
+}
+
+static int
+fsl_nand_probe(device_t dev)
+{
+
+	if (!ofw_bus_is_compatible(dev, "fsl,elbc-fcm-nand"))
+		return (ENXIO);
+
+	device_set_desc(dev, "Freescale localbus FCM Controller");
+	return (BUS_PROBE_DEFAULT);
+}
+
+static int
+fsl_nand_attach(device_t dev)
+{
+	struct fsl_nand_softc *sc;
+	struct nand_id id;
+	struct nand_params *param;
+	uint32_t num_pages;
+
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+	sc->dinfo = device_get_ivars(dev);
+
+	sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->rid,
+	    RF_ACTIVE);
+	if (sc->res == NULL) {
+		device_printf(dev, "could not allocate resources!\n");
+		return (ENXIO);
+	}
+
+	bzero(&sc->fcm, sizeof(sc->fcm));
+
+	/* Init register and check if HW ECC turned on */
+	fsl_nand_init_regs(sc);
+
+	/* Chip is probed, so determine number of row address cycles */
+	fsl_nand_chip_preprobe(dev, &id);
+	param = nand_get_params(&id);
+	if (param != NULL) {
+		num_pages = (param->chip_size << 20) / param->page_size;
+		while (num_pages) {
+			sc->row_cycles++;
+			num_pages >>= 8;
+		}
+
+		sc->fcm.reg_fmr &= ~(FMR_AL);
+		sc->fcm.reg_fmr |= (sc->row_cycles - 2) << FMR_AL_SHIFT;
+	}
+
+	nand_init(&sc->nand_dev, dev, NAND_ECC_SOFT, 0, 0, NULL, NULL);
+
+#ifdef NAND_DEBUG_TIMING
+	fcm_devs[sc->dinfo->di_bank] = dev;
+#endif
+
+	return (nandbus_create(dev));
+}
+
+static int
+fsl_nand_detach(device_t dev)
+{
+	struct fsl_nand_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	if (sc->res != NULL)
+		bus_release_resource(dev, SYS_RES_MEMORY, sc->rid, sc->res);
+
+	return (0);
+}
+
+static int
+fsl_nfc_select_cs(device_t dev, uint8_t cs)
+{
+
+	// device_printf(dev, "%s(cs=%u)\n", __func__, cs);
+	return ((cs > 0) ? EINVAL : 0);
+}
+
+static int
+fsl_nfc_read_rnb(device_t dev)
+{
+
+	// device_printf(dev, "%s()\n", __func__);
+	return (0);
+}
+
+static int
+fsl_nfc_send_command(device_t dev, uint8_t command)
+{
+	struct fsl_nand_softc *sc;
+	struct fsl_nfc_fcm *fcm;
+	uint8_t	fir_op;
+
+	// device_printf(dev, "%s(command=%u)\n", __func__, command);
+
+	sc = device_get_softc(dev);
+	fcm = &sc->fcm;
+
+	if (command == NAND_CMD_PROG_END) {
+		fcm->reg_fir |= (FIR_OP_WB << OP_SHIFT(fcm->opnr));
+		fcm->opnr++;
+	}
+	fcm->reg_fcr |= command << CMD_SHIFT(fcm->cmdnr);
+	fir_op = (fcm->cmdnr == 0) ? FIR_OP_CW0 : FIR_OP_CM(fcm->cmdnr);
+	fcm->cmdnr++;
+
+	fcm->reg_fir |= (fir_op << OP_SHIFT(fcm->opnr));
+	fcm->opnr++;
+
+	switch (command) {
+	case NAND_CMD_READ_ID:
+		fcm->data_fir = FIR_OP_RBW;
+		fcm->addr_type = ADDR_ID;
+		break;
+	case NAND_CMD_SMALLOOB:
+		fcm->pg_ofs += 256;
+		/*FALLTHROUGH*/
+	case NAND_CMD_SMALLB:
+		fcm->pg_ofs += 256;
+		/*FALLTHROUGH*/
+	case NAND_CMD_READ: /* NAND_CMD_SMALLA */
+		fcm->data_fir = FIR_OP_RBW;
+		fcm->addr_type = ADDR_ROWCOL;
+		break;
+	case NAND_CMD_STATUS:
+		fcm->data_fir = FIR_OP_RS;
+		fcm->status = 1;
+		break;
+	case NAND_CMD_ERASE:
+		fcm->addr_type = ADDR_ROW;
+		break;
+	case NAND_CMD_PROG:
+		fcm->addr_type = ADDR_ROWCOL;
+		break;
+	}
+	return (0);
+}
+
+static int
+fsl_nfc_send_address(device_t dev, uint8_t addr)
+{
+	struct fsl_nand_softc *sc;
+	struct fsl_nfc_fcm *fcm;
+	uint32_t addr_bits;
+
+	// device_printf(dev, "%s(address=%u)\n", __func__, addr);
+
+	sc = device_get_softc(dev);
+	fcm = &sc->fcm;
+
+	KASSERT(fcm->addr_type != ADDR_NONE,
+	    ("controller doesn't expect address cycle"));
+
+	addr_bits = addr;
+
+	if (fcm->addr_type == ADDR_ID) {
+		fcm->reg_fir |= (FIR_OP_UA << OP_SHIFT(fcm->opnr));
+		fcm->opnr++;
+
+		fcm->reg_fbcr = 5;
+		fcm->reg_fbar = 0;
+		fcm->reg_fpar = 0;
+		fcm->reg_mdr = addr_bits;
+		fcm->buf_ofs = 0;
+		fcm->read_ptr = 0;
+		return (0);
+	}
+
+	if (fcm->addr_type == ADDR_ROW) {
+		addr_bits <<= fcm->addr_bytes * 8;
+		fcm->row_addr |= addr_bits;
+		fcm->addr_bytes++;
+		if (fcm->addr_bytes < sc->row_cycles)
+			return (0);
+	} else {
+		if (fcm->addr_bytes < sc->col_cycles) {
+			addr_bits <<= fcm->addr_bytes * 8;
+			fcm->column_addr |= addr_bits;
+		} else {
+			addr_bits <<= (fcm->addr_bytes - sc->col_cycles) * 8;
+			fcm->row_addr |= addr_bits;
+		}
+		fcm->addr_bytes++;
+		if (fcm->addr_bytes < (sc->row_cycles + sc->col_cycles))
+			return (0);
+	}
+
+	return (fsl_nand_build_address(dev, fcm->row_addr, fcm->column_addr));
+}
+
+static int
+fsl_nand_build_address(device_t dev, uint32_t row, uint32_t column)
+{
+	struct fsl_nand_softc *sc;
+	struct fsl_nfc_fcm *fcm;
+	uint32_t byte_count = 0;
+	uint32_t block_address = 0;
+	uint32_t page_address = 0;
+
+	sc = device_get_softc(dev);
+	fcm = &sc->fcm;
+
+	fcm->read_ptr = 0;
+	fcm->buf_ofs = 0;
+
+	if (fcm->addr_type == ADDR_ROWCOL) {
+		fcm->reg_fir |= (FIR_OP_CA << OP_SHIFT(fcm->opnr));
+		fcm->opnr++;
+
+		column += fcm->pg_ofs;
+		fcm->pg_ofs = 0;
+
+		page_address |= column;
+
+		if (column != 0) {
+			byte_count = sc->pgsz - column;
+			fcm->read_ptr = column;
+		}
+	}
+
+	fcm->reg_fir |= (FIR_OP_PA << OP_SHIFT(fcm->opnr));
+	fcm->opnr++;
+
+	if (sc->pgsz == FSL_LARGE_PAGE_SIZE) {
+		block_address = row >> 6;
+		page_address |= ((row << FPAR_LP_PI_SHIFT) & FPAR_LP_PI);
+		fcm->buf_ofs = (row & 1) * 4096;
+	} else {
+		block_address = row >> 5;
+		page_address |= ((row << FPAR_SP_PI_SHIFT) & FPAR_SP_PI);
+		fcm->buf_ofs = (row & 7) * 1024;
+	}
+
+	fcm->reg_fbcr = byte_count;
+	fcm->reg_fbar = block_address;
+	fcm->reg_fpar = page_address;
+	return (0);
+}
+
+static int
+fsl_nfc_start_command(device_t dev)
+{
+	struct fsl_nand_softc *sc;
+	struct fsl_nfc_fcm *fcm;
+	uint32_t fmr, ltesr_v;
+	int error, timeout;
+
+	// device_printf(dev, "%s()\n", __func__);
+
+	sc = device_get_softc(dev);
+	fcm = &sc->fcm;
+
+	fmr = fcm->reg_fmr | FMR_OP;
+
+	if (fcm->data_fir)
+		fcm->reg_fir |= (fcm->data_fir << OP_SHIFT(fcm->opnr));
+
+	LBC_WRITE(FIR, fcm->reg_fir);
+	LBC_WRITE(FCR, fcm->reg_fcr);
+
+	LBC_WRITE(FMR, fmr);
+
+	LBC_WRITE(FBCR, fcm->reg_fbcr);
+	LBC_WRITE(FBAR, fcm->reg_fbar);
+	LBC_WRITE(FPAR, fcm->reg_fpar);
+
+	if (fcm->addr_type == ADDR_ID)
+		LBC_WRITE(MDR, fcm->reg_mdr);
+
+	nand_debug(NDBG_DRV, "BEFORE:\nFMR=%#x, FIR=%#x, FCR=%#x", fmr,
+	    fcm->reg_fir, fcm->reg_fcr);
+	nand_debug(NDBG_DRV, "MDR=%#x, FBAR=%#x, FPAR=%#x, FBCR=%#x",
+	    LBC_READ(MDR), fcm->reg_fbar, fcm->reg_fpar, fcm->reg_fbcr);
+
+	LBC_WRITE(LSOR, sc->dinfo->di_bank);
+
+	timeout = (cold) ? FSL_FCM_WAIT_TIMEOUT : ~0;
+	error = 0;
+	ltesr_v = LBC_READ(LTESR);
+	while (!error && (ltesr_v & LTESR_CC) == 0) {
+		if (cold) {
+			DELAY(1000);
+			timeout--;
+			if (timeout < 0)
+				error = EWOULDBLOCK;
+		} else
+			error = tsleep(device_get_parent(sc->dev), PRIBIO,
+			    "nfcfsl", hz);
+		ltesr_v = LBC_READ(LTESR);
+	}
+	if (error)
+		nand_debug(NDBG_DRV, "Command complete wait timeout\n");
+
+	nand_debug(NDBG_DRV, "AFTER:\nLTESR=%#x, LTEDR=%#x, LTEIR=%#x,"
+	    " LTEATR=%#x, LTEAR=%#x, LTECCR=%#x", ltesr_v,
+	    LBC_READ(LTEDR), LBC_READ(LTEIR), LBC_READ(LTEATR),
+	    LBC_READ(LTEAR), LBC_READ(LTECCR));
+
+	bzero(&fcm->fcm_startzero,
+	    __rangeof(struct fsl_nfc_fcm, fcm_startzero, fcm_endzero));
+
+	if (fcm->status)
+		sc->fcm.reg_mdr = LBC_READ(MDR);
+
+	/* Even if a timeout occurred, we should perform the steps below */
+	LBC_WRITE(LTESR, ltesr_v);
+	LBC_WRITE(LTEATR, 0);
+
+	return (error);
+}
+
+static uint8_t
+fsl_nfc_read_byte(device_t dev)
+{
+	struct fsl_nand_softc *sc = device_get_softc(dev);
+	uint32_t offset;
+
+	// device_printf(dev, "%s()\n", __func__);
+
+	/*
+	 * The LBC controller can return status via the MDR instead of the FCM
+	 * buffer.  If the last operation requested before read_byte() was
+	 * STATUS, return the MDR contents instead of a byte from the buffer.
+	 */
+	if (sc->fcm.status) {
+		sc->fcm.status = 0;
+		return (sc->fcm.reg_mdr);
+	}
+
+	KASSERT(sc->fcm.read_ptr < sc->pgsz,
+	    ("Attempt to read beyond buffer %x %x", sc->fcm.read_ptr,
+	    sc->pgsz));
+
+	offset = sc->fcm.buf_ofs + sc->fcm.read_ptr;
+	sc->fcm.read_ptr++;
+	return (bus_read_1(sc->res, offset));
+}
+
+static void
+fsl_nfc_read_buf(device_t dev, void *buf, uint32_t len)
+{
+	struct fsl_nand_softc *sc = device_get_softc(dev);
+	uint32_t offset;
+	int bytesleft = 0;
+
+	// device_printf(dev, "%s(buf=%p, len=%u)\n", __func__, buf, len);
+
+	nand_debug(NDBG_DRV, "REQUEST OF 0x%0x B (BIB=0x%0x, NTR=0x%0x)",
+	    len, sc->pgsz, sc->fcm.read_ptr);
+
+	bytesleft = MIN((unsigned int)len, sc->pgsz - sc->fcm.read_ptr);
+
+	offset = sc->fcm.buf_ofs + sc->fcm.read_ptr;
+	bus_read_region_1(sc->res, offset, buf, bytesleft);
+	sc->fcm.read_ptr += bytesleft;
+}
+
+static void
+fsl_nfc_write_buf(device_t dev, void *buf, uint32_t len)
+{
+	struct fsl_nand_softc *sc = device_get_softc(dev);
+	uint32_t offset;
+	int bytesleft = 0;
+
+	// device_printf(dev, "%s(buf=%p, len=%u)\n", __func__, buf, len);
+
+	KASSERT(len <= sc->pgsz - sc->fcm.read_ptr,
+	    ("Attempt to write beyond buffer"));
+
+	bytesleft = MIN((unsigned int)len, sc->pgsz - sc->fcm.read_ptr);
+
+	nand_debug(NDBG_DRV, "REQUEST TO WRITE 0x%0x (BIB=0x%0x, NTR=0x%0x)",
+	    bytesleft, sc->pgsz, sc->fcm.read_ptr);
+
+	offset = sc->fcm.buf_ofs + sc->fcm.read_ptr;
+	bus_write_region_1(sc->res, offset, buf, bytesleft);
+	sc->fcm.read_ptr += bytesleft;
+}
+
+static int
+fsl_nand_chip_preprobe(device_t dev, struct nand_id *id)
+{
+
+	if (fsl_nfc_send_command(dev, NAND_CMD_RESET) != 0)
+		return (ENXIO);
+
+	if (fsl_nfc_start_command(dev) != 0)
+		return (ENXIO);
+
+	DELAY(1000);
+
+	if (fsl_nfc_send_command(dev, NAND_CMD_READ_ID))
+		return (ENXIO);
+
+	if (fsl_nfc_send_address(dev, 0))
+		return (ENXIO);
+
+	if (fsl_nfc_start_command(dev) != 0)
+		return (ENXIO);
+
+	DELAY(25);
+
+	id->man_id = fsl_nfc_read_byte(dev);
+	id->dev_id = fsl_nfc_read_byte(dev);
+
+	nand_debug(NDBG_DRV, "manufacturer id: %x chip id: %x",
+	    id->man_id, id->dev_id);
+
+	return (0);
+}
+
+#ifdef NAND_DEBUG_TIMING
+
+static SYSCTL_NODE(_debug, OID_AUTO, fcm, CTLFLAG_RD, 0, "FCM timing");
+
+static u_int csct = 1;	/* 22:    Chip select to command time (trlx). */
+SYSCTL_UINT(_debug_fcm, OID_AUTO, csct, CTLFLAG_RW, &csct, 1,
+    "Chip select to command time: determines how far in advance -LCSn is "
+    "asserted prior to any bus activity during a NAND Flash access handled "
+    "by the FCM. This helps meet chip-select setup times for slow memories.");
+
+static u_int cst = 1;	/* 23:    Command setup time (trlx). */
+SYSCTL_UINT(_debug_fcm, OID_AUTO, cst, CTLFLAG_RW, &cst, 1,
+    "Command setup time: determines the delay of -LFWE assertion relative to "
+    "the command, address, or data change when the external memory access "
+    "is handled by the FCM.");
+
+static u_int cht = 1;	/* 24:    Command hold time (trlx). */
+SYSCTL_UINT(_debug_fcm, OID_AUTO, cht, CTLFLAG_RW, &cht, 1,
+    "Command hold time: determines the -LFWE negation prior to the command, "
+    "address, or data change when the external memory access is handled by "
+    "the FCM.");
+
+static u_int scy = 2;	/* 25-27: Cycle length in bus clocks */
+SYSCTL_UINT(_debug_fcm, OID_AUTO, scy, CTLFLAG_RW, &scy, 2,
+    "Cycle length in bus clocks: see RM");
+
+static u_int rst = 1;	/* 28:    Read setup time (trlx). */
+SYSCTL_UINT(_debug_fcm, OID_AUTO, rst, CTLFLAG_RW, &rst, 1,
+    "Read setup time: determines the delay of -LFRE assertion relative to "
+    "sampling of read data when the external memory access is handled by "
+    "the FCM.");
+
+static u_int trlx = 1;	/* 29:    Timing relaxed. */
+SYSCTL_UINT(_debug_fcm, OID_AUTO, trlx, CTLFLAG_RW, &trlx, 1,
+    "Timing relaxed: modifies the settings of timing parameters for slow "
+    "memories. See RM");
+
+static u_int ehtr = 1;	/* 30:    Extended hold time on read accesses. */
+SYSCTL_UINT(_debug_fcm, OID_AUTO, ehtr, CTLFLAG_RW, &ehtr, 1,
+    "Extended hold time on read accesses: indicates with TRLX how many "
+    "cycles are inserted between a read access from the current bank and "
+    "the next access.");
+
+static u_int
+fsl_nand_get_timing(void)
+{
+	u_int timing;
+
+	timing = ((csct & 1) << 9) | ((cst & 1) << 8) | ((cht & 1) << 7) |
+	    ((scy & 7) << 4) | ((rst & 1) << 3) | ((trlx & 1) << 2) |
+	    ((ehtr & 1) << 1);
+
+	printf("nfc_fsl: timing = %u\n", timing);
+	return (timing);
+}
+
+static int
+fsl_sysctl_program(SYSCTL_HANDLER_ARGS)
+{
+	struct fsl_nand_softc *sc;
+	int error, i;
+	device_t dev;
+	uint32_t or_v;
+
+	error = sysctl_wire_old_buffer(req, sizeof(int));
+	if (error == 0) {
+		i = 0;
+		error = sysctl_handle_int(oidp, &i, 0, req);
+	}
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	for (i = 0; i < 8; i++) {
+		dev = fcm_devs[i];
+		if (dev == NULL)
+			continue;
+		sc = device_get_softc(dev);
+
+		/* Reprogram OR(x) */
+		or_v = lbc_read_reg(dev, LBC85XX_OR(sc->dinfo->di_bank));
+		or_v &= 0xfffffc00;
+		or_v |= fsl_nand_get_timing();
+		lbc_write_reg(dev, LBC85XX_OR(sc->dinfo->di_bank), or_v);
+	}
+	return (0);
+}
+
+SYSCTL_PROC(_debug_fcm, OID_AUTO, program, CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
+    fsl_sysctl_program, "I", "write to program FCM with current values");
+
+#endif /* NAND_DEBUG_TIMING */


Property changes on: trunk/sys/dev/nand/nfc_fsl.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
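
Unlike the at91 driver, the FCM driver above never drives CLE/ALE cycles
directly.  fsl_nfc_send_command() and fsl_nfc_send_address() only accumulate
a small program for the controller: commands are packed into FCR eight bits
per slot (CMD_SHIFT) and opcodes into FIR four bits per slot (OP_SHIFT), and
fsl_nfc_start_command() launches the whole sequence with the LSOR write.  As
a worked example (not additional driver code), the register state built up
for a READ ID request with a single 0x00 address byte looks like this, using
the FIR_OP_* encodings from nfc_fsl.h below:

/*
 * Worked example: FCM state for READ ID (0x90) + one address byte, as
 * accumulated by fsl_nfc_send_command()/fsl_nfc_send_address() and
 * completed by fsl_nfc_start_command().
 */
uint32_t fcr = 0x90 << CMD_SHIFT(0);		/* = 0x90000000, command slot 0 */
uint32_t fir = (FIR_OP_CW0 << OP_SHIFT(0)) |	/* wait, then issue FCR[CMD0] */
	       (FIR_OP_UA  << OP_SHIFT(1)) |	/* issue the user-defined address */
	       (FIR_OP_RBW << OP_SHIFT(2));	/* wait, then read FBCR bytes */
						/* = 0xC3E00000 */
uint32_t fbcr = 5;				/* transfer the five ID bytes (reg_fbcr = 5 above) */
uint32_t mdr = 0x00;				/* address byte consumed by FIR_OP_UA */
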
Added: trunk/sys/dev/nand/nfc_fsl.h
===================================================================
--- trunk/sys/dev/nand/nfc_fsl.h	                        (rev 0)
+++ trunk/sys/dev/nand/nfc_fsl.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,98 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2012 Juniper Networks, Inc.
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/nand/nfc_fsl.h 238046 2012-07-03 01:00:29Z marcel $
+ */
+
+#ifndef _NAND_NFC_FSL_H_
+#define	_NAND_NFC_FSL_H_
+
+/* LBC BR/OR Registers layout definitions */
+#define BR_V		0x00000001
+#define BR_V_SHIFT	0
+#define BR_MSEL		0x000000E0
+#define BR_MSEL_SHIFT	5
+#define BR_DECC_CHECK_MODE	0x00000600
+#define BR_DECC_CHECK_GEN	0x00000400
+
+#define OR_FCM_PAGESIZE		0x00000400
+
+/* Options definitions */
+#define NAND_OPT_ECC_MODE_HW	1
+#define NAND_OPT_ECC_MODE_SOFT	(1 << 1)
+
+/* FMR - Flash Mode Register */
+#define FMR_CWTO	0xF000
+#define FMR_CWTO_SHIFT	12
+#define FMR_BOOT	0x0800
+#define FMR_ECCM	0x0100
+#define FMR_AL		0x0030
+#define FMR_AL_SHIFT	4
+#define FMR_OP		0x0003
+#define FMR_OP_SHIFT	0
+
+#define FIR_OP_NOP	0x0 /* No operation and end of sequence */
+#define FIR_OP_CA	0x1 /* Issue current column address */
+#define FIR_OP_PA	0x2 /* Issue current block+page address */
+#define FIR_OP_UA	0x3 /* Issue user defined address */
+#define	FIR_OP_CM(x)	(4 + (x))	/* Issue command from FCR[CMD(x)] */
+#define FIR_OP_WB	0x8 /* Write FBCR bytes from FCM buffer */
+#define FIR_OP_WS	0x9 /* Write 1 or 2 bytes from MDR[AS] */
+#define FIR_OP_RB	0xA /* Read FBCR bytes to FCM buffer */
+#define FIR_OP_RS	0xB /* Read 1 or 2 bytes to MDR[AS] */
+#define FIR_OP_CW0	0xC /* Wait then issue FCR[CMD0] */
+#define FIR_OP_CW1	0xD /* Wait then issue FCR[CMD1] */
+#define FIR_OP_RBW	0xE /* Wait then read FBCR bytes */
+#define FIR_OP_RSW	0xF /* Wait then read 1 or 2 bytes */
+
+/* LTESR - Transfer Error Status Register */
+#define LTESR_BM	0x80000000
+#define LTESR_FCT	0x40000000
+#define LTESR_PAR	0x20000000
+#define LTESR_WP	0x04000000
+#define LTESR_ATMW	0x00800000
+#define LTESR_ATMR	0x00400000
+#define LTESR_CS	0x00080000
+#define LTESR_CC	0x00000001
+
+#define LTESR_NAND_MASK	(LTESR_FCT | LTESR_CC | LTESR_CS)
+
+/* FPAR - Flash Page Address Register */
+#define FPAR_SP_PI		0x00007C00
+#define FPAR_SP_PI_SHIFT	10
+#define FPAR_SP_MS		0x00000200
+#define FPAR_SP_CI		0x000001FF
+#define FPAR_SP_CI_SHIFT	0
+#define FPAR_LP_PI		0x0003F000
+#define FPAR_LP_PI_SHIFT	12
+#define FPAR_LP_MS		0x00000800
+#define FPAR_LP_CI		0x000007FF
+#define FPAR_LP_CI_SHIFT	0
+
+#define FSL_FCM_WAIT_TIMEOUT	10
+
+#endif /* _NAND_NFC_FSL_H_ */


Property changes on: trunk/sys/dev/nand/nfc_fsl.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
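
The FPAR/FBAR definitions above are what fsl_nand_build_address() uses to
split a linear page number into a block address plus a page index within the
block, and to pick which of the FCM RAM buffers the data lands in.  A worked
example for a large-page (2112-byte) chip, reading page 0x1234 starting at
column 0x10; the numbers are illustrative only:

/*
 * Worked example of the large-page math in fsl_nand_build_address():
 * row (page number) 0x1234, column 0x10.
 */
uint32_t row = 0x1234, column = 0x10;

uint32_t fbar = row >> 6;			/* block address = 0x48 (64 pages per block) */
uint32_t fpar = ((row << FPAR_LP_PI_SHIFT) & FPAR_LP_PI) |
    column;					/* page index 0x34 within the block, start column 0x10 */
u_int buf_ofs = (row & 1) * 4096;		/* even/odd pages alternate between the two 4 KB buffers */
u_int fbcr = FSL_LARGE_PAGE_SIZE - column;	/* bytes to transfer when starting mid-page */
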
Added: trunk/sys/dev/nand/nfc_if.m
===================================================================
--- trunk/sys/dev/nand/nfc_if.m	                        (rev 0)
+++ trunk/sys/dev/nand/nfc_if.m	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,166 @@
+/* $MidnightBSD$ */
+#-
+# Copyright (C) 2009-2012 Semihalf
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD: stable/10/sys/dev/nand/nfc_if.m 235537 2012-05-17 10:11:18Z gber $
+
+# NAND controller interface description
+#
+
+#include <sys/bus.h>
+#include <dev/nand/nand.h>
+
+INTERFACE nfc;
+
+CODE {
+	static int nfc_default_method(device_t dev)
+	{
+		return (0);
+	}
+
+	static int nfc_softecc_get(device_t dev, void *buf, int pagesize, 
+	    void *ecc, int *needwrite)
+	{
+		*needwrite = 1;
+		return (nand_softecc_get(dev, buf, pagesize, ecc));
+	}
+
+	static int nfc_softecc_correct(device_t dev, void *buf, int pagesize,
+	    void *readecc, void *calcecc)
+	{
+		return (nand_softecc_correct(dev, buf, pagesize, readecc,
+		    calcecc));
+	}
+};
+
+# Send command to a NAND chip
+#
+# Return values:
+# 0: Success
+#
+METHOD int send_command {
+	device_t dev;
+	uint8_t command;
+};
+
+# Send address to a NAND chip
+#
+# Return values:
+# 0: Success
+#
+METHOD int send_address {
+	device_t dev;
+	uint8_t address;
+};
+
+# Read byte
+#
+# Return values:
+# byte read
+#
+METHOD uint8_t read_byte {
+	device_t dev;
+};
+
+# Write byte
+#
+METHOD void write_byte {
+	device_t dev;
+	uint8_t byte;
+};
+
+# Read word
+#
+# Return values:
+# word read
+#
+METHOD uint16_t read_word {
+	device_t dev;
+};
+
+# Write word
+#
+METHOD void write_word {
+	device_t dev;
+	uint16_t word;
+};
+
+# Read buf
+#
+METHOD void read_buf {
+	device_t dev;
+	void *buf;
+	uint32_t len;
+};
+
+# Write buf
+#
+METHOD void write_buf {
+	device_t dev;
+	void *buf;
+	uint32_t len;
+};
+
+# Select CS
+#
+METHOD int select_cs {
+	device_t dev;
+	uint8_t cs;
+};
+
+# Read ready/busy signal
+#
+METHOD int read_rnb {
+	device_t dev;
+};
+
+# Start command
+#
+# Return values:
+# 0: Success
+#
+METHOD int start_command {
+	device_t dev;
+} DEFAULT nfc_default_method;
+
+# Generate ECC or get it from H/W
+#
+METHOD int get_ecc {
+	device_t dev;
+	void *buf;
+	int pagesize;
+	void *ecc;
+	int *needwrite;
+} DEFAULT nfc_softecc_get;
+
+# Correct ECC
+#
+METHOD int correct_ecc {
+	device_t dev;
+	void *buf;
+	int pagesize;
+	void *readecc;
+	void *calcecc;
+} DEFAULT nfc_softecc_correct;


Property changes on: trunk/sys/dev/nand/nfc_if.m
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
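
Because nfc_if.m supplies software-ECC defaults (nfc_softecc_get and
nfc_softecc_correct) and a no-op default for start_command, a simple
controller only has to implement the command/address/data methods, as the
at91 and Marvell drivers in this commit do.  A controller with a hardware
ECC engine would additionally override the ECC methods in its method table;
the hwecc_* handlers below are hypothetical names used purely to show the
shape of such a table.

/*
 * Hypothetical method table for a controller with hardware ECC.
 * Anything not listed falls back to the defaults declared in nfc_if.m.
 */
static device_method_t hwecc_nand_methods[] = {
	DEVMETHOD(device_probe,		hwecc_nand_probe),
	DEVMETHOD(device_attach,	hwecc_nand_attach),

	DEVMETHOD(nfc_send_command,	hwecc_nand_send_command),
	DEVMETHOD(nfc_send_address,	hwecc_nand_send_address),
	DEVMETHOD(nfc_read_byte,	hwecc_nand_read_byte),
	DEVMETHOD(nfc_read_buf,		hwecc_nand_read_buf),
	DEVMETHOD(nfc_write_buf,	hwecc_nand_write_buf),
	DEVMETHOD(nfc_select_cs,	hwecc_nand_select_cs),
	DEVMETHOD(nfc_read_rnb,		hwecc_nand_read_rnb),

	/* Route ECC generation and correction to the hardware engine. */
	DEVMETHOD(nfc_get_ecc,		hwecc_nand_get_ecc),
	DEVMETHOD(nfc_correct_ecc,	hwecc_nand_correct_ecc),

	DEVMETHOD_END
};
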
Added: trunk/sys/dev/nand/nfc_mv.c
===================================================================
--- trunk/sys/dev/nand/nfc_mv.c	                        (rev 0)
+++ trunk/sys/dev/nand/nfc_mv.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,237 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2009-2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Integrated NAND controller driver */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/nand/nfc_mv.c 235537 2012-05-17 10:11:18Z gber $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/malloc.h>
+#include <sys/rman.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/time.h>
+
+#include <machine/bus.h>
+#include <machine/fdt.h>
+#include <arm/mv/mvvar.h>
+#include <arm/mv/mvwin.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include <dev/nand/nand.h>
+#include <dev/nand/nandbus.h>
+#include "nfc_if.h"
+
+#define MV_NAND_DATA	(0x00)
+#define MV_NAND_COMMAND	(0x01)
+#define MV_NAND_ADDRESS	(0x02)
+
+struct mv_nand_softc {
+	struct nand_softc 	nand_dev;
+	bus_space_handle_t 	sc_handle;
+	bus_space_tag_t		sc_tag;
+	struct resource		*res;
+	int			rid;
+};
+
+static int	mv_nand_attach(device_t);
+static int	mv_nand_probe(device_t);
+static int	mv_nand_send_command(device_t, uint8_t);
+static int	mv_nand_send_address(device_t, uint8_t);
+static uint8_t	mv_nand_read_byte(device_t);
+static void	mv_nand_read_buf(device_t, void *, uint32_t);
+static void	mv_nand_write_buf(device_t, void *, uint32_t);
+static int	mv_nand_select_cs(device_t, uint8_t);
+static int	mv_nand_read_rnb(device_t);
+
+static device_method_t mv_nand_methods[] = {
+	DEVMETHOD(device_probe,		mv_nand_probe),
+	DEVMETHOD(device_attach,	mv_nand_attach),
+
+	DEVMETHOD(nfc_send_command,	mv_nand_send_command),
+	DEVMETHOD(nfc_send_address,	mv_nand_send_address),
+	DEVMETHOD(nfc_read_byte,	mv_nand_read_byte),
+	DEVMETHOD(nfc_read_buf,		mv_nand_read_buf),
+	DEVMETHOD(nfc_write_buf,	mv_nand_write_buf),
+	DEVMETHOD(nfc_select_cs,	mv_nand_select_cs),
+	DEVMETHOD(nfc_read_rnb,		mv_nand_read_rnb),
+
+	{ 0, 0 },
+};
+
+static driver_t mv_nand_driver = {
+	"nand",
+	mv_nand_methods,
+	sizeof(struct mv_nand_softc),
+};
+
+static devclass_t mv_nand_devclass;
+DRIVER_MODULE(mv_nand, localbus, mv_nand_driver, mv_nand_devclass, 0, 0);
+
+static int
+mv_nand_probe(device_t dev)
+{
+
+	if (!ofw_bus_is_compatible(dev, "mrvl,nfc"))
+		return (ENXIO);
+
+	device_set_desc(dev, "Marvell NAND controller");
+	return (BUS_PROBE_DEFAULT);
+}
+
+static int
+mv_nand_attach(device_t dev)
+{
+	struct mv_nand_softc *sc;
+	int err;
+
+	sc = device_get_softc(dev);
+	sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->rid,
+	    RF_ACTIVE);
+	if (sc->res == NULL) {
+		device_printf(dev, "could not allocate resources!\n");
+		return (ENXIO);
+	}
+
+	sc->sc_tag = rman_get_bustag(sc->res);
+	sc->sc_handle = rman_get_bushandle(sc->res);
+
+	nand_init(&sc->nand_dev, dev, NAND_ECC_SOFT, 0, 0, NULL, NULL);
+
+	err = nandbus_create(dev);
+
+	return (err);
+}
+
+static int
+mv_nand_send_command(device_t dev, uint8_t command)
+{
+	struct mv_nand_softc *sc;
+
+	nand_debug(NDBG_DRV,"mv_nand: send command %x", command);
+
+	sc = device_get_softc(dev);
+	bus_space_write_1(sc->sc_tag, sc->sc_handle, MV_NAND_COMMAND, command);
+	return (0);
+}
+
+static int
+mv_nand_send_address(device_t dev, uint8_t addr)
+{
+	struct mv_nand_softc *sc;
+
+	nand_debug(NDBG_DRV,"mv_nand: send address %x", addr);
+
+	sc = device_get_softc(dev);
+	bus_space_write_1(sc->sc_tag, sc->sc_handle, MV_NAND_ADDRESS, addr);
+	return (0);
+}
+
+static uint8_t
+mv_nand_read_byte(device_t dev)
+{
+	struct mv_nand_softc *sc;
+	uint8_t data;
+
+	sc = device_get_softc(dev);
+	data = bus_space_read_1(sc->sc_tag, sc->sc_handle, MV_NAND_DATA);
+
+	nand_debug(NDBG_DRV,"mv_nand: read %x", data);
+
+	return (data);
+}
+
+static void
+mv_nand_read_buf(device_t dev, void* buf, uint32_t len)
+{
+	struct mv_nand_softc *sc;
+	int i;
+	uint8_t *b = (uint8_t*)buf;
+
+	sc = device_get_softc(dev);
+
+	for (i = 0; i < len; i++) {
+		b[i] = bus_space_read_1(sc->sc_tag, sc->sc_handle,
+		    MV_NAND_DATA);
+#ifdef NAND_DEBUG
+		if (!(i % 16))
+			printf("%s", i == 0 ? "mv_nand:\n" : "\n");
+		printf(" %x", b[i]);
+		if (i == len - 1)
+			printf("\n");
+#endif
+	}
+}
+
+static void
+mv_nand_write_buf(device_t dev, void* buf, uint32_t len)
+{
+	struct mv_nand_softc *sc;
+	int i;
+	uint8_t *b = (uint8_t*)buf;
+
+	sc = device_get_softc(dev);
+
+	for (i = 0; i < len; i++) {
+#ifdef NAND_DEBUG
+		if (!(i % 16))
+			printf("%s", i == 0 ? "mv_nand:\n" : "\n");
+		printf(" %x", b[i]);
+		if (i == len - 1)
+			printf("\n");
+#endif
+		bus_space_write_1(sc->sc_tag, sc->sc_handle, MV_NAND_DATA,
+		    b[i]);
+	}
+}
+
+static int
+mv_nand_select_cs(device_t dev, uint8_t cs)
+{
+
+	if (cs > 0)
+		return (ENODEV);
+
+	return (0);
+}
+
+static int
+mv_nand_read_rnb(device_t dev)
+{
+
+	/* no-op */
+	return (0); /* ready */
+}


Property changes on: trunk/sys/dev/nand/nfc_mv.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/dev/netmap/if_em_netmap.h
===================================================================
--- trunk/sys/dev/netmap/if_em_netmap.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/netmap/if_em_netmap.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 /*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -24,10 +25,9 @@
  */
 
 /*
- * $MidnightBSD$
- * $Id: if_em_netmap.h,v 1.2 2013-01-08 03:53:24 laffer1 Exp $
+ * $FreeBSD: stable/10/sys/dev/netmap/if_em_netmap.h 308136 2016-10-31 16:48:16Z sbruno $
  *
- * netmap support for em.
+ * netmap support for: em.
  *
  * For more details on netmap support please see ixgbe_netmap.h
  */
@@ -40,39 +40,6 @@
 #include <dev/netmap/netmap_kern.h>
 
 
-static void	em_netmap_block_tasks(struct adapter *);
-static void	em_netmap_unblock_tasks(struct adapter *);
-
-
-static void
-em_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int queueid)
-{
-	struct adapter *adapter = ifp->if_softc;
-
-	ASSERT(queueid < adapter->num_queues);
-	switch (what) {
-	case NETMAP_CORE_LOCK:
-		EM_CORE_LOCK(adapter);
-		break;
-	case NETMAP_CORE_UNLOCK:
-		EM_CORE_UNLOCK(adapter);
-		break;
-	case NETMAP_TX_LOCK:
-		EM_TX_LOCK(&adapter->tx_rings[queueid]);
-		break;
-	case NETMAP_TX_UNLOCK:
-		EM_TX_UNLOCK(&adapter->tx_rings[queueid]);
-		break;
-	case NETMAP_RX_LOCK:
-		EM_RX_LOCK(&adapter->rx_rings[queueid]);
-		break;
-	case NETMAP_RX_UNLOCK:
-		EM_RX_UNLOCK(&adapter->rx_rings[queueid]);
-		break;
-	}
-}
-
-
 // XXX do we need to block/unblock the tasks ?
 static void
 em_netmap_block_tasks(struct adapter *adapter)
@@ -104,7 +71,7 @@
 		struct rx_ring *rxr = adapter->rx_rings;
 		int i;
 
-		for (i = 0; i < adapter->num_queues; i++) {
+		for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
 			taskqueue_unblock(txr->tq);
 			taskqueue_unblock(rxr->tq);
 		}
@@ -115,18 +82,15 @@
 
 
 /*
- * Register/unregister routine
+ * Register/unregister. We are already under netmap lock.
  */
 static int
-em_netmap_reg(struct ifnet *ifp, int onoff)
+em_netmap_reg(struct netmap_adapter *na, int onoff)
 {
+	struct ifnet *ifp = na->ifp;
 	struct adapter *adapter = ifp->if_softc;
-	struct netmap_adapter *na = NA(ifp);
-	int error = 0;
 
-	if (na == NULL)
-		return EINVAL;	/* no netmap support here */
-
+	EM_CORE_LOCK(adapter);
 	em_disable_intr(adapter);
 
 	/* Tell the stack that the interface is no longer active */
@@ -133,27 +97,16 @@
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 
 	em_netmap_block_tasks(adapter);
-
+	/* enable or disable flags and callbacks in na and ifp */
 	if (onoff) {
-		ifp->if_capenable |= IFCAP_NETMAP;
-
-		na->if_transmit = ifp->if_transmit;
-		ifp->if_transmit = netmap_start;
-
-		em_init_locked(adapter);
-		if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
-			error = ENOMEM;
-			goto fail;
-		}
+		nm_set_native_flags(na);
 	} else {
-fail:
-		/* return to non-netmap mode */
-		ifp->if_transmit = na->if_transmit;
-		ifp->if_capenable &= ~IFCAP_NETMAP;
-		em_init_locked(adapter);	/* also enable intr */
+		nm_clear_native_flags(na);
 	}
+	em_init_locked(adapter);	/* also enable intr */
 	em_netmap_unblock_tasks(adapter);
-	return (error);
+	EM_CORE_UNLOCK(adapter);
+	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
 }
 
 
@@ -161,100 +114,93 @@
  * Reconcile kernel and user view of the transmit ring.
  */
 static int
-em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+em_netmap_txsync(struct netmap_kring *kring, int flags)
 {
-	struct adapter *adapter = ifp->if_softc;
-	struct tx_ring *txr = &adapter->tx_rings[ring_nr];
-	struct netmap_adapter *na = NA(ifp);
-	struct netmap_kring *kring = &na->tx_rings[ring_nr];
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
 	struct netmap_ring *ring = kring->ring;
-	u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
-
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
 	/* generate an interrupt approximately every half ring */
 	u_int report_frequency = kring->nkr_num_slots >> 1;
 
-	k = ring->cur;
-	if (k > lim)
-		return netmap_ring_reinit(kring);
+	/* device-specific */
+	struct adapter *adapter = ifp->if_softc;
+	struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
 
-	if (do_lock)
-		EM_TX_LOCK(txr);
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 			BUS_DMASYNC_POSTREAD);
 
 	/*
-	 * Process new packets to send. j is the current index in the
-	 * netmap ring, l is the corresponding index in the NIC ring.
+	 * First part: process new packets to send.
 	 */
-	j = kring->nr_hwcur;
-	if (j != k) {	/* we have new packets to send */
-		l = netmap_idx_k2n(kring, j);
-		for (n = 0; j != k; n++) {
-			/* slot is the current slot in the netmap ring */
-			struct netmap_slot *slot = &ring->slot[j];
-			/* curr is the current slot in the nic ring */
-			struct e1000_tx_desc *curr = &txr->tx_base[l];
-			struct em_buffer *txbuf = &txr->tx_buffers[l];
-			int flags = ((slot->flags & NS_REPORT) ||
-				j == 0 || j == report_frequency) ?
-					E1000_TXD_CMD_RS : 0;
+
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {	/* we have new packets to send */
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			u_int len = slot->len;
 			uint64_t paddr;
-			void *addr = PNMB(slot, &paddr);
-			u_int len = slot->len;
+			void *addr = PNMB(na, slot, &paddr);
 
-			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
-				if (do_lock)
-					EM_TX_UNLOCK(txr);
-				return netmap_ring_reinit(kring);
-			}
+			/* device-specific */
+			struct e1000_tx_desc *curr = &txr->tx_base[nic_i];
+			struct em_txbuffer *txbuf = &txr->tx_buffers[nic_i];
+			int flags = (slot->flags & NS_REPORT ||
+				nic_i == 0 || nic_i == report_frequency) ?
+				E1000_TXD_CMD_RS : 0;
 
-			slot->flags &= ~NS_REPORT;
+			NM_CHECK_ADDR_LEN(na, addr, len);
+
 			if (slot->flags & NS_BUF_CHANGED) {
 				curr->buffer_addr = htole64(paddr);
 				/* buffer has changed, reload map */
-				netmap_reload_map(txr->txtag, txbuf->map, addr);
-				slot->flags &= ~NS_BUF_CHANGED;
+				netmap_reload_map(na, txr->txtag, txbuf->map, addr);
 			}
+			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+			/* Fill the slot in the NIC ring. */
 			curr->upper.data = 0;
 			curr->lower.data = htole32(adapter->txd_cmd | len |
 				(E1000_TXD_CMD_EOP | flags) );
 			bus_dmamap_sync(txr->txtag, txbuf->map,
 				BUS_DMASYNC_PREWRITE);
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
-		kring->nr_hwcur = k; /* the saved ring->cur */
-		kring->nr_hwavail -= n;
+		kring->nr_hwcur = head;
 
+		/* synchronize the NIC ring */
 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
-		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
-		E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
+		/* (re)start the tx unit up to slot nic_i (excluded) */
+		E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i);
 	}
 
-	if (n == 0 || kring->nr_hwavail < 1) {
-		int delta;
-
+	/*
+	 * Second part: reclaim buffers for completed transmissions.
+	 */
+	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
 		/* record completed transmissions using TDH */
-		l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
-		if (l >= kring->nkr_num_slots) { /* XXX can it happen ? */
-			D("TDH wrap %d", l);
-			l -= kring->nkr_num_slots;
+		nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(kring->ring_id));
+		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
+			D("TDH wrap %d", nic_i);
+			nic_i -= kring->nkr_num_slots;
 		}
-		delta = l - txr->next_to_clean;
-		if (delta) {
-			/* some completed, increment hwavail. */
-			if (delta < 0)
-				delta += kring->nkr_num_slots;
-			txr->next_to_clean = l;
-			kring->nr_hwavail += delta;
+		if (nic_i != txr->next_to_clean) {
+			txr->next_to_clean = nic_i;
+			kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
 		}
 	}
-	/* update avail to what the kernel knows */
-	ring->avail = kring->nr_hwavail;
 
-	if (do_lock)
-		EM_TX_UNLOCK(txr);
+	nm_txsync_finalize(kring);
+
 	return 0;
 }
 
@@ -263,111 +209,108 @@
  * Reconcile kernel and user view of the receive ring.
  */
 static int
-em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+em_netmap_rxsync(struct netmap_kring *kring, int flags)
 {
-	struct adapter *adapter = ifp->if_softc;
-	struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
-	struct netmap_adapter *na = NA(ifp);
-	struct netmap_kring *kring = &na->rx_rings[ring_nr];
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
 	struct netmap_ring *ring = kring->ring;
-	u_int j, l, n, lim = kring->nkr_num_slots - 1;
-	int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
-	u_int k = ring->cur, resvd = ring->reserved;
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = nm_rxsync_prologue(kring);
+	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
 
-	k = ring->cur;
-	if (k > lim)
+	/* device-specific */
+	struct adapter *adapter = ifp->if_softc;
+	struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];
+
+	if (head > lim)
 		return netmap_ring_reinit(kring);
 
-	if (do_lock)
-		EM_RX_LOCK(rxr);
-
 	/* XXX check sync modes */
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 			BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 	/*
-	 * Import newly received packets into the netmap ring.
-	 * j is an index in the netmap ring, l in the NIC ring.
+	 * First part: import newly received packets.
 	 */
-	l = rxr->next_to_check;
-	j = netmap_idx_n2k(kring, l);
 	if (netmap_no_pendintr || force_update) {
 		uint16_t slot_flags = kring->nkr_slot_flags;
 
-		for (n = 0; ; n++) {
-			struct e1000_rx_desc *curr = &rxr->rx_base[l];
-			uint32_t staterr = le32toh(curr->status);
+		nic_i = rxr->next_to_check;
+		nm_i = netmap_idx_n2k(kring, nic_i);
 
+		for (n = 0; ; n++) { // XXX no need to count
+			union e1000_rx_desc_extended *curr = &rxr->rx_base[nic_i];
+			uint32_t staterr = le32toh(curr->wb.upper.status_error);
+
 			if ((staterr & E1000_RXD_STAT_DD) == 0)
 				break;
-			ring->slot[j].len = le16toh(curr->length);
-			ring->slot[j].flags = slot_flags;
-			bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[l].map,
+			ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
+			ring->slot[nm_i].flags = slot_flags;
+			bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map,
 				BUS_DMASYNC_POSTREAD);
-			j = (j == lim) ? 0 : j + 1;
+			nm_i = nm_next(nm_i, lim);
 			/* make sure next_to_refresh follows next_to_check */
-			rxr->next_to_refresh = l;	// XXX
-			l = (l == lim) ? 0 : l + 1;
+			rxr->next_to_refresh = nic_i;	// XXX
+			nic_i = nm_next(nic_i, lim);
 		}
 		if (n) { /* update the state variables */
-			rxr->next_to_check = l;
-			kring->nr_hwavail += n;
+			rxr->next_to_check = nic_i;
+			kring->nr_hwtail = nm_i;
 		}
 		kring->nr_kflags &= ~NKR_PENDINTR;
 	}
 
-	/* skip past packets that userspace has released */
-	j = kring->nr_hwcur;	/* netmap ring index */
-	if (resvd > 0) {
-		if (resvd + ring->avail >= lim + 1) {
-			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
-			ring->reserved = resvd = 0; // XXX panic...
-		}
-		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
-	}
-        if (j != k) { /* userspace has released some packets. */
-		l = netmap_idx_k2n(kring, j); /* NIC ring index */
-		for (n = 0; j != k; n++) {
-			struct netmap_slot *slot = &ring->slot[j];
-			struct e1000_rx_desc *curr = &rxr->rx_base[l];
-			struct em_buffer *rxbuf = &rxr->rx_buffers[l];
+	/*
+	 * Second part: skip past packets that userspace has released.
+	 */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
 			uint64_t paddr;
-			void *addr = PNMB(slot, &paddr);
+			void *addr = PNMB(na, slot, &paddr);
 
-			if (addr == netmap_buffer_base) { /* bad buf */
-				if (do_lock)
-					EM_RX_UNLOCK(rxr);
-				return netmap_ring_reinit(kring);
-			}
+			union e1000_rx_desc_extended *curr = &rxr->rx_base[nic_i];
+			struct em_rxbuffer *rxbuf = &rxr->rx_buffers[nic_i];
 
+			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
+				goto ring_reset;
+
+			curr->read.buffer_addr = htole64(paddr);
 			if (slot->flags & NS_BUF_CHANGED) {
-				curr->buffer_addr = htole64(paddr);
 				/* buffer has changed, reload map */
-				netmap_reload_map(rxr->rxtag, rxbuf->map, addr);
+				netmap_reload_map(na, rxr->rxtag, rxbuf->map, addr);
 				slot->flags &= ~NS_BUF_CHANGED;
 			}
-			curr->status = 0;
+			curr->wb.upper.status_error = 0;
 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
 			    BUS_DMASYNC_PREREAD);
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
-		kring->nr_hwavail -= n;
-		kring->nr_hwcur = k;
+		kring->nr_hwcur = head;
+
 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 		/*
 		 * IMPORTANT: we must leave one free slot in the ring,
-		 * so move l back by one unit
+		 * so move nic_i back by one unit
 		 */
-		l = (l == 0) ? lim : l - 1;
-		E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
+		nic_i = nm_prev(nic_i, lim);
+		E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
 	}
-	/* tell userspace that there are new packets */
-	ring->avail = kring->nr_hwavail - resvd;
-	if (do_lock)
-		EM_RX_UNLOCK(rxr);
+
+	/* tell userspace that there might be new packets */
+	nm_rxsync_finalize(kring);
+
 	return 0;
+
+ring_reset:
+	return netmap_ring_reinit(kring);
 }
 
 
@@ -379,14 +322,14 @@
 	bzero(&na, sizeof(na));
 
 	na.ifp = adapter->ifp;
-	na.separate_locks = 1;
+	na.na_flags = NAF_BDG_MAYSLEEP;
 	na.num_tx_desc = adapter->num_tx_desc;
 	na.num_rx_desc = adapter->num_rx_desc;
 	na.nm_txsync = em_netmap_txsync;
 	na.nm_rxsync = em_netmap_rxsync;
-	na.nm_lock = em_netmap_lock_wrapper;
 	na.nm_register = em_netmap_reg;
-	netmap_attach(&na, adapter->num_queues);
+	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
+	netmap_attach(&na);
 }
 
 /* end of file */
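
The if_em_netmap.h changes above (and the if_igb_netmap.h changes that
follow) convert the driver from the old per-driver locking and
avail/reserved accounting to the newer netmap API: the netmap core now
holds the locks, the sync routines take a struct netmap_kring directly,
progress is tracked with kring->rhead / nr_hwcur / nr_hwtail, and ring
arithmetic goes through nm_next()/nm_prev().  The common shape both
converted txsync routines follow is sketched below; drv_netmap_txsync() is
a generic stand-in and the driver-specific descriptor work is elided as
comments.

/*
 * Illustrative skeleton of the converted txsync structure; not a
 * complete driver routine.
 */
static int
drv_netmap_txsync(struct netmap_kring *kring, int flags)
{
	u_int nm_i, nic_i;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;

	/* First part: push slots in [nr_hwcur, head) out to the NIC ring. */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		nic_i = netmap_idx_k2n(kring, nm_i);
		while (nm_i != head) {
			/* ... fill NIC descriptor nic_i from netmap slot nm_i ... */
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;
		/* ... bus_dmamap_sync() the ring, write the NIC tail register ... */
	}

	/* Second part: reclaim completed transmissions, advancing nr_hwtail. */
	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		/* ... read the hardware head into nic_i, then:                  */
		/* kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); */
	}

	nm_txsync_finalize(kring);
	return 0;
}
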

Modified: trunk/sys/dev/netmap/if_igb_netmap.h
===================================================================
--- trunk/sys/dev/netmap/if_igb_netmap.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/netmap/if_igb_netmap.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 /*
- * Copyright (C) 2011 Universita` di Pisa. All rights reserved.
+ * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -24,8 +25,7 @@
  */
 
 /*
- * $MidnightBSD$
- * $Id: if_igb_netmap.h,v 1.2 2013-01-08 03:53:24 laffer1 Exp $
+ * $FreeBSD: stable/10/sys/dev/netmap/if_igb_netmap.h 270252 2014-08-20 23:34:36Z luigi $
  *
  * Netmap support for igb, partly contributed by Ahmed Kooli
  * For details on netmap support please see ixgbe_netmap.h
@@ -38,76 +38,43 @@
 #include <vm/pmap.h>    /* vtophys ? */
 #include <dev/netmap/netmap_kern.h>
 
-
 /*
- * wrapper to export locks to the generic code
+ * Adaptation to different versions of the driver.
  */
-static void
-igb_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int queueid)
-{
-	struct adapter *adapter = ifp->if_softc;
 
-	ASSERT(queueid < adapter->num_queues);
-	switch (what) {
-	case NETMAP_CORE_LOCK:
-		IGB_CORE_LOCK(adapter);
-		break;
-	case NETMAP_CORE_UNLOCK:
-		IGB_CORE_UNLOCK(adapter);
-		break;
-	case NETMAP_TX_LOCK:
-		IGB_TX_LOCK(&adapter->tx_rings[queueid]);
-		break;
-	case NETMAP_TX_UNLOCK:
-		IGB_TX_UNLOCK(&adapter->tx_rings[queueid]);
-		break;
-	case NETMAP_RX_LOCK:
-		IGB_RX_LOCK(&adapter->rx_rings[queueid]);
-		break;
-	case NETMAP_RX_UNLOCK:
-		IGB_RX_UNLOCK(&adapter->rx_rings[queueid]);
-		break;
-	}
-}
+#ifndef IGB_MEDIA_RESET
+/* at the same time as IGB_MEDIA_RESET was defined, the
+ * tx buffer descriptor was renamed, so use this to revert
+ * back to the old name.
+ */
+#define igb_tx_buf igb_tx_buffer
+#endif
 
 
 /*
- * register-unregister routine
+ * Register/unregister. We are already under netmap lock.
  */
 static int
-igb_netmap_reg(struct ifnet *ifp, int onoff)
+igb_netmap_reg(struct netmap_adapter *na, int onoff)
 {
+	struct ifnet *ifp = na->ifp;
 	struct adapter *adapter = ifp->if_softc;
-	struct netmap_adapter *na = NA(ifp);
-	int error = 0;
 
-	if (na == NULL)
-		return EINVAL;	/* no netmap support here */
-
+	IGB_CORE_LOCK(adapter);
 	igb_disable_intr(adapter);
 
 	/* Tell the stack that the interface is no longer active */
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 
+	/* enable or disable flags and callbacks in na and ifp */
 	if (onoff) {
-		ifp->if_capenable |= IFCAP_NETMAP;
-
-		na->if_transmit = ifp->if_transmit;
-		ifp->if_transmit = netmap_start;
-
-		igb_init_locked(adapter);
-		if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
-			error = ENOMEM;
-			goto fail;
-		}
+		nm_set_native_flags(na);
 	} else {
-fail:
-		/* restore if_transmit */
-		ifp->if_transmit = na->if_transmit;
-		ifp->if_capenable &= ~IFCAP_NETMAP;
-		igb_init_locked(adapter);	/* also enable intr */
+		nm_clear_native_flags(na);
 	}
-	return (error);
+	igb_init_locked(adapter);	/* also enable intr */
+	IGB_CORE_UNLOCK(adapter);
+	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
 }
 
 
@@ -115,65 +82,59 @@
  * Reconcile kernel and user view of the transmit ring.
  */
 static int
-igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+igb_netmap_txsync(struct netmap_kring *kring, int flags)
 {
-	struct adapter *adapter = ifp->if_softc;
-	struct tx_ring *txr = &adapter->tx_rings[ring_nr];
-	struct netmap_adapter *na = NA(ifp);
-	struct netmap_kring *kring = &na->tx_rings[ring_nr];
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
 	struct netmap_ring *ring = kring->ring;
-	u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
-
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
 	/* generate an interrupt approximately every half ring */
 	u_int report_frequency = kring->nkr_num_slots >> 1;
 
-	k = ring->cur;
-	if (k > lim)
-		return netmap_ring_reinit(kring);
+	/* device-specific */
+	struct adapter *adapter = ifp->if_softc;
+	struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
+	/* 82575 needs the queue index added */
+	u32 olinfo_status =
+	    (adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0;
 
-	if (do_lock)
-		IGB_TX_LOCK(txr);
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
-	    BUS_DMASYNC_POSTREAD);
+			BUS_DMASYNC_POSTREAD);
 
-	/* check for new packets to send.
-	 * j indexes the netmap ring, l indexes the nic ring, and
-	 *      j = kring->nr_hwcur, l = E1000_TDT (not tracked),
-	 *      j == (l + kring->nkr_hwofs) % ring_size
+	/*
+	 * First part: process new packets to send.
 	 */
-	j = kring->nr_hwcur;
-	if (j != k) {	/* we have new packets to send */
-		/* 82575 needs the queue index added */
-		u32 olinfo_status =
-		    (adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0;
 
-		l = netmap_idx_k2n(kring, j);
-		for (n = 0; j != k; n++) {
-			/* slot is the current slot in the netmap ring */
-			struct netmap_slot *slot = &ring->slot[j];
-			/* curr is the current slot in the nic ring */
-			union e1000_adv_tx_desc *curr =
-			    (union e1000_adv_tx_desc *)&txr->tx_base[l];
-			struct igb_tx_buffer *txbuf = &txr->tx_buffers[l];
-			int flags = ((slot->flags & NS_REPORT) ||
-				j == 0 || j == report_frequency) ?
-					E1000_ADVTXD_DCMD_RS : 0;
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {	/* we have new packets to send */
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			u_int len = slot->len;
 			uint64_t paddr;
-			void *addr = PNMB(slot, &paddr);
-			u_int len = slot->len;
+			void *addr = PNMB(na, slot, &paddr);
 
-			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
-				if (do_lock)
-					IGB_TX_UNLOCK(txr);
-				return netmap_ring_reinit(kring);
-			}
+			/* device-specific */
+			union e1000_adv_tx_desc *curr =
+			    (union e1000_adv_tx_desc *)&txr->tx_base[nic_i];
+			struct igb_tx_buf *txbuf = &txr->tx_buffers[nic_i];
+			int flags = (slot->flags & NS_REPORT ||
+				nic_i == 0 || nic_i == report_frequency) ?
+				E1000_ADVTXD_DCMD_RS : 0;
 
-			slot->flags &= ~NS_REPORT;
+			NM_CHECK_ADDR_LEN(na, addr, len);
+
 			if (slot->flags & NS_BUF_CHANGED) {
 				/* buffer has changed, reload map */
-				netmap_reload_map(txr->txtag, txbuf->map, addr);
-				slot->flags &= ~NS_BUF_CHANGED;
+				netmap_reload_map(na, txr->txtag, txbuf->map, addr);
 			}
+			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+			/* Fill the slot in the NIC ring. */
 			curr->read.buffer_addr = htole64(paddr);
 			// XXX check olinfo and cmd_type_len
 			curr->read.olinfo_status =
@@ -181,51 +142,47 @@
 				(len<< E1000_ADVTXD_PAYLEN_SHIFT));
 			curr->read.cmd_type_len =
 			    htole32(len | E1000_ADVTXD_DTYP_DATA |
-				    E1000_ADVTXD_DCMD_IFCS |
-				    E1000_ADVTXD_DCMD_DEXT |
-				    E1000_ADVTXD_DCMD_EOP | flags);
+			    E1000_ADVTXD_DCMD_IFCS |
+			    E1000_ADVTXD_DCMD_DEXT |
+			    E1000_ADVTXD_DCMD_EOP | flags);
 
+			/* make sure changes to the buffer are synced */
 			bus_dmamap_sync(txr->txtag, txbuf->map,
 				BUS_DMASYNC_PREWRITE);
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
-		kring->nr_hwcur = k; /* the saved ring->cur */
-		kring->nr_hwavail -= n;
+		kring->nr_hwcur = head;
 
 		/* Set the watchdog XXX ? */
 		txr->queue_status = IGB_QUEUE_WORKING;
 		txr->watchdog_time = ticks;
 
+		/* synchronize the NIC ring */
 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
-		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
-		E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
+		/* (re)start the tx unit up to slot nic_i (excluded) */
+		E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i);
 	}
 
-	if (n == 0 || kring->nr_hwavail < 1) {
-		int delta;
-
+	/*
+	 * Second part: reclaim buffers for completed transmissions.
+	 */
+	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
 		/* record completed transmissions using TDH */
-		l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
-		if (l >= kring->nkr_num_slots) { /* XXX can it happen ? */
-			D("TDH wrap %d", l);
-			l -= kring->nkr_num_slots;
+		nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(kring->ring_id));
+		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
+			D("TDH wrap %d", nic_i);
+			nic_i -= kring->nkr_num_slots;
 		}
-		delta = l - txr->next_to_clean;
-		if (delta) {
-			/* some completed, increment hwavail. */
-			if (delta < 0)
-				delta += kring->nkr_num_slots;
-			txr->next_to_clean = l;
-			kring->nr_hwavail += delta;
-		}
+		txr->next_to_clean = nic_i;
+		kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
 	}
-	/* update avail to what the kernel knows */
-	ring->avail = kring->nr_hwavail;
 
-	if (do_lock)
-		IGB_TX_UNLOCK(txr);
+	nm_txsync_finalize(kring);
+
 	return 0;
 }
 
@@ -234,108 +191,106 @@
  * Reconcile kernel and user view of the receive ring.
  */
 static int
-igb_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+igb_netmap_rxsync(struct netmap_kring *kring, int flags)
 {
-	struct adapter *adapter = ifp->if_softc;
-	struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
-	struct netmap_adapter *na = NA(ifp);
-	struct netmap_kring *kring = &na->rx_rings[ring_nr];
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
 	struct netmap_ring *ring = kring->ring;
-	u_int j, l, n, lim = kring->nkr_num_slots - 1;
-	int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
-	u_int k = ring->cur, resvd = ring->reserved;
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = nm_rxsync_prologue(kring);
+	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
 
-	k = ring->cur;
-	if (k > lim)
+	/* device-specific */
+	struct adapter *adapter = ifp->if_softc;
+	struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];
+
+	if (head > lim)
 		return netmap_ring_reinit(kring);
 
-	if (do_lock)
-		IGB_RX_LOCK(rxr);
-
 	/* XXX check sync modes */
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
-	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+			BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 	/*
-	 * import newly received packets into the netmap ring.
-	 * j is an index in the netmap ring, l in the NIC ring.
+	 * First part: import newly received packets.
 	 */
-	l = rxr->next_to_check;
-	j = netmap_idx_n2k(kring, l);
 	if (netmap_no_pendintr || force_update) {
 		uint16_t slot_flags = kring->nkr_slot_flags;
 
+		nic_i = rxr->next_to_check;
+		nm_i = netmap_idx_n2k(kring, nic_i);
+
 		for (n = 0; ; n++) {
-			union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
+			union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i];
 			uint32_t staterr = le32toh(curr->wb.upper.status_error);
 
 			if ((staterr & E1000_RXD_STAT_DD) == 0)
 				break;
-			ring->slot[j].len = le16toh(curr->wb.upper.length);
-			ring->slot[j].flags = slot_flags;
+			ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
+			ring->slot[nm_i].flags = slot_flags;
 			bus_dmamap_sync(rxr->ptag,
-				rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+			    rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
 		if (n) { /* update the state variables */
-			rxr->next_to_check = l;
-			kring->nr_hwavail += n;
+			rxr->next_to_check = nic_i;
+			kring->nr_hwtail = nm_i;
 		}
 		kring->nr_kflags &= ~NKR_PENDINTR;
 	}
 
-	/* skip past packets that userspace has released */
-        j = kring->nr_hwcur;    /* netmap ring index */
-	if (resvd > 0) {
-		if (resvd + ring->avail >= lim + 1) {
-			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
-			ring->reserved = resvd = 0; // XXX panic...
-		}
-		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
-	}
-	if (j != k) { /* userspace has released some packets. */
-		l = netmap_idx_k2n(kring, j);
-		for (n = 0; j != k; n++) {
-			struct netmap_slot *slot = ring->slot + j;
-			union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
-			struct igb_rx_buf *rxbuf = rxr->rx_buffers + l;
+	/*
+	 * Second part: skip past packets that userspace has released.
+	 */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
 			uint64_t paddr;
-			void *addr = PNMB(slot, &paddr);
+			void *addr = PNMB(na, slot, &paddr);
 
-			if (addr == netmap_buffer_base) { /* bad buf */
-				if (do_lock)
-					IGB_RX_UNLOCK(rxr);
-				return netmap_ring_reinit(kring);
-			}
+			union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i];
+			struct igb_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
 
+			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
+				goto ring_reset;
+
 			if (slot->flags & NS_BUF_CHANGED) {
-				netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
+				/* buffer has changed, reload map */
+				netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr);
 				slot->flags &= ~NS_BUF_CHANGED;
 			}
+			curr->wb.upper.status_error = 0;
 			curr->read.pkt_addr = htole64(paddr);
-			curr->wb.upper.status_error = 0;
 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
-				BUS_DMASYNC_PREREAD);
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+			    BUS_DMASYNC_PREREAD);
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
-		kring->nr_hwavail -= n;
-		kring->nr_hwcur = k;
+		kring->nr_hwcur = head;
+
 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
-			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 		/*
 		 * IMPORTANT: we must leave one free slot in the ring,
-		 * so move l back by one unit
+		 * so move nic_i back by one unit
 		 */
-		l = (l == 0) ? lim : l - 1;
-		E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
+		nic_i = nm_prev(nic_i, lim);
+		E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
 	}
-	/* tell userspace that there are new packets */
-	ring->avail = kring->nr_hwavail - resvd;
-	if (do_lock)
-		IGB_RX_UNLOCK(rxr);
+
+	/* tell userspace that there might be new packets */
+	nm_rxsync_finalize(kring);
+
 	return 0;
+
+ring_reset:
+	return netmap_ring_reinit(kring);
 }
 
 
@@ -347,13 +302,14 @@
 	bzero(&na, sizeof(na));
 
 	na.ifp = adapter->ifp;
-	na.separate_locks = 1;
+	na.na_flags = NAF_BDG_MAYSLEEP;
 	na.num_tx_desc = adapter->num_tx_desc;
 	na.num_rx_desc = adapter->num_rx_desc;
 	na.nm_txsync = igb_netmap_txsync;
 	na.nm_rxsync = igb_netmap_rxsync;
-	na.nm_lock = igb_netmap_lock_wrapper;
 	na.nm_register = igb_netmap_reg;
-	netmap_attach(&na, adapter->num_queues);
-}	
+	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
+	netmap_attach(&na);
+}
+
 /* end of file */
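The igb conversion above is representative of the API change applied throughout this commit: the nm_lock callback and the per-ifp txsync/rxsync/register signatures are gone, ring state is described by nr_hwcur/nr_hwtail instead of nr_hwavail/avail, and the ring count is carried inside struct netmap_adapter rather than passed as a second argument to netmap_attach(). As a rough sketch of the attach-side glue a converted driver now provides (foo_* names are placeholders, not identifiers from this tree):

/* Sketch only: foo_* are placeholder names, not code from this commit. */
static void
foo_netmap_attach(struct adapter *adapter)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));
	na.ifp = adapter->ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;
	na.num_tx_desc = adapter->num_tx_desc;
	na.num_rx_desc = adapter->num_rx_desc;
	na.nm_txsync = foo_netmap_txsync;	/* int (*)(struct netmap_kring *, int) */
	na.nm_rxsync = foo_netmap_rxsync;
	na.nm_register = foo_netmap_reg;	/* the nm_lock callback no longer exists */
	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
	netmap_attach(&na);			/* ring counts now travel inside na */
}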

Added: trunk/sys/dev/netmap/if_ixl_netmap.h
===================================================================
--- trunk/sys/dev/netmap/if_ixl_netmap.h	                        (rev 0)
+++ trunk/sys/dev/netmap/if_ixl_netmap.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,423 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2015, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: stable/10/sys/dev/netmap/if_ixl_netmap.h 292096 2015-12-11 12:24:11Z smh $
+ *
+ * netmap support for: ixl
+ *
+ * derived from ixgbe
+ * netmap support for a network driver.
+ * This file contains code but only static or inline functions used
+ * by a single driver. To avoid replication of code we just #include
+ * it near the beginning of the standard driver.
+ * For ixl the file is imported in two places, hence the conditional at the
+ * beginning.
+ */
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+
+/*
+ * Some drivers may need the following headers. Others
+ * already include them by default
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+ */
+#include <dev/netmap/netmap_kern.h>
+
+int ixl_netmap_txsync(struct netmap_kring *kring, int flags);
+int ixl_netmap_rxsync(struct netmap_kring *kring, int flags);
+
+extern int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip;
+
+#ifdef NETMAP_IXL_MAIN
+/*
+ * device-specific sysctl variables:
+ *
+ * ixl_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
+ *	During regular operations the CRC is stripped, but on some
+ *	hardware reception of frames not multiple of 64 is slower,
+ *	so using crcstrip=0 helps in benchmarks.
+ *
+ * ixl_rx_miss, ixl_rx_miss_bufs:
+ *	count packets that might be missed due to lost interrupts.
+ */
+SYSCTL_DECL(_dev_netmap);
+int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip;
+SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_crcstrip,
+    CTLFLAG_RW, &ixl_crcstrip, 0, "strip CRC on rx frames");
+SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_rx_miss,
+    CTLFLAG_RW, &ixl_rx_miss, 0, "potentially missed rx intr");
+SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_rx_miss_bufs,
+    CTLFLAG_RW, &ixl_rx_miss_bufs, 0, "potentially missed rx intr bufs");
+
+
+/*
+ * Register/unregister. We are already under netmap lock.
+ * Only called on the first register or the last unregister.
+ */
+static int
+ixl_netmap_reg(struct netmap_adapter *na, int onoff)
+{
+	struct ifnet *ifp = na->ifp;
+        struct ixl_vsi  *vsi = ifp->if_softc;
+        struct ixl_pf   *pf = (struct ixl_pf *)vsi->back;
+
+	IXL_PF_LOCK(pf);
+	ixl_disable_intr(vsi);
+
+	/* Tell the stack that the interface is no longer active */
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+	//set_crcstrip(&adapter->hw, onoff);
+	/* enable or disable flags and callbacks in na and ifp */
+	if (onoff) {
+		nm_set_native_flags(na);
+	} else {
+		nm_clear_native_flags(na);
+	}
+	ixl_init_locked(pf);	/* also enables intr */
+	//set_crcstrip(&adapter->hw, onoff); // XXX why twice ?
+	IXL_PF_UNLOCK(pf);
+	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
+}
+
+
+/*
+ * The attach routine, called near the end of ixl_attach(),
+ * fills the parameters for netmap_attach() and calls it.
+ * It cannot fail; in the worst case (such as no memory)
+ * netmap mode will be disabled and the driver will only
+ * operate in standard mode.
+ */
+static void
+ixl_netmap_attach(struct ixl_vsi *vsi)
+{
+	struct netmap_adapter na;
+
+	bzero(&na, sizeof(na));
+
+	na.ifp = vsi->ifp;
+	na.na_flags = NAF_BDG_MAYSLEEP;
+	// XXX check that queues is set.
+	printf("queues is %p\n", vsi->queues);
+	if (vsi->queues) {
+		na.num_tx_desc = vsi->queues[0].num_desc;
+		na.num_rx_desc = vsi->queues[0].num_desc;
+	}
+	na.nm_txsync = ixl_netmap_txsync;
+	na.nm_rxsync = ixl_netmap_rxsync;
+	na.nm_register = ixl_netmap_reg;
+	na.num_tx_rings = na.num_rx_rings = vsi->num_queues;
+	netmap_attach(&na);
+}
+
+
+#else /* !NETMAP_IXL_MAIN, code for ixl_txrx.c */
+
+/*
+ * Reconcile kernel and user view of the transmit ring.
+ *
+ * All information is in the kring.
+ * Userspace wants to send packets up to the one before kring->rhead,
+ * kernel knows kring->nr_hwcur is the first unsent packet.
+ *
+ * Here we push packets out (as many as possible), and possibly
+ * reclaim buffers from previously completed transmission.
+ *
+ * The caller (netmap) guarantees that there is only one instance
+ * running at any time. Any interference with other driver
+ * methods should be handled by the individual drivers.
+ */
+int
+ixl_netmap_txsync(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
+	/*
+	 * interrupts on every tx packet are expensive, so request
+	 * them every half ring, or where NS_REPORT is set
+	 */
+	u_int report_frequency = kring->nkr_num_slots >> 1;
+
+	/* device-specific */
+	struct ixl_vsi *vsi = ifp->if_softc;
+	struct ixl_queue *que = &vsi->queues[kring->ring_id];
+	struct tx_ring *txr = &que->txr;
+
+	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
+			BUS_DMASYNC_POSTREAD);
+
+	/*
+	 * First part: process new packets to send.
+	 * nm_i is the current index in the netmap ring,
+	 * nic_i is the corresponding index in the NIC ring.
+	 *
+	 * If we have packets to send (nm_i != head)
+	 * iterate over the netmap ring, fetch length and update
+	 * the corresponding slot in the NIC ring. Some drivers also
+	 * need to update the buffer's physical address in the NIC slot
+	 * even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
+	 *
+	 * The netmap_reload_map() call is especially expensive,
+	 * even when (as in this case) the tag is 0, so do it only
+	 * when the buffer has actually changed.
+	 *
+	 * If possible do not set the report/intr bit on all slots,
+	 * but only a few times per ring or when NS_REPORT is set.
+	 *
+	 * Finally, on 10G and faster drivers, it might be useful
+	 * to prefetch the next slot and txr entry.
+	 */
+
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {	/* we have new packets to send */
+		nic_i = netmap_idx_k2n(kring, nm_i);
+
+		__builtin_prefetch(&ring->slot[nm_i]);
+		__builtin_prefetch(&txr->buffers[nic_i]);
+
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			u_int len = slot->len;
+			uint64_t paddr;
+			void *addr = PNMB(na, slot, &paddr);
+
+			/* device-specific */
+			struct i40e_tx_desc *curr = &txr->base[nic_i];
+			struct ixl_tx_buf *txbuf = &txr->buffers[nic_i];
+			u64 flags = (slot->flags & NS_REPORT ||
+				nic_i == 0 || nic_i == report_frequency) ?
+				((u64)I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT) : 0;
+
+			/* prefetch for next round */
+			__builtin_prefetch(&ring->slot[nm_i + 1]);
+			__builtin_prefetch(&txr->buffers[nic_i + 1]);
+
+			NM_CHECK_ADDR_LEN(na, addr, len);
+
+			if (slot->flags & NS_BUF_CHANGED) {
+				/* buffer has changed, reload map */
+				netmap_reload_map(na, txr->dma.tag, txbuf->map, addr);
+			}
+			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+			/* Fill the slot in the NIC ring. */
+			curr->buffer_addr = htole64(paddr);
+			curr->cmd_type_offset_bsz = htole64(
+			    ((u64)len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
+			    flags |
+			    ((u64)I40E_TX_DESC_CMD_EOP << I40E_TXD_QW1_CMD_SHIFT)
+			  ); // XXX more ?
+
+			/* make sure changes to the buffer are synced */
+			bus_dmamap_sync(txr->dma.tag, txbuf->map,
+				BUS_DMASYNC_PREWRITE);
+
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
+		}
+		kring->nr_hwcur = head;
+
+		/* synchronize the NIC ring */
+		bus_dmamap_sync(txr->dma.tag, txr->dma.map,
+			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+		/* (re)start the tx unit up to slot nic_i (excluded) */
+		wr32(vsi->hw, txr->tail, nic_i);
+	}
+
+	/*
+	 * Second part: reclaim buffers for completed transmissions.
+	 */
+	nic_i = LE32_TO_CPU(*(volatile __le32 *)&txr->base[que->num_desc]);
+	if (nic_i != txr->next_to_clean) {
+		/* some tx completed, increment avail */
+		txr->next_to_clean = nic_i;
+		kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
+	}
+
+	nm_txsync_finalize(kring);
+
+	return 0;
+}
+
+
+/*
+ * Reconcile kernel and user view of the receive ring.
+ * Same as for the txsync, this routine must be efficient.
+ * The caller guarantees a single invocation, but races against
+ * the rest of the driver should be handled here.
+ *
+ * On call, kring->rhead is the first packet that userspace wants
+ * to keep, and kring->rcur is the wakeup point.
+ * The kernel has previously reported packets up to kring->rtail.
+ *
+ * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
+ * of whether or not we received an interrupt.
+ */
+int
+ixl_netmap_rxsync(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = nm_rxsync_prologue(kring);
+	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
+
+	/* device-specific */
+	struct ixl_vsi *vsi = ifp->if_softc;
+	struct ixl_queue *que = &vsi->queues[kring->ring_id];
+	struct rx_ring *rxr = &que->rxr;
+
+	if (head > lim)
+		return netmap_ring_reinit(kring);
+
+	/* XXX check sync modes */
+	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
+			BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+	/*
+	 * First part: import newly received packets.
+	 *
+	 * nm_i is the index of the next free slot in the netmap ring,
+	 * nic_i is the index of the next received packet in the NIC ring,
+	 * and they may differ in case if_init() has been called while
+	 * in netmap mode. For the receive ring we have
+	 *
+	 *	nic_i = rxr->next_check;
+	 *	nm_i = kring->nr_hwtail (previous)
+	 * and
+	 *	nm_i == (nic_i + kring->nkr_hwofs) % ring_size
+	 *
+	 * rxr->next_check is set to 0 on a ring reinit
+	 */
+	if (netmap_no_pendintr || force_update) {
+		int crclen = ixl_crcstrip ? 0 : 4;
+		uint16_t slot_flags = kring->nkr_slot_flags;
+
+		nic_i = rxr->next_check; // or also k2n(kring->nr_hwtail)
+		nm_i = netmap_idx_n2k(kring, nic_i);
+
+		for (n = 0; ; n++) {
+			union i40e_32byte_rx_desc *curr = &rxr->base[nic_i];
+			uint64_t qword = le64toh(curr->wb.qword1.status_error_len);
+			uint32_t staterr = (qword & I40E_RXD_QW1_STATUS_MASK)
+				 >> I40E_RXD_QW1_STATUS_SHIFT;
+
+			if ((staterr & (1<<I40E_RX_DESC_STATUS_DD_SHIFT)) == 0)
+				break;
+			ring->slot[nm_i].len = ((qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
+			    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT) - crclen;
+			ring->slot[nm_i].flags = slot_flags;
+			bus_dmamap_sync(rxr->ptag,
+			    rxr->buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
+		}
+		if (n) { /* update the state variables */
+			if (netmap_no_pendintr && !force_update) {
+				/* diagnostics */
+				ixl_rx_miss ++;
+				ixl_rx_miss_bufs += n;
+			}
+			rxr->next_check = nic_i;
+			kring->nr_hwtail = nm_i;
+		}
+		kring->nr_kflags &= ~NKR_PENDINTR;
+	}
+
+	/*
+	 * Second part: skip past packets that userspace has released.
+	 * (kring->nr_hwcur to head excluded),
+	 * and make the buffers available for reception.
+	 * As usual nm_i is the index in the netmap ring,
+	 * nic_i is the index in the NIC ring, and
+	 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
+	 */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			uint64_t paddr;
+			void *addr = PNMB(na, slot, &paddr);
+
+			union i40e_32byte_rx_desc *curr = &rxr->base[nic_i];
+			struct ixl_rx_buf *rxbuf = &rxr->buffers[nic_i];
+
+			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
+				goto ring_reset;
+
+			if (slot->flags & NS_BUF_CHANGED) {
+				/* buffer has changed, reload map */
+				netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+			curr->read.pkt_addr = htole64(paddr);
+			curr->read.hdr_addr = 0; // XXX needed
+			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
+			    BUS_DMASYNC_PREREAD);
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
+		}
+		kring->nr_hwcur = head;
+
+		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+		/*
+		 * IMPORTANT: we must leave one free slot in the ring,
+		 * so move nic_i back by one unit
+		 */
+		nic_i = nm_prev(nic_i, lim);
+		wr32(vsi->hw, rxr->tail, nic_i);
+	}
+
+	/* tell userspace that there might be new packets */
+	nm_rxsync_finalize(kring);
+
+	return 0;
+
+ring_reset:
+	return netmap_ring_reinit(kring);
+}
+
+#endif /* !NETMAP_IXL_MAIN */
+
+/* end of file */


Property changes on: trunk/sys/dev/netmap/if_ixl_netmap.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
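All of the rewritten sync routines above (and the lem/re conversions below) walk the rings with nm_next()/nm_prev() and keep the two indices in lockstep under the invariant nm_i == (nic_i + kring->nkr_hwofs) % ring_size. A small standalone sketch of those helpers and the invariant, using made-up ring parameters rather than values from any driver:

#include <stdio.h>

/* Standalone sketch: equivalents of netmap's nm_next()/nm_prev()
 * ring-index helpers; lim is nkr_num_slots - 1. */
static unsigned nm_next(unsigned i, unsigned lim) { return (i == lim) ? 0 : i + 1; }
static unsigned nm_prev(unsigned i, unsigned lim) { return (i == 0) ? lim : i - 1; }

int main(void)
{
	/* made-up example parameters */
	unsigned const num_slots = 8, lim = num_slots - 1, hwofs = 3;
	unsigned nic_i = 6;
	unsigned nm_i = (nic_i + hwofs) % num_slots;	/* the kring/NIC invariant */
	int n;

	for (n = 0; n < 4; n++) {
		printf("nm_i=%u nic_i=%u\n", nm_i, nic_i);
		nm_i = nm_next(nm_i, lim);	/* both indices advance together, */
		nic_i = nm_next(nic_i, lim);	/* so the invariant is preserved  */
	}
	printf("slot before nic_i (leave one free): %u\n", nm_prev(nic_i, lim));
	return 0;
}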
Modified: trunk/sys/dev/netmap/if_lem_netmap.h
===================================================================
--- trunk/sys/dev/netmap/if_lem_netmap.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/netmap/if_lem_netmap.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 /*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -25,14 +26,14 @@
 
 
 /*
- * $MidnightBSD$
- * $Id: if_lem_netmap.h,v 1.2 2013-01-08 03:53:24 laffer1 Exp $
+ * $FreeBSD: stable/10/sys/dev/netmap/if_lem_netmap.h 278779 2015-02-14 19:41:26Z luigi $
  *
- * netmap support for "lem"
+ * netmap support for: lem
  *
  * For details on netmap support please see ixgbe_netmap.h
  */
 
+
 #include <net/netmap.h>
 #include <sys/selinfo.h>
 #include <vm/vm.h>
@@ -39,48 +40,18 @@
 #include <vm/pmap.h>    /* vtophys ? */
 #include <dev/netmap/netmap_kern.h>
 
+extern int netmap_adaptive_io;
 
-static void
-lem_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int ringid)
-{
-	struct adapter *adapter = ifp->if_softc;
-
-	/* only one ring here so ignore the ringid */
-	switch (what) {
-	case NETMAP_CORE_LOCK:
-		EM_CORE_LOCK(adapter);
-		break;
-	case NETMAP_CORE_UNLOCK:
-		EM_CORE_UNLOCK(adapter);
-		break;
-	case NETMAP_TX_LOCK:
-		EM_TX_LOCK(adapter);
-		break;
-	case NETMAP_TX_UNLOCK:
-		EM_TX_UNLOCK(adapter);
-		break;
-	case NETMAP_RX_LOCK:
-		EM_RX_LOCK(adapter);
-		break;
-	case NETMAP_RX_UNLOCK:
-		EM_RX_UNLOCK(adapter);
-		break;
-	}
-}
-
-
 /*
- * Register/unregister
+ * Register/unregister. We are already under netmap lock.
  */
 static int
-lem_netmap_reg(struct ifnet *ifp, int onoff)
+lem_netmap_reg(struct netmap_adapter *na, int onoff)
 {
+	struct ifnet *ifp = na->ifp;
 	struct adapter *adapter = ifp->if_softc;
-	struct netmap_adapter *na = NA(ifp);
-	int error = 0;
 
-	if (na == NULL)
-		return EINVAL;
+	EM_CORE_LOCK(adapter);
 
 	lem_disable_intr(adapter);
 
@@ -92,30 +63,22 @@
 	taskqueue_drain(adapter->tq, &adapter->rxtx_task);
 	taskqueue_drain(adapter->tq, &adapter->link_task);
 #endif /* !EM_LEGCY_IRQ */
+
+	/* enable or disable flags and callbacks in na and ifp */
 	if (onoff) {
-		ifp->if_capenable |= IFCAP_NETMAP;
-
-		na->if_transmit = ifp->if_transmit;
-		ifp->if_transmit = netmap_start;
-
-		lem_init_locked(adapter);
-		if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
-			error = ENOMEM;
-			goto fail;
-		}
+		nm_set_native_flags(na);
 	} else {
-fail:
-		/* return to non-netmap mode */
-		ifp->if_transmit = na->if_transmit;
-		ifp->if_capenable &= ~IFCAP_NETMAP;
-		lem_init_locked(adapter);	/* also enable intr */
+		nm_clear_native_flags(na);
 	}
+	lem_init_locked(adapter);	/* also enable intr */
 
 #ifndef EM_LEGACY_IRQ
 	taskqueue_unblock(adapter->tq); // XXX do we need this ?
 #endif /* !EM_LEGCY_IRQ */
 
-	return (error);
+	EM_CORE_UNLOCK(adapter);
+
+	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
 }
 
 
@@ -123,101 +86,225 @@
  * Reconcile kernel and user view of the transmit ring.
  */
 static int
-lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+lem_netmap_txsync(struct netmap_kring *kring, int flags)
 {
-	struct adapter *adapter = ifp->if_softc;
-	struct netmap_adapter *na = NA(ifp);
-	struct netmap_kring *kring = &na->tx_rings[ring_nr];
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
 	struct netmap_ring *ring = kring->ring;
-	u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
-
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
 	/* generate an interrupt approximately every half ring */
-	int report_frequency = kring->nkr_num_slots >> 1;
+	u_int report_frequency = kring->nkr_num_slots >> 1;
 
-	/* take a copy of ring->cur now, and never read it again */
-	k = ring->cur;
-	if (k > lim)
-		return netmap_ring_reinit(kring);
+	/* device-specific */
+	struct adapter *adapter = ifp->if_softc;
+#ifdef NIC_PARAVIRT
+	struct paravirt_csb *csb = adapter->csb;
+	uint64_t *csbd = (uint64_t *)(csb + 1);
+#endif /* NIC_PARAVIRT */
 
-	if (do_lock)
-		EM_TX_LOCK(adapter);
 	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
 			BUS_DMASYNC_POSTREAD);
+
 	/*
-	 * Process new packets to send. j is the current index in the
-	 * netmap ring, l is the corresponding index in the NIC ring.
+	 * First part: process new packets to send.
 	 */
-	j = kring->nr_hwcur;
-	if (j != k) {	/* we have new packets to send */
-		l = netmap_idx_k2n(kring, j);
-		for (n = 0; j != k; n++) {
-			/* slot is the current slot in the netmap ring */
-			struct netmap_slot *slot = &ring->slot[j];
-			/* curr is the current slot in the nic ring */
-			struct e1000_tx_desc *curr = &adapter->tx_desc_base[l];
-			struct em_buffer *txbuf = &adapter->tx_buffer_area[l];
-			int flags = ((slot->flags & NS_REPORT) ||
-				j == 0 || j == report_frequency) ?
-					E1000_TXD_CMD_RS : 0;
+
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {	/* we have new packets to send */
+#ifdef NIC_PARAVIRT
+		int do_kick = 0;
+		uint64_t t = 0; // timestamp
+		int n = head - nm_i;
+		if (n < 0)
+			n += lim + 1;
+		if (csb) {
+			t = rdtsc(); /* last timestamp */
+			csbd[16] += t - csbd[0]; /* total Wg */
+			csbd[17] += n;		/* Wg count */
+			csbd[0] = t;
+		}
+#endif /* NIC_PARAVIRT */
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		while (nm_i != head) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			u_int len = slot->len;
 			uint64_t paddr;
-			void *addr = PNMB(slot, &paddr);
-			u_int len = slot->len;
+			void *addr = PNMB(na, slot, &paddr);
 
-			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
-				if (do_lock)
-					EM_TX_UNLOCK(adapter);
-				return netmap_ring_reinit(kring);
-			}
+			/* device-specific */
+			struct e1000_tx_desc *curr = &adapter->tx_desc_base[nic_i];
+			struct em_buffer *txbuf = &adapter->tx_buffer_area[nic_i];
+			int flags = (slot->flags & NS_REPORT ||
+				nic_i == 0 || nic_i == report_frequency) ?
+				E1000_TXD_CMD_RS : 0;
 
-			slot->flags &= ~NS_REPORT;
+			NM_CHECK_ADDR_LEN(na, addr, len);
+
 			if (slot->flags & NS_BUF_CHANGED) {
 				/* buffer has changed, reload map */
-				netmap_reload_map(adapter->txtag, txbuf->map, addr);
 				curr->buffer_addr = htole64(paddr);
-				slot->flags &= ~NS_BUF_CHANGED;
+				netmap_reload_map(na, adapter->txtag, txbuf->map, addr);
 			}
+			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+			/* Fill the slot in the NIC ring. */
 			curr->upper.data = 0;
-			curr->lower.data =
-			    htole32( adapter->txd_cmd | len |
+			curr->lower.data = htole32(adapter->txd_cmd | len |
 				(E1000_TXD_CMD_EOP | flags) );
+			bus_dmamap_sync(adapter->txtag, txbuf->map,
+				BUS_DMASYNC_PREWRITE);
 
-			bus_dmamap_sync(adapter->txtag, txbuf->map,
-			    BUS_DMASYNC_PREWRITE);
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
+			// XXX might try an early kick
 		}
-		kring->nr_hwcur = k; /* the saved ring->cur */
-		kring->nr_hwavail -= n;
+		kring->nr_hwcur = head;
 
+		 /* synchronize the NIC ring */
 		bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
-		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
-		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), l);
+#ifdef NIC_PARAVIRT
+		/* set unconditionally, then also kick if needed */
+		if (csb) {
+			t = rdtsc();
+			if (csb->host_need_txkick == 2) {
+				/* can compute an update of delta */
+				int64_t delta = t - csbd[3];
+				if (delta < 0)
+					delta = -delta;
+				if (csbd[8] == 0 || delta < csbd[8]) {
+					csbd[8] = delta;
+					csbd[9]++;
+				}
+				csbd[10]++;
+			}
+			csb->guest_tdt = nic_i;
+			csbd[18] += t - csbd[0]; // total wp
+			csbd[19] += n;
+		}
+		if (!csb || !csb->guest_csb_on || (csb->host_need_txkick & 1))
+			do_kick = 1;
+		if (do_kick)
+#endif /* NIC_PARAVIRT */
+		/* (re)start the tx unit up to slot nic_i (excluded) */
+		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i);
+#ifdef NIC_PARAVIRT
+		if (do_kick) {
+			uint64_t t1 = rdtsc();
+			csbd[20] += t1 - t; // total Np
+			csbd[21]++;
+		}
+#endif /* NIC_PARAVIRT */
 	}
 
-	if (n == 0 || kring->nr_hwavail < 1) {
-		int delta;
+	/*
+	 * Second part: reclaim buffers for completed transmissions.
+	 */
+	if (ticks != kring->last_reclaim || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
+		kring->last_reclaim = ticks;
+		/* record completed transmissions using TDH */
+#ifdef NIC_PARAVIRT
+		/* host updates tdh unconditionally, and we have
+		 * no side effects on reads, so we can read from there
+		 * instead of exiting.
+		 */
+		if (csb) {
+		    static int drain = 0, nodrain=0, good = 0, bad = 0, fail = 0;
+		    u_int x = adapter->next_tx_to_clean;
+		    csbd[19]++; // XXX count reclaims
+		    nic_i = csb->host_tdh;
+		    if (csb->guest_csb_on) {
+			if (nic_i == x) {
+			    bad++;
+		    	    csbd[24]++; // failed reclaims
+			    /* no progress, request kick and retry */
+			    csb->guest_need_txkick = 1;
+			    mb(); // XXX barrier
+		    	    nic_i = csb->host_tdh;
+			} else {
+			    good++;
+			}
+			if (nic_i != x) {
+			    csb->guest_need_txkick = 2;
+			    if (nic_i == csb->guest_tdt)
+				drain++;
+			    else
+				nodrain++;
+#if 1
+			if (netmap_adaptive_io) {
+			    /* new mechanism: last half ring (or so)
+			     * released one slot at a time.
+			     * This effectively makes the system spin.
+			     *
+			     * Take next_to_clean + 1 as a reference.
+			     * tdh must be ahead or equal
+			     * On entry, the logical order is
+			     *		x < tdh = nic_i
+			     * We first push tdh up to avoid wraps.
+			     * The limit is tdh-ll (half ring).
+			     * if tdh-256 < x we report x;
+			     * else we report tdh-256
+			     */
+			    u_int tdh = nic_i;
+			    u_int ll = csbd[15];
+			    u_int delta = lim/8;
+			    if (netmap_adaptive_io == 2 || ll > delta)
+				csbd[15] = ll = delta;
+			    else if (netmap_adaptive_io == 1 && ll > 1) {
+				csbd[15]--;
+			    }
 
-		/* record completed transmissions using TDH */
-		l = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
-		if (l >= kring->nkr_num_slots) { /* XXX can it happen ? */
-			D("bad TDH %d", l);
-			l -= kring->nkr_num_slots;
+			    if (nic_i >= kring->nkr_num_slots) {
+				RD(5, "bad nic_i %d on input", nic_i);
+			    }
+			    x = nm_next(x, lim);
+			    if (tdh < x)
+				tdh += lim + 1;
+			    if (tdh <= x + ll) {
+				nic_i = x;
+				csbd[25]++; //report n + 1;
+			    } else {
+				tdh = nic_i;
+				if (tdh < ll)
+				    tdh += lim + 1;
+				nic_i = tdh - ll;
+				csbd[26]++; // report tdh - ll
+			    }
+			}
+#endif
+			} else {
+			    /* we stop, count whether we are idle or not */
+			    int bh_active = csb->host_need_txkick & 2 ? 4 : 0;
+			    csbd[27+ csb->host_need_txkick]++;
+			    if (netmap_adaptive_io == 1) {
+				if (bh_active && csbd[15] > 1)
+				    csbd[15]--;
+				else if (!bh_active && csbd[15] < lim/2)
+				    csbd[15]++;
+			    }
+			    bad--;
+			    fail++;
+			}
+		    }
+		    RD(1, "drain %d nodrain %d good %d retry %d fail %d",
+			drain, nodrain, good, bad, fail);
+		} else
+#endif /* !NIC_PARAVIRT */
+		nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
+		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
+			D("TDH wrap %d", nic_i);
+			nic_i -= kring->nkr_num_slots;
 		}
-		delta = l - adapter->next_tx_to_clean;
-		if (delta) {
-			/* some tx completed, increment hwavail. */
-			if (delta < 0)
-				delta += kring->nkr_num_slots;
-			adapter->next_tx_to_clean = l;
-			kring->nr_hwavail += delta;
-		}
+		adapter->next_tx_to_clean = nic_i;
+		kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
 	}
-	/* update avail to what the kernel knows */
-	ring->avail = kring->nr_hwavail;
 
-	if (do_lock)
-		EM_TX_UNLOCK(adapter);
+	nm_txsync_finalize(kring);
+
 	return 0;
 }
 
@@ -226,116 +313,167 @@
  * Reconcile kernel and user view of the receive ring.
  */
 static int
-lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+lem_netmap_rxsync(struct netmap_kring *kring, int flags)
 {
-	struct adapter *adapter = ifp->if_softc;
-	struct netmap_adapter *na = NA(ifp);
-	struct netmap_kring *kring = &na->rx_rings[ring_nr];
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
 	struct netmap_ring *ring = kring->ring;
-	int j, l, n, lim = kring->nkr_num_slots - 1;
-	int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
-	u_int k = ring->cur, resvd = ring->reserved;
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = nm_rxsync_prologue(kring);
+	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
 
-	if (k > lim)
+	/* device-specific */
+	struct adapter *adapter = ifp->if_softc;
+#ifdef NIC_PARAVIRT
+	struct paravirt_csb *csb = adapter->csb;
+	uint32_t csb_mode = csb && csb->guest_csb_on;
+	uint32_t do_host_rxkick = 0;
+#endif /* NIC_PARAVIRT */
+
+	if (head > lim)
 		return netmap_ring_reinit(kring);
 
-	if (do_lock)
-		EM_RX_LOCK(adapter);
-
+#ifdef NIC_PARAVIRT
+	if (csb_mode) {
+		force_update = 1;
+		csb->guest_need_rxkick = 0;
+	}
+#endif /* NIC_PARAVIRT */
 	/* XXX check sync modes */
 	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
 			BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 	/*
-	 * Import newly received packets into the netmap ring.
-	 * j is an index in the netmap ring, l in the NIC ring.
+	 * First part: import newly received packets.
 	 */
-	l = adapter->next_rx_desc_to_check;
-	j = netmap_idx_n2k(kring, l);
 	if (netmap_no_pendintr || force_update) {
 		uint16_t slot_flags = kring->nkr_slot_flags;
 
+		nic_i = adapter->next_rx_desc_to_check;
+		nm_i = netmap_idx_n2k(kring, nic_i);
+
 		for (n = 0; ; n++) {
-			struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
+			struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
 			uint32_t staterr = le32toh(curr->status);
 			int len;
 
+#ifdef NIC_PARAVIRT
+			if (csb_mode) {
+			    if ((staterr & E1000_RXD_STAT_DD) == 0) {
+				/* don't bother to retry if more than 1 pkt */
+				if (n > 1)
+				    break;
+				csb->guest_need_rxkick = 1;
+				wmb();
+				staterr = le32toh(curr->status);
+				if ((staterr & E1000_RXD_STAT_DD) == 0) {
+				    break;
+				} else { /* we are good */
+				   csb->guest_need_rxkick = 0;
+				}
+			    }
+			} else
+#endif /* NIC_PARAVIRT */
 			if ((staterr & E1000_RXD_STAT_DD) == 0)
 				break;
 			len = le16toh(curr->length) - 4; // CRC
 			if (len < 0) {
-				D("bogus pkt size at %d", j);
+				RD(5, "bogus pkt (%d) size %d nic idx %d", n, len, nic_i);
 				len = 0;
 			}
-			ring->slot[j].len = len;
-			ring->slot[j].flags = slot_flags;
+			ring->slot[nm_i].len = len;
+			ring->slot[nm_i].flags = slot_flags;
 			bus_dmamap_sync(adapter->rxtag,
-				adapter->rx_buffer_area[l].map,
-				    BUS_DMASYNC_POSTREAD);
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+				adapter->rx_buffer_area[nic_i].map,
+				BUS_DMASYNC_POSTREAD);
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
 		if (n) { /* update the state variables */
-			adapter->next_rx_desc_to_check = l;
-			kring->nr_hwavail += n;
+#ifdef NIC_PARAVIRT
+			if (csb_mode) {
+			    if (n > 1) {
+				/* leave one spare buffer so we avoid rxkicks */
+				nm_i = nm_prev(nm_i, lim);
+				nic_i = nm_prev(nic_i, lim);
+				n--;
+			    } else {
+				csb->guest_need_rxkick = 1;
+			    }
+			}
+#endif /* NIC_PARAVIRT */
+			ND("%d new packets at nic %d nm %d tail %d",
+				n,
+				adapter->next_rx_desc_to_check,
+				netmap_idx_n2k(kring, adapter->next_rx_desc_to_check),
+				kring->nr_hwtail);
+			adapter->next_rx_desc_to_check = nic_i;
+			// if_inc_counter(ifp, IFCOUNTER_IPACKETS, n);
+			kring->nr_hwtail = nm_i;
 		}
 		kring->nr_kflags &= ~NKR_PENDINTR;
 	}
 
-	/* skip past packets that userspace has released */
-	j = kring->nr_hwcur;	/* netmap ring index */
-	if (resvd > 0) {
-		if (resvd + ring->avail >= lim + 1) {
-			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
-			ring->reserved = resvd = 0; // XXX panic...
-		}
-		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
-	}
-	if (j != k) { /* userspace has released some packets. */
-		l = netmap_idx_k2n(kring, j); /* NIC ring index */
-		for (n = 0; j != k; n++) {
-			struct netmap_slot *slot = &ring->slot[j];
-			struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
-			struct em_buffer *rxbuf = &adapter->rx_buffer_area[l];
+	/*
+	 * Second part: skip past packets that userspace has released.
+	 */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
 			uint64_t paddr;
-			void *addr = PNMB(slot, &paddr);
+			void *addr = PNMB(na, slot, &paddr);
 
-			if (addr == netmap_buffer_base) { /* bad buf */
-				if (do_lock)
-					EM_RX_UNLOCK(adapter);
-				return netmap_ring_reinit(kring);
-			}
+			struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
+			struct em_buffer *rxbuf = &adapter->rx_buffer_area[nic_i];
 
+			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
+				goto ring_reset;
+
 			if (slot->flags & NS_BUF_CHANGED) {
 				/* buffer has changed, reload map */
-				netmap_reload_map(adapter->rxtag, rxbuf->map, addr);
 				curr->buffer_addr = htole64(paddr);
+				netmap_reload_map(na, adapter->rxtag, rxbuf->map, addr);
 				slot->flags &= ~NS_BUF_CHANGED;
 			}
 			curr->status = 0;
-
 			bus_dmamap_sync(adapter->rxtag, rxbuf->map,
 			    BUS_DMASYNC_PREREAD);
-
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+#ifdef NIC_PARAVIRT
+			if (csb_mode && csb->host_rxkick_at == nic_i)
+				do_host_rxkick = 1;
+#endif /* NIC_PARAVIRT */
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
-		kring->nr_hwavail -= n;
-		kring->nr_hwcur = k;
+		kring->nr_hwcur = head;
 		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 		/*
 		 * IMPORTANT: we must leave one free slot in the ring,
-		 * so move l back by one unit
+		 * so move nic_i back by one unit
 		 */
-		l = (l == 0) ? lim : l - 1;
-		E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), l);
+		nic_i = nm_prev(nic_i, lim);
+#ifdef NIC_PARAVIRT
+		/* set unconditionally, then also kick if needed */
+		if (csb)
+			csb->guest_rdt = nic_i;
+		if (!csb_mode || do_host_rxkick)
+#endif /* NIC_PARAVIRT */
+		E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
 	}
-	/* tell userspace that there are new packets */
-	ring->avail = kring->nr_hwavail - resvd;
-	if (do_lock)
-		EM_RX_UNLOCK(adapter);
+
+	/* tell userspace that there might be new packets */
+	nm_rxsync_finalize(kring);
+
 	return 0;
+
+ring_reset:
+	return netmap_ring_reinit(kring);
 }
 
 
@@ -347,14 +485,14 @@
 	bzero(&na, sizeof(na));
 
 	na.ifp = adapter->ifp;
-	na.separate_locks = 1;
+	na.na_flags = NAF_BDG_MAYSLEEP;
 	na.num_tx_desc = adapter->num_tx_desc;
 	na.num_rx_desc = adapter->num_rx_desc;
 	na.nm_txsync = lem_netmap_txsync;
 	na.nm_rxsync = lem_netmap_rxsync;
-	na.nm_lock = lem_netmap_lock_wrapper;
 	na.nm_register = lem_netmap_reg;
-	netmap_attach(&na, 1);
+	na.num_tx_rings = na.num_rx_rings = 1;
+	netmap_attach(&na);
 }
 
 /* end of file */
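The hwavail/avail bookkeeping removed from lem (and from igb/re) is not lost: under the new API the driver only advances nr_hwcur and nr_hwtail, and the number of usable slots is derived from cur and tail, which is what netmap_user.h exposes as nm_ring_space(). A standalone sketch of that computation, with made-up example values:

#include <stdio.h>
#include <stdint.h>

/* Standalone sketch of the cur/tail space computation that replaces the
 * old hwavail counter; mirrors nm_ring_space() from netmap_user.h. */
static uint32_t
ring_space(uint32_t cur, uint32_t tail, uint32_t num_slots)
{
	int32_t space = (int32_t)tail - (int32_t)cur;

	if (space < 0)
		space += (int32_t)num_slots;	/* tail wrapped past cur */
	return ((uint32_t)space);
}

int main(void)
{
	/* made-up example: 1024-slot tx ring, reclaim left tail at 699,
	 * userspace has consumed slots up to 900 */
	uint32_t num_slots = 1024, tail = 699, cur = 900;

	printf("usable slots: %u\n", ring_space(cur, tail, num_slots));
	return 0;
}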

Modified: trunk/sys/dev/netmap/if_re_netmap.h
===================================================================
--- trunk/sys/dev/netmap/if_re_netmap.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/netmap/if_re_netmap.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 /*
- * Copyright (C) 2011 Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -24,11 +25,11 @@
  */
 
 /*
- * $MidnightBSD$
- * $Id: if_re_netmap.h,v 1.2 2013-01-08 03:53:24 laffer1 Exp $
+ * $FreeBSD: stable/10/sys/dev/netmap/if_re_netmap.h 278779 2015-02-14 19:41:26Z luigi $
  *
- * netmap support for "re"
- * For details on netmap support please see ixgbe_netmap.h
+ * netmap support for: re
+ *
+ * For more details on netmap support please see ixgbe_netmap.h
  */
 
 
@@ -40,71 +41,24 @@
 
 
 /*
- * wrapper to export locks to the generic code
- * We should not use the tx/rx locks
+ * Register/unregister. We are already under netmap lock.
  */
-static void
-re_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int queueid)
-{
-	struct rl_softc *adapter = ifp->if_softc;
-
-	switch (what) {
-	case NETMAP_CORE_LOCK:
-		RL_LOCK(adapter);
-		break;
-	case NETMAP_CORE_UNLOCK:
-		RL_UNLOCK(adapter);
-		break;
-
-	case NETMAP_TX_LOCK:
-	case NETMAP_RX_LOCK:
-	case NETMAP_TX_UNLOCK:
-	case NETMAP_RX_UNLOCK:
-		D("invalid lock call %d, no tx/rx locks here", what);
-		break;
-	}
-}
-
-
-/*
- * support for netmap register/unregisted. We are already under core lock.
- * only called on the first register or the last unregister.
- */
 static int
-re_netmap_reg(struct ifnet *ifp, int onoff)
+re_netmap_reg(struct netmap_adapter *na, int onoff)
 {
+	struct ifnet *ifp = na->ifp;
 	struct rl_softc *adapter = ifp->if_softc;
-	struct netmap_adapter *na = NA(ifp);
-	int error = 0;
 
-	if (na == NULL)
-		return EINVAL;
-	/* Tell the stack that the interface is no longer active */
-	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
-
-	re_stop(adapter);
-
+	RL_LOCK(adapter);
+	re_stop(adapter); /* also clears IFF_DRV_RUNNING */
 	if (onoff) {
-		ifp->if_capenable |= IFCAP_NETMAP;
-
-		/* save if_transmit to restore it later */
-		na->if_transmit = ifp->if_transmit;
-		ifp->if_transmit = netmap_start;
-
-		re_init_locked(adapter);
-
-		if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
-			error = ENOMEM;
-			goto fail;
-		}
+		nm_set_native_flags(na);
 	} else {
-fail:
-		/* restore if_transmit */
-		ifp->if_transmit = na->if_transmit;
-		ifp->if_capenable &= ~IFCAP_NETMAP;
-		re_init_locked(adapter);	/* also enables intr */
+		nm_clear_native_flags(na);
 	}
-	return (error);
+	re_init_locked(adapter);	/* also enables intr */
+	RL_UNLOCK(adapter);
+	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
 }
 
 
@@ -112,97 +66,102 @@
  * Reconcile kernel and user view of the transmit ring.
  */
 static int
-re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+re_netmap_txsync(struct netmap_kring *kring, int flags)
 {
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
+
+	/* device-specific */
 	struct rl_softc *sc = ifp->if_softc;
 	struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc;
-	struct netmap_adapter *na = NA(sc->rl_ifp);
-	struct netmap_kring *kring = &na->tx_rings[ring_nr];
-	struct netmap_ring *ring = kring->ring;
-	int j, k, l, n, lim = kring->nkr_num_slots - 1;
 
-	k = ring->cur;
-	if (k > lim)
-		return netmap_ring_reinit(kring);
-
-	if (do_lock)
-		RL_LOCK(sc);
-
-	/* Sync the TX descriptor list */
 	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
-            sc->rl_ldata.rl_tx_list_map,
-            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+	    sc->rl_ldata.rl_tx_list_map,
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); // XXX extra postwrite ?
 
-	/* XXX move after the transmissions */
-	/* record completed transmissions */
-        for (n = 0, l = sc->rl_ldata.rl_tx_considx;
-	    l != sc->rl_ldata.rl_tx_prodidx;
-	    n++, l = RL_TX_DESC_NXT(sc, l)) {
-		uint32_t cmdstat =
-			le32toh(sc->rl_ldata.rl_tx_list[l].rl_cmdstat);
-		if (cmdstat & RL_TDESC_STAT_OWN)
-			break;
-	}
-	if (n > 0) {
-		sc->rl_ldata.rl_tx_considx = l;
-		sc->rl_ldata.rl_tx_free += n;
-		kring->nr_hwavail += n;
-	}
+	/*
+	 * First part: process new packets to send.
+	 */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {	/* we have new packets to send */
+		nic_i = sc->rl_ldata.rl_tx_prodidx;
+		// XXX or netmap_idx_k2n(kring, nm_i);
 
-	/* update avail to what the kernel knows */
-	ring->avail = kring->nr_hwavail;
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			u_int len = slot->len;
+			uint64_t paddr;
+			void *addr = PNMB(na, slot, &paddr);
 
-	j = kring->nr_hwcur;
-	if (j != k) {	/* we have new packets to send */
-		l = sc->rl_ldata.rl_tx_prodidx;
-		for (n = 0; j != k; n++) {
-			struct netmap_slot *slot = &ring->slot[j];
-			struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[l];
+			/* device-specific */
+			struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[nic_i];
 			int cmd = slot->len | RL_TDESC_CMD_EOF |
 				RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF ;
-			uint64_t paddr;
-			void *addr = PNMB(slot, &paddr);
-			int len = slot->len;
 
-			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
-				if (do_lock)
-					RL_UNLOCK(sc);
-				// XXX what about prodidx ?
-				return netmap_ring_reinit(kring);
-			}
+			NM_CHECK_ADDR_LEN(na, addr, len);
 
-			if (l == lim)	/* mark end of ring */
+			if (nic_i == lim)	/* mark end of ring */
 				cmd |= RL_TDESC_CMD_EOR;
 
 			if (slot->flags & NS_BUF_CHANGED) {
+				/* buffer has changed, reload map */
 				desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
 				desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
-				/* buffer has changed, unload and reload map */
-				netmap_reload_map(sc->rl_ldata.rl_tx_mtag,
-					txd[l].tx_dmamap, addr);
-				slot->flags &= ~NS_BUF_CHANGED;
+				netmap_reload_map(na, sc->rl_ldata.rl_tx_mtag,
+					txd[nic_i].tx_dmamap, addr);
 			}
-			slot->flags &= ~NS_REPORT;
+			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+			/* Fill the slot in the NIC ring. */
 			desc->rl_cmdstat = htole32(cmd);
+
+			/* make sure changes to the buffer are synced */
 			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
-				txd[l].tx_dmamap, BUS_DMASYNC_PREWRITE);
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+				txd[nic_i].tx_dmamap,
+				BUS_DMASYNC_PREWRITE);
+
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
-		sc->rl_ldata.rl_tx_prodidx = l;
-		kring->nr_hwcur = k; /* the saved ring->cur */
-		ring->avail -= n; // XXX see others
-		kring->nr_hwavail = ring->avail;
+		sc->rl_ldata.rl_tx_prodidx = nic_i;
+		kring->nr_hwcur = head;
 
+		/* synchronize the NIC ring */
 		bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
-		    sc->rl_ldata.rl_tx_list_map,
-		    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
+			sc->rl_ldata.rl_tx_list_map,
+			BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
 
 		/* start ? */
 		CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
 	}
-	if (do_lock)
-		RL_UNLOCK(sc);
+
+	/*
+	 * Second part: reclaim buffers for completed transmissions.
+	 */
+	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
+		nic_i = sc->rl_ldata.rl_tx_considx;
+		for (n = 0; nic_i != sc->rl_ldata.rl_tx_prodidx;
+		    n++, nic_i = RL_TX_DESC_NXT(sc, nic_i)) {
+			uint32_t cmdstat =
+				le32toh(sc->rl_ldata.rl_tx_list[nic_i].rl_cmdstat);
+			if (cmdstat & RL_TDESC_STAT_OWN)
+				break;
+		}
+		if (n > 0) {
+			sc->rl_ldata.rl_tx_considx = nic_i;
+			sc->rl_ldata.rl_tx_free += n;
+			kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
+		}
+	}
+
+	nm_txsync_finalize(kring);
+
 	return 0;
 }
 
@@ -211,44 +170,46 @@
  * Reconcile kernel and user view of the receive ring.
  */
 static int
-re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+re_netmap_rxsync(struct netmap_kring *kring, int flags)
 {
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = nm_rxsync_prologue(kring);
+	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
+
+	/* device-specific */
 	struct rl_softc *sc = ifp->if_softc;
 	struct rl_rxdesc *rxd = sc->rl_ldata.rl_rx_desc;
-	struct netmap_adapter *na = NA(sc->rl_ifp);
-	struct netmap_kring *kring = &na->rx_rings[ring_nr];
-	struct netmap_ring *ring = kring->ring;
-	int j, l, n, lim = kring->nkr_num_slots - 1;
-	int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
-	u_int k = ring->cur, resvd = ring->reserved;
 
-	k = ring->cur;
-	if (k > lim)
+	if (head > lim)
 		return netmap_ring_reinit(kring);
 
-	if (do_lock)
-		RL_LOCK(sc);
-	/* XXX check sync modes */
 	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
-	    sc->rl_ldata.rl_rx_list_map,
-	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+			sc->rl_ldata.rl_rx_list_map,
+			BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 	/*
-	 * Import newly received packets into the netmap ring.
-	 * j is an index in the netmap ring, l in the NIC ring.
+	 * First part: import newly received packets.
 	 *
-	 * The device uses all the buffers in the ring, so we need
+	 * This device uses all the buffers in the ring, so we need
 	 * another termination condition in addition to RL_RDESC_STAT_OWN
-	 * cleared (all buffers could have it cleared. The easiest one
-	 * is to limit the amount of data reported up to 'lim'
+	 * cleared (all buffers could have it cleared). The easiest one
+	 * is to stop right before nm_hwcur.
 	 */
-	l = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
-	j = netmap_idx_n2k(kring, l); /* the kring index */
 	if (netmap_no_pendintr || force_update) {
 		uint16_t slot_flags = kring->nkr_slot_flags;
+		uint32_t stop_i = nm_prev(kring->nr_hwcur, lim);
 
-		for (n = kring->nr_hwavail; n < lim ; n++) {
-			struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[l];
+		nic_i = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
+		nm_i = netmap_idx_n2k(kring, nic_i);
+
+		while (nm_i != stop_i) {
+			struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[nic_i];
 			uint32_t rxstat = le32toh(cur_rx->rl_cmdstat);
 			uint32_t total_len;
 
@@ -257,78 +218,72 @@
 			total_len = rxstat & sc->rl_rxlenmask;
 			/* XXX subtract crc */
 			total_len = (total_len < 4) ? 0 : total_len - 4;
-			kring->ring->slot[j].len = total_len;
-			kring->ring->slot[j].flags = slot_flags;
+			ring->slot[nm_i].len = total_len;
+			ring->slot[nm_i].flags = slot_flags;
 			/*  sync was in re_newbuf() */
 			bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
-			    rxd[l].rx_dmamap, BUS_DMASYNC_POSTREAD);
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+			    rxd[nic_i].rx_dmamap, BUS_DMASYNC_POSTREAD);
+			// if_inc_counter(sc->rl_ifp, IFCOUNTER_IPACKETS, 1);
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
-		if (n != kring->nr_hwavail) {
-			sc->rl_ldata.rl_rx_prodidx = l;
-			sc->rl_ifp->if_ipackets += n - kring->nr_hwavail;
-			kring->nr_hwavail = n;
-		}
+		sc->rl_ldata.rl_rx_prodidx = nic_i;
+		kring->nr_hwtail = nm_i;
 		kring->nr_kflags &= ~NKR_PENDINTR;
 	}
 
-	/* skip past packets that userspace has released */
-	j = kring->nr_hwcur;
-	if (resvd > 0) {
-		if (resvd + ring->avail >= lim + 1) {
-			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
-			ring->reserved = resvd = 0; // XXX panic...
-		}
-		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
-	}
-	if (j != k) { /* userspace has released some packets. */
-		l = netmap_idx_k2n(kring, j); /* the NIC index */
-		for (n = 0; j != k; n++) {
-			struct netmap_slot *slot = ring->slot + j;
-			struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[l];
-			int cmd = NETMAP_BUF_SIZE | RL_RDESC_CMD_OWN;
+	/*
+	 * Second part: skip past packets that userspace has released.
+	 */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
 			uint64_t paddr;
-			void *addr = PNMB(slot, &paddr);
+			void *addr = PNMB(na, slot, &paddr);
 
-			if (addr == netmap_buffer_base) { /* bad buf */
-				if (do_lock)
-					RL_UNLOCK(sc);
-				return netmap_ring_reinit(kring);
-			}
+			struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[nic_i];
+			int cmd = NETMAP_BUF_SIZE(na) | RL_RDESC_CMD_OWN;
 
-			if (l == lim)	/* mark end of ring */
+			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
+				goto ring_reset;
+
+			if (nic_i == lim)	/* mark end of ring */
 				cmd |= RL_RDESC_CMD_EOR;
 
-			slot->flags &= ~NS_REPORT;
 			if (slot->flags & NS_BUF_CHANGED) {
-				netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
-					rxd[l].rx_dmamap, addr);
+				/* buffer has changed, reload map */
 				desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
 				desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
+				netmap_reload_map(na, sc->rl_ldata.rl_rx_mtag,
+					rxd[nic_i].rx_dmamap, addr);
 				slot->flags &= ~NS_BUF_CHANGED;
 			}
 			desc->rl_cmdstat = htole32(cmd);
 			bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
-				rxd[l].rx_dmamap, BUS_DMASYNC_PREREAD);
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+			    rxd[nic_i].rx_dmamap,
+			    BUS_DMASYNC_PREREAD);
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
-		kring->nr_hwavail -= n;
-		kring->nr_hwcur = k;
-		/* Flush the RX DMA ring */
+		kring->nr_hwcur = head;
 
 		bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
 		    sc->rl_ldata.rl_rx_list_map,
-		    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	}
-	/* tell userspace that there are new packets */
-	ring->avail = kring->nr_hwavail - resvd;
-	if (do_lock)
-		RL_UNLOCK(sc);
+
+	/* tell userspace that there might be new packets */
+	nm_rxsync_finalize(kring);
+
 	return 0;
+
+ring_reset:
+	return netmap_ring_reinit(kring);
 }
 
+
 /*
  * Additional routines to init the tx and rx rings.
  * In other drivers we do that inline in the main code.
@@ -340,9 +295,10 @@
 	struct rl_desc *desc;
 	int i, n;
 	struct netmap_adapter *na = NA(sc->rl_ifp);
-	struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0);
+	struct netmap_slot *slot;
 
-	/* slot is NULL if we are not in netmap mode */
+	slot = netmap_reset(na, NR_TX, 0, 0);
+	/* slot is NULL if we are not in native netmap mode */
 	if (!slot)
 		return;
 	/* in netmap mode, overwrite addresses and maps */
@@ -354,11 +310,11 @@
 	for (i = 0; i < n; i++) {
 		uint64_t paddr;
 		int l = netmap_idx_n2k(&na->tx_rings[0], i);
-		void *addr = PNMB(slot + l, &paddr);
+		void *addr = PNMB(na, slot + l, &paddr);
 
 		desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
 		desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
-		netmap_load_map(sc->rl_ldata.rl_tx_mtag,
+		netmap_load_map(na, sc->rl_ldata.rl_tx_mtag,
 			txd[i].tx_dmamap, addr);
 	}
 }
@@ -370,36 +326,35 @@
 	struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0);
 	struct rl_desc *desc = sc->rl_ldata.rl_rx_list;
 	uint32_t cmdstat;
-	int i, n, max_avail;
+	uint32_t nic_i, max_avail;
+	uint32_t const n = sc->rl_ldata.rl_rx_desc_cnt;
 
 	if (!slot)
 		return;
-	n = sc->rl_ldata.rl_rx_desc_cnt;
 	/*
-	 * Userspace owned hwavail packets before the reset,
-	 * so the NIC that last hwavail descriptors of the ring
-	 * are still owned by the driver (and keep one empty).
+	 * Do not release the slots owned by userspace,
+	 * and also keep one empty.
 	 */
-	max_avail = n - 1 - na->rx_rings[0].nr_hwavail;
-	for (i = 0; i < n; i++) {
+	max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]);
+	for (nic_i = 0; nic_i < n; nic_i++) {
 		void *addr;
 		uint64_t paddr;
-		int l = netmap_idx_n2k(&na->rx_rings[0], i);
+		uint32_t nm_i = netmap_idx_n2k(&na->rx_rings[0], nic_i);
 
-		addr = PNMB(slot + l, &paddr);
+		addr = PNMB(na, slot + nm_i, &paddr);
 
-		netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
-		    sc->rl_ldata.rl_rx_desc[i].rx_dmamap, addr);
+		netmap_reload_map(na, sc->rl_ldata.rl_rx_mtag,
+		    sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, addr);
 		bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
-		    sc->rl_ldata.rl_rx_desc[i].rx_dmamap, BUS_DMASYNC_PREREAD);
-		desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
-		desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
-		cmdstat = NETMAP_BUF_SIZE;
-		if (i == n - 1) /* mark the end of ring */
+		    sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, BUS_DMASYNC_PREREAD);
+		desc[nic_i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
+		desc[nic_i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
+		cmdstat = NETMAP_BUF_SIZE(na);
+		if (nic_i == n - 1) /* mark the end of ring */
 			cmdstat |= RL_RDESC_CMD_EOR;
-		if (i < max_avail)
+		if (nic_i < max_avail)
 			cmdstat |= RL_RDESC_CMD_OWN;
-		desc[i].rl_cmdstat = htole32(cmdstat);
+		desc[nic_i].rl_cmdstat = htole32(cmdstat);
 	}
 }
 
@@ -412,13 +367,14 @@
 	bzero(&na, sizeof(na));
 
 	na.ifp = sc->rl_ifp;
-	na.separate_locks = 0;
+	na.na_flags = NAF_BDG_MAYSLEEP;
 	na.num_tx_desc = sc->rl_ldata.rl_tx_desc_cnt;
 	na.num_rx_desc = sc->rl_ldata.rl_rx_desc_cnt;
 	na.nm_txsync = re_netmap_txsync;
 	na.nm_rxsync = re_netmap_rxsync;
-	na.nm_lock = re_netmap_lock_wrapper;
 	na.nm_register = re_netmap_reg;
-	netmap_attach(&na, 1);
+	na.num_tx_rings = na.num_rx_rings = 1;
+	netmap_attach(&na);
 }
+
 /* end of file */
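
The conversion above (and the other netmap drivers touched by this commit) leans on the invariant nm_i == (nic_i + kring->nkr_hwofs) % ring_size to translate between NIC and netmap ring indexes. A minimal stand-alone model of that arithmetic, with local stand-ins for netmap_idx_n2k()/netmap_idx_k2n() rather than the kernel helpers:

#include <stdio.h>

/*
 * Minimal stand-alone model of the index mapping used above:
 * nm_i == (nic_i + nkr_hwofs) % ring_size.  idx_n2k()/idx_k2n() are
 * local stand-ins for netmap_idx_n2k()/netmap_idx_k2n(), not the
 * kernel helpers.
 */
#define RING_SIZE	8

static int
idx_n2k(int hwofs, int nic_i)	/* NIC index -> netmap index */
{
	return (nic_i + hwofs + RING_SIZE) % RING_SIZE;
}

static int
idx_k2n(int hwofs, int nm_i)	/* netmap index -> NIC index */
{
	return (nm_i - hwofs + RING_SIZE) % RING_SIZE;
}

int
main(void)
{
	int hwofs = 3;	/* pretend a reinit left the NIC ring 3 slots off */
	int nic_i;

	for (nic_i = 0; nic_i < RING_SIZE; nic_i++) {
		int nm_i = idx_n2k(hwofs, nic_i);

		printf("nic_i=%d -> nm_i=%d -> nic_i=%d\n",
		    nic_i, nm_i, idx_k2n(hwofs, nm_i));
	}
	return 0;
}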

Added: trunk/sys/dev/netmap/if_vtnet_netmap.h
===================================================================
--- trunk/sys/dev/netmap/if_vtnet_netmap.h	                        (rev 0)
+++ trunk/sys/dev/netmap/if_vtnet_netmap.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,435 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2014 Vincenzo Maffione, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: stable/10/sys/dev/netmap/if_vtnet_netmap.h 270252 2014-08-20 23:34:36Z luigi $
+ */
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>    /* vtophys ? */
+#include <dev/netmap/netmap_kern.h>
+
+
+#define SOFTC_T	vtnet_softc
+
+/* Free all the unused buffers in the RX and TX virtqueues.
+ * This function is called when entering and exiting netmap mode.
+ * - buffers queued by the virtio driver return an skbuf/mbuf pointer
+ *   and need to be freed;
+ * - buffers queued by netmap return the txq/rxq cookie and need no
+ *   further work.
+ */
+static void
+vtnet_netmap_free_bufs(struct SOFTC_T* sc)
+{
+	int i, nmb = 0, n = 0, last;
+
+	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+		struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i];
+		struct virtqueue *vq;
+		struct mbuf *m;
+		struct vtnet_txq *txq = &sc->vtnet_txqs[i];
+                struct vtnet_tx_header *txhdr;
+
+		last = 0;
+		vq = rxq->vtnrx_vq;
+		while ((m = virtqueue_drain(vq, &last)) != NULL) {
+			n++;
+			if (m != (void *)rxq)
+				m_freem(m);
+			else
+				nmb++;
+		}
+
+		last = 0;
+		vq = txq->vtntx_vq;
+		while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
+			n++;
+			if (txhdr != (void *)txq) {
+				m_freem(txhdr->vth_mbuf);
+				uma_zfree(vtnet_tx_header_zone, txhdr);
+			} else
+				nmb++;
+		}
+	}
+	D("freed %d mbufs, %d netmap bufs on %d queues",
+		n - nmb, nmb, i);
+}
+
+/* Register and unregister. */
+static int
+vtnet_netmap_reg(struct netmap_adapter *na, int onoff)
+{
+        struct ifnet *ifp = na->ifp;
+	struct SOFTC_T *sc = ifp->if_softc;
+
+	VTNET_CORE_LOCK(sc);
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+	/* enable or disable flags and callbacks in na and ifp */
+	if (onoff) {
+		nm_set_native_flags(na);
+	} else {
+		nm_clear_native_flags(na);
+	}
+	/* drain queues so netmap and native drivers
+	 * do not interfere with each other
+	 */
+	vtnet_netmap_free_bufs(sc);
+        vtnet_init_locked(sc);       /* also enable intr */
+        VTNET_CORE_UNLOCK(sc);
+        return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
+}
+
+
+/* Reconcile kernel and user view of the transmit ring. */
+static int
+vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+        struct ifnet *ifp = na->ifp;
+	struct netmap_ring *ring = kring->ring;
+	u_int ring_nr = kring->ring_id;
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
+
+	/* device-specific */
+	struct SOFTC_T *sc = ifp->if_softc;
+	struct vtnet_txq *txq = &sc->vtnet_txqs[ring_nr];
+	struct virtqueue *vq = txq->vtntx_vq;
+
+	/*
+	 * First part: process new packets to send.
+	 */
+	rmb();
+	
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {	/* we have new packets to send */
+		struct sglist *sg = txq->vtntx_sg;
+
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (n = 0; nm_i != head; n++) {
+			/* we use an empty header here */
+			static struct virtio_net_hdr_mrg_rxbuf hdr;
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			u_int len = slot->len;
+			uint64_t paddr;
+			void *addr = PNMB(na, slot, &paddr);
+                        int err;
+
+			NM_CHECK_ADDR_LEN(na, addr, len);
+
+			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+			/* Initialize the scatterlist, expose it to the hypervisor,
+			 * and kick the hypervisor (if necessary).
+			 */
+			sglist_reset(sg); // cheap
+			// if vtnet_hdr_size > 0 ...
+			err = sglist_append(sg, &hdr, sc->vtnet_hdr_size);
+			// XXX later, support multi segment
+			err = sglist_append_phys(sg, paddr, len);
+			/* use na as the cookie */
+                        err = virtqueue_enqueue(vq, txq, sg, sg->sg_nseg, 0);
+                        if (unlikely(err < 0)) {
+                                D("virtqueue_enqueue failed");
+                                break;
+                        }
+
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
+		}
+		/* Update hwcur depending on where we stopped. */
+		kring->nr_hwcur = nm_i; /* note we might break early */
+
+		/* No more free TX slots? Ask the hypervisor for notifications,
+		 * possibly only when a considerable amount of work has been
+		 * done.
+		 */
+		ND(3,"sent %d packets, hwcur %d", n, nm_i);
+		virtqueue_disable_intr(vq);
+		virtqueue_notify(vq);
+	} else {
+		if (ring->head != ring->tail)
+		    ND(5, "pure notify ? head %d tail %d nused %d %d",
+			ring->head, ring->tail, virtqueue_nused(vq),
+			(virtqueue_dump(vq), 1));
+		virtqueue_notify(vq);
+		virtqueue_enable_intr(vq); // like postpone with 0
+	}
+
+	
+        /* Free used slots. We only consider our own used buffers, recognized
+	 * by the token we passed to virtqueue_add_outbuf.
+	 */
+        n = 0;
+        for (;;) {
+                struct vtnet_tx_header *txhdr = virtqueue_dequeue(vq, NULL);
+                if (txhdr == NULL)
+                        break;
+                if (likely(txhdr == (void *)txq)) {
+                        n++;
+			if (virtqueue_nused(vq) < 32) { // XXX slow release
+				break;
+			}
+		} else { /* leftover from previous transmission */
+			m_freem(txhdr->vth_mbuf);
+			uma_zfree(vtnet_tx_header_zone, txhdr);
+		}
+        }
+	if (n) {
+		kring->nr_hwtail += n;
+		if (kring->nr_hwtail > lim)
+			kring->nr_hwtail -= lim + 1;
+	}
+	if (nm_i != kring->nr_hwtail /* && vtnet_txq_below_threshold(txq) == 0*/) {
+		ND(3, "disable intr, hwcur %d", nm_i);
+		virtqueue_disable_intr(vq);
+	} else {
+		ND(3, "enable intr, hwcur %d", nm_i);
+		virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT);
+	}
+
+//out:
+	nm_txsync_finalize(kring);
+
+        return 0;
+}
+
+static int
+vtnet_refill_rxq(struct netmap_kring *kring, u_int nm_i, u_int head)
+{
+	struct netmap_adapter *na = kring->na;
+        struct ifnet *ifp = na->ifp;
+	struct netmap_ring *ring = kring->ring;
+	u_int ring_nr = kring->ring_id;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int n;
+
+	/* device-specific */
+	struct SOFTC_T *sc = ifp->if_softc;
+	struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
+	struct virtqueue *vq = rxq->vtnrx_vq;
+
+	/* use a local sglist, default might be short */
+	struct sglist_seg ss[2];
+	struct sglist sg = { ss, 0, 0, 2 };
+
+	for (n = 0; nm_i != head; n++) {
+		static struct virtio_net_hdr_mrg_rxbuf hdr;
+		struct netmap_slot *slot = &ring->slot[nm_i];
+		uint64_t paddr;
+		void *addr = PNMB(na, slot, &paddr);
+		int err = 0;
+
+		if (addr == NETMAP_BUF_BASE(na)) { /* bad buf */
+			if (netmap_ring_reinit(kring))
+				return -1;
+		}
+
+		slot->flags &= ~NS_BUF_CHANGED;
+		sglist_reset(&sg); // cheap
+		err = sglist_append(&sg, &hdr, sc->vtnet_hdr_size);
+		err = sglist_append_phys(&sg, paddr, NETMAP_BUF_SIZE(na));
+		/* writable for the host */
+		err = virtqueue_enqueue(vq, rxq, &sg, 0, sg.sg_nseg);
+		if (err < 0) {
+			D("virtqueue_enqueue failed");
+			break;
+		}
+		nm_i = nm_next(nm_i, lim);
+	}
+	return nm_i;
+}
+
+/* Reconcile kernel and user view of the receive ring. */
+static int
+vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+        struct ifnet *ifp = na->ifp;
+	struct netmap_ring *ring = kring->ring;
+	u_int ring_nr = kring->ring_id;
+	u_int nm_i;	/* index into the netmap ring */
+	// u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = nm_rxsync_prologue(kring);
+	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
+
+	/* device-specific */
+	struct SOFTC_T *sc = ifp->if_softc;
+	struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
+	struct virtqueue *vq = rxq->vtnrx_vq;
+
+	/* XXX netif_carrier_ok ? */
+
+	if (head > lim)
+		return netmap_ring_reinit(kring);
+
+	rmb();
+	/*
+	 * First part: import newly received packets.
+	 * Only accept our own buffers (matching the token). We should
+	 * only get matching buffers, because of vtnet_netmap_free_bufs()
+	 * and vtnet_netmap_init_rx_buffers().
+	 */
+	if (netmap_no_pendintr || force_update) {
+		uint16_t slot_flags = kring->nkr_slot_flags;
+                struct netmap_adapter *token;
+
+                nm_i = kring->nr_hwtail;
+                n = 0;
+		for (;;) {
+			int len;
+                        token = virtqueue_dequeue(vq, &len);
+                        if (token == NULL)
+                                break;
+                        if (likely(token == (void *)rxq)) {
+                            ring->slot[nm_i].len = len;
+                            ring->slot[nm_i].flags = slot_flags;
+                            nm_i = nm_next(nm_i, lim);
+                            n++;
+                        } else {
+			    D("This should not happen");
+                        }
+		}
+		kring->nr_hwtail = nm_i;
+		kring->nr_kflags &= ~NKR_PENDINTR;
+	}
+        ND("[B] h %d c %d hwcur %d hwtail %d",
+		ring->head, ring->cur, kring->nr_hwcur,
+			      kring->nr_hwtail);
+
+	/*
+	 * Second part: skip past packets that userspace has released.
+	 */
+	nm_i = kring->nr_hwcur; /* netmap ring index */
+	if (nm_i != head) {
+		int err = vtnet_refill_rxq(kring, nm_i, head);
+		if (err < 0)
+			return 1;
+		kring->nr_hwcur = err;
+		virtqueue_notify(vq);
+		/* After draining the queue we may need an intr from the hypervisor */
+        	vtnet_rxq_enable_intr(rxq);
+	}
+
+	/* tell userspace that there might be new packets. */
+	nm_rxsync_finalize(kring);
+
+        ND("[C] h %d c %d t %d hwcur %d hwtail %d",
+		ring->head, ring->cur, ring->tail,
+		kring->nr_hwcur, kring->nr_hwtail);
+
+	return 0;
+}
+
+
+/* Make RX virtqueues buffers pointing to netmap buffers. */
+static int
+vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc)
+{
+	struct ifnet *ifp = sc->vtnet_ifp;
+	struct netmap_adapter* na = NA(ifp);
+	unsigned int r;
+
+	if (!nm_native_on(na))
+		return 0;
+	for (r = 0; r < na->num_rx_rings; r++) {
+                struct netmap_kring *kring = &na->rx_rings[r];
+		struct vtnet_rxq *rxq = &sc->vtnet_rxqs[r];
+		struct virtqueue *vq = rxq->vtnrx_vq;
+	        struct netmap_slot* slot;
+		int err = 0;
+
+		slot = netmap_reset(na, NR_RX, r, 0);
+		if (!slot) {
+			D("strange, null netmap ring %d", r);
+			return 0;
+		}
+		/* Add up to na->num_rx_desc-1 buffers to this RX virtqueue.
+		 * It's important to leave one virtqueue slot free, otherwise
+		 * we can run into ring->cur/ring->tail wraparounds.
+		 */
+		err = vtnet_refill_rxq(kring, 0, na->num_rx_desc-1);
+		if (err < 0)
+			return 0;
+		virtqueue_notify(vq);
+	}
+
+	return 1;
+}
+
+/* Update the virtio-net device configurations. Number of queues can
+ * change dynamically, by 'ethtool --set-channels $IFNAME combined $N'.
+ * This is actually the only way virtio-net can currently enable
+ * the multiqueue mode.
+ * XXX note that we seem to lose packets if the netmap ring has more
+ * slots than the queue
+ */
+static int
+vtnet_netmap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
+						u_int *rxr, u_int *rxd)
+{
+	struct ifnet *ifp = na->ifp;
+	struct SOFTC_T *sc = ifp->if_softc;
+
+	*txr = *rxr = sc->vtnet_max_vq_pairs;
+	*rxd = 512; // sc->vtnet_rx_nmbufs;
+	*txd = *rxd; // XXX
+        D("vtnet config txq=%d, txd=%d rxq=%d, rxd=%d",
+					*txr, *txd, *rxr, *rxd);
+
+	return 0;
+}
+
+static void
+vtnet_netmap_attach(struct SOFTC_T *sc)
+{
+	struct netmap_adapter na;
+
+	bzero(&na, sizeof(na));
+
+	na.ifp = sc->vtnet_ifp;
+	na.num_tx_desc =  1024;// sc->vtnet_rx_nmbufs;
+	na.num_rx_desc =  1024; // sc->vtnet_rx_nmbufs;
+	na.nm_register = vtnet_netmap_reg;
+	na.nm_txsync = vtnet_netmap_txsync;
+	na.nm_rxsync = vtnet_netmap_rxsync;
+	na.nm_config = vtnet_netmap_config;
+	na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
+	D("max rings %d", sc->vtnet_max_vq_pairs);
+	netmap_attach(&na);
+
+        D("virtio attached txq=%d, txd=%d rxq=%d, rxd=%d",
+			na.num_tx_rings, na.num_tx_desc,
+			na.num_rx_rings, na.num_rx_desc);
+}
+/* end of file */


Property changes on: trunk/sys/dev/netmap/if_vtnet_netmap.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
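
vtnet_netmap_txsync() above reclaims completed transmissions by dequeueing the virtqueue until it is empty and matching the returned token: completions whose cookie is the queue itself were posted by netmap and only need counting (they advance nr_hwtail); anything else is a leftover from the native driver and must be freed. A toy model of that pattern, using local stand-ins rather than the virtqueue_* API:

#include <stdio.h>
#include <stddef.h>

/*
 * Toy model of the token-matching reclaim loop in the txsync above:
 * "queue", "dequeue" and "cookie" are local stand-ins, not the
 * virtqueue_* API.
 */
struct queue {
	void	*cookie[4];	/* completion tokens, most recent last */
	int	 n;
};

static void *
dequeue(struct queue *q)
{
	return (q->n > 0 ? q->cookie[--q->n] : NULL);
}

int
main(void)
{
	struct queue q;
	int native_buf = 0;	/* stands in for an mbuf left by the driver */
	void *tok;
	int ours = 0, freed = 0;

	q.cookie[0] = &q;		/* posted by netmap (cookie == queue) */
	q.cookie[1] = &native_buf;	/* leftover from the native driver */
	q.cookie[2] = &q;
	q.n = 3;

	while ((tok = dequeue(&q)) != NULL) {
		if (tok == (void *)&q)
			ours++;		/* netmap slot: just advance nr_hwtail */
		else
			freed++;	/* native buffer: m_freem() in the driver */
	}
	printf("netmap slots reclaimed %d, native buffers freed %d\n",
	    ours, freed);
	return 0;
}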
Modified: trunk/sys/dev/netmap/ixgbe_netmap.h
===================================================================
--- trunk/sys/dev/netmap/ixgbe_netmap.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/netmap/ixgbe_netmap.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 /*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -24,19 +25,18 @@
  */
 
 /*
- * $MidnightBSD$
- * $Id: ixgbe_netmap.h,v 1.2 2013-01-08 03:53:24 laffer1 Exp $
+ * $FreeBSD: stable/10/sys/dev/netmap/ixgbe_netmap.h 295008 2016-01-28 19:21:01Z sbruno $
  *
- * netmap modifications for ixgbe
+ * netmap support for: ixgbe (both ix and ixv)
  *
  * This file is meant to be a reference on how to implement
  * netmap support for a network driver.
- * This file contains code but only static or inline functions
- * that are used by a single driver. To avoid replication of
- * code we just #include it near the beginning of the
- * standard driver.
+ * This file contains code but only static or inline functions used
+ * by a single driver. To avoid replication of code we just #include
+ * it near the beginning of the standard driver.
  */
 
+
 #include <net/netmap.h>
 #include <sys/selinfo.h>
 /*
@@ -49,7 +49,11 @@
  */
 #include <dev/netmap/netmap_kern.h>
 
+void ixgbe_netmap_attach(struct adapter *adapter);
+
 /*
+ * device-specific sysctl variables:
+ *
  * ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
  *	During regular operations the CRC is stripped, but on some
  *	hardware reception of frames not multiple of 64 is slower,
@@ -57,54 +61,18 @@
  *
  * ix_rx_miss, ix_rx_miss_bufs:
  *	count packets that might be missed due to lost interrupts.
- *
- * ix_use_dd
- *	use the dd bit for completed tx transmissions.
- *	This is tricky, much better to use TDH for now.
  */
 SYSCTL_DECL(_dev_netmap);
-static int ix_rx_miss, ix_rx_miss_bufs, ix_use_dd, ix_crcstrip;
+static int ix_rx_miss, ix_rx_miss_bufs;
+int ix_crcstrip;
 SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip,
     CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames");
-SYSCTL_INT(_dev_netmap, OID_AUTO, ix_use_dd,
-    CTLFLAG_RW, &ix_use_dd, 0, "use dd instead of tdh to detect tx frames");
 SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss,
     CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr");
 SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs,
     CTLFLAG_RW, &ix_rx_miss_bufs, 0, "potentially missed rx intr bufs");
 
-/*
- * wrapper to export locks to the generic netmap code.
- */
-static void
-ixgbe_netmap_lock_wrapper(struct ifnet *_a, int what, u_int queueid)
-{
-	struct adapter *adapter = _a->if_softc;
 
-	ASSERT(queueid < adapter->num_queues);
-	switch (what) {
-	case NETMAP_CORE_LOCK:
-		IXGBE_CORE_LOCK(adapter);
-		break;
-	case NETMAP_CORE_UNLOCK:
-		IXGBE_CORE_UNLOCK(adapter);
-		break;
-	case NETMAP_TX_LOCK:
-		IXGBE_TX_LOCK(&adapter->tx_rings[queueid]);
-		break;
-	case NETMAP_TX_UNLOCK:
-		IXGBE_TX_UNLOCK(&adapter->tx_rings[queueid]);
-		break;
-	case NETMAP_RX_LOCK:
-		IXGBE_RX_LOCK(&adapter->rx_rings[queueid]);
-		break;
-	case NETMAP_RX_UNLOCK:
-		IXGBE_RX_UNLOCK(&adapter->rx_rings[queueid]);
-		break;
-	}
-}
-
-
 static void
 set_crcstrip(struct ixgbe_hw *hw, int onoff)
 {
@@ -142,186 +110,144 @@
 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc);
 }
 
+
 /*
- * Register/unregister. We are already under core lock.
+ * Register/unregister. We are already under netmap lock.
  * Only called on the first register or the last unregister.
  */
 static int
-ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
+ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
 {
+	struct ifnet *ifp = na->ifp;
 	struct adapter *adapter = ifp->if_softc;
-	struct netmap_adapter *na = NA(ifp);
-	int error = 0;
 
-	if (na == NULL)
-		return EINVAL; /* no netmap support here */
+	IXGBE_CORE_LOCK(adapter);
+	ixgbe_disable_intr(adapter); // XXX maybe ixgbe_stop ?
 
-	ixgbe_disable_intr(adapter);
-
-	/* Tell the stack that the interface is no longer active */
-	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
-
-	set_crcstrip(&adapter->hw, onoff);
-	if (onoff) { /* enable netmap mode */
-		ifp->if_capenable |= IFCAP_NETMAP;
-
-		/* save if_transmit and replace with our routine */
-		na->if_transmit = ifp->if_transmit;
-		ifp->if_transmit = netmap_start;
-
-		/*
-		 * reinitialize the adapter, now with netmap flag set,
-		 * so the rings will be set accordingly.
-		 */
-		ixgbe_init_locked(adapter);
-		if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
-			error = ENOMEM;
-			goto fail;
-		}
-	} else { /* reset normal mode (explicit request or netmap failed) */
-fail:
-		/* restore if_transmit */
-		ifp->if_transmit = na->if_transmit;
-		ifp->if_capenable &= ~IFCAP_NETMAP;
-		/* initialize the card, this time in standard mode */
-		ixgbe_init_locked(adapter);	/* also enables intr */
+	if (!IXGBE_IS_VF(adapter))
+		set_crcstrip(&adapter->hw, onoff);
+	/* enable or disable flags and callbacks in na and ifp */
+	if (onoff) {
+		nm_set_native_flags(na);
+	} else {
+		nm_clear_native_flags(na);
 	}
-	set_crcstrip(&adapter->hw, onoff);
-	return (error);
+	ixgbe_init_locked(adapter);	/* also enables intr */
+	if (!IXGBE_IS_VF(adapter))
+		set_crcstrip(&adapter->hw, onoff); // XXX why twice ?
+	IXGBE_CORE_UNLOCK(adapter);
+	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
 }
 
 
 /*
  * Reconcile kernel and user view of the transmit ring.
- * This routine might be called frequently so it must be efficient.
  *
- * ring->cur holds the userspace view of the current ring index.  Userspace
- * has filled the tx slots from the previous call's ring->cur up to but not
- * including ring->cur for this call.  In this function the kernel updates
- * kring->nr_hwcur to ring->cur, thus slots [kring->nr_hwcur, ring->cur) are
- * now ready to transmit.  At the last interrupt kring->nr_hwavail slots were
- * available.
+ * All information is in the kring.
+ * Userspace wants to send packets up to the one before kring->rhead,
+ * kernel knows kring->nr_hwcur is the first unsent packet.
  *
- * This function runs under lock (acquired from the caller or internally).
- * It must first update ring->avail to what the kernel knows,
- * subtract the newly used slots (ring->cur - kring->nr_hwcur)
- * from both avail and nr_hwavail, and set ring->nr_hwcur = ring->cur
- * issuing a dmamap_sync on all slots.
+ * Here we push packets out (as many as possible), and possibly
+ * reclaim buffers from previously completed transmission.
  *
- * Since ring comes from userspace, its content must be read only once,
- * and validated before being used to update the kernel's structures.
- * (this is also true for every use of ring in the kernel).
- *
- * ring->avail is never used, only checked for bogus values.
- *
- * do_lock is set iff the function is called from the ioctl handler.
- * In this case, grab a lock around the body, and also reclaim transmitted
- * buffers irrespective of interrupt mitigation.
+ * The caller (netmap) guarantees that there is only one instance
+ * running at any time. Any interference with other driver
+ * methods should be handled by the individual drivers.
  */
 static int
-ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
 {
-	struct adapter *adapter = ifp->if_softc;
-	struct tx_ring *txr = &adapter->tx_rings[ring_nr];
-	struct netmap_adapter *na = NA(adapter->ifp);
-	struct netmap_kring *kring = &na->tx_rings[ring_nr];
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
 	struct netmap_ring *ring = kring->ring;
-	u_int j, l, n = 0;
-	u_int const k = ring->cur, lim = kring->nkr_num_slots - 1;
-
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
 	/*
-	 * ixgbe can generate an interrupt on every tx packet, but it
-	 * seems very expensive, so we interrupt once every half ring,
-	 * or when requested with NS_REPORT
+	 * interrupts on every tx packet are expensive so request
+	 * them every half ring, or where NS_REPORT is set
 	 */
 	u_int report_frequency = kring->nkr_num_slots >> 1;
 
-	if (k > lim)
-		return netmap_ring_reinit(kring);
-	if (do_lock)
-		IXGBE_TX_LOCK(txr);
+	/* device-specific */
+	struct adapter *adapter = ifp->if_softc;
+	struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
+	int reclaim_tx;
 
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 			BUS_DMASYNC_POSTREAD);
 
 	/*
-	 * Process new packets to send. j is the current index in the
-	 * netmap ring, l is the corresponding index in the NIC ring.
+	 * First part: process new packets to send.
+	 * nm_i is the current index in the netmap ring,
+	 * nic_i is the corresponding index in the NIC ring.
 	 * The two numbers differ because upon a *_init() we reset
 	 * the NIC ring but leave the netmap ring unchanged.
 	 * For the transmit ring, we have
 	 *
-	 *		j = kring->nr_hwcur
-	 *		l = IXGBE_TDT (not tracked in the driver)
+	 *		nm_i = kring->nr_hwcur
+	 *		nic_i = IXGBE_TDT (not tracked in the driver)
 	 * and
-	 * 		j == (l + kring->nkr_hwofs) % ring_size
+	 * 		nm_i == (nic_i + kring->nkr_hwofs) % ring_size
 	 *
 	 * In this driver kring->nkr_hwofs >= 0, but for other
 	 * drivers it might be negative as well.
 	 */
-	j = kring->nr_hwcur;
-	if (j != k) {	/* we have new packets to send */
-		prefetch(&ring->slot[j]);
-		l = netmap_idx_k2n(kring, j); /* NIC index */
-		prefetch(&txr->tx_buffers[l]);
-		for (n = 0; j != k; n++) {
-			/*
-			 * Collect per-slot info.
-			 * Note that txbuf and curr are indexed by l.
-			 *
-			 * In this driver we collect the buffer address
-			 * (using the PNMB() macro) because we always
-			 * need to rewrite it into the NIC ring.
-			 * Many other drivers preserve the address, so
-			 * we only need to access it if NS_BUF_CHANGED
-			 * is set.
-			 * XXX note, on this device the dmamap* calls are
-			 * not necessary because tag is 0, however just accessing
-			 * the per-packet tag kills 1Mpps at 900 MHz.
-			 */
-			struct netmap_slot *slot = &ring->slot[j];
-			union ixgbe_adv_tx_desc *curr = &txr->tx_base[l];
-			struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[l];
+
+	/*
+	 * If we have packets to send (kring->nr_hwcur != kring->rhead)
+	 * iterate over the netmap ring, fetch length and update
+	 * the corresponding slot in the NIC ring. Some drivers also
+	 * need to update the buffer's physical address in the NIC slot
+	 * even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
+	 *
+	 * The netmap_reload_map() call is especially expensive,
+	 * even when (as in this case) the tag is 0, so do it only
+	 * when the buffer has actually changed.
+	 *
+	 * If possible do not set the report/intr bit on all slots,
+	 * but only a few times per ring or when NS_REPORT is set.
+	 *
+	 * Finally, on 10G and faster drivers, it might be useful
+	 * to prefetch the next slot and txr entry.
+	 */
+
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {	/* we have new packets to send */
+		nic_i = netmap_idx_k2n(kring, nm_i);
+
+		__builtin_prefetch(&ring->slot[nm_i]);
+		__builtin_prefetch(&txr->tx_buffers[nic_i]);
+
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			u_int len = slot->len;
 			uint64_t paddr;
-			// XXX type for flags and len ?
-			int flags = ((slot->flags & NS_REPORT) ||
-				j == 0 || j == report_frequency) ?
-					IXGBE_TXD_CMD_RS : 0;
-			u_int len = slot->len;
-			void *addr = PNMB(slot, &paddr);
+			void *addr = PNMB(na, slot, &paddr);
 
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
-			prefetch(&ring->slot[j]);
-			prefetch(&txr->tx_buffers[l]);
+			/* device-specific */
+			union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i];
+			struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i];
+			int flags = (slot->flags & NS_REPORT ||
+				nic_i == 0 || nic_i == report_frequency) ?
+				IXGBE_TXD_CMD_RS : 0;
 
-			/*
-			 * Quick check for valid addr and len.
-			 * NMB() returns netmap_buffer_base for invalid
-			 * buffer indexes (but the address is still a
-			 * valid one to be used in a ring). slot->len is
-			 * unsigned so no need to check for negative values.
-			 */
-			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
-ring_reset:
-				if (do_lock)
-					IXGBE_TX_UNLOCK(txr);
-				return netmap_ring_reinit(kring);
-			}
+			/* prefetch for next round */
+			__builtin_prefetch(&ring->slot[nm_i + 1]);
+			__builtin_prefetch(&txr->tx_buffers[nic_i + 1]);
 
+			NM_CHECK_ADDR_LEN(na, addr, len);
+
 			if (slot->flags & NS_BUF_CHANGED) {
-				/* buffer has changed, unload and reload map */
-				netmap_reload_map(txr->txtag, txbuf->map, addr);
-				slot->flags &= ~NS_BUF_CHANGED;
+				/* buffer has changed, reload map */
+				netmap_reload_map(na, txr->txtag, txbuf->map, addr);
 			}
-			slot->flags &= ~NS_REPORT;
-			/*
-			 * Fill the slot in the NIC ring.
-			 * In this driver we need to rewrite the buffer
-			 * address in the NIC ring. Other drivers do not
-			 * need this.
-			 * Use legacy descriptor, it is faster.
-			 */
+			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+			/* Fill the slot in the NIC ring. */
+			/* Use legacy descriptors, they seem to be faster. */
 			curr->read.buffer_addr = htole64(paddr);
 			curr->read.olinfo_status = 0;
 			curr->read.cmd_type_len = htole32(len | flags |
@@ -328,103 +254,78 @@
 				IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP);
 
 			/* make sure changes to the buffer are synced */
-			bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE);
+			bus_dmamap_sync(txr->txtag, txbuf->map,
+				BUS_DMASYNC_PREWRITE);
+
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
-		kring->nr_hwcur = k; /* the saved ring->cur */
-		/* decrease avail by number of packets  sent */
-		kring->nr_hwavail -= n;
+		kring->nr_hwcur = head;
 
 		/* synchronize the NIC ring */
 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
-		/* (re)start the transmitter up to slot l (excluded) */
-		IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), l);
+
+		/* (re)start the tx unit up to slot nic_i (excluded) */
+		IXGBE_WRITE_REG(&adapter->hw, txr->tail, nic_i);
 	}
 
 	/*
-	 * Reclaim buffers for completed transmissions.
+	 * Second part: reclaim buffers for completed transmissions.
 	 * Because this is expensive (we read a NIC register etc.)
 	 * we only do it in specific cases (see below).
-	 * In all cases kring->nr_kflags indicates which slot will be
-	 * checked upon a tx interrupt (nkr_num_slots means none).
 	 */
-	if (do_lock) {
-		j = 1; /* forced reclaim, ignore interrupts */
-		kring->nr_kflags = kring->nkr_num_slots;
-	} else if (kring->nr_hwavail > 0) {
-		j = 0; /* buffers still available: no reclaim, ignore intr. */
-		kring->nr_kflags = kring->nkr_num_slots;
+	if (flags & NAF_FORCE_RECLAIM) {
+		reclaim_tx = 1; /* forced reclaim */
+	} else if (!nm_kr_txempty(kring)) {
+		reclaim_tx = 0; /* have buffers, no reclaim */
 	} else {
 		/*
-		 * no buffers available, locate a slot for which we request
-		 * ReportStatus (approximately half ring after next_to_clean)
-		 * and record it in kring->nr_kflags.
-		 * If the slot has DD set, do the reclaim looking at TDH,
-		 * otherwise we go to sleep (in netmap_poll()) and will be
-		 * woken up when slot nr_kflags will be ready.
+		 * No buffers available. Locate previous slot with
+		 * REPORT_STATUS set.
+		 * If the slot has DD set, we can reclaim space,
+		 * otherwise wait for the next interrupt.
+		 * This enables interrupt moderation on the tx
+		 * side though it might reduce throughput.
 		 */
 		struct ixgbe_legacy_tx_desc *txd =
 		    (struct ixgbe_legacy_tx_desc *)txr->tx_base;
 
-		j = txr->next_to_clean + kring->nkr_num_slots/2;
-		if (j >= kring->nkr_num_slots)
-			j -= kring->nkr_num_slots;
+		nic_i = txr->next_to_clean + report_frequency;
+		if (nic_i > lim)
+			nic_i -= lim + 1;
 		// round to the closest with dd set
-		j= (j < kring->nkr_num_slots / 4 || j >= kring->nkr_num_slots*3/4) ?
+		nic_i = (nic_i < kring->nkr_num_slots / 4 ||
+			 nic_i >= kring->nkr_num_slots*3/4) ?
 			0 : report_frequency;
-		kring->nr_kflags = j; /* the slot to check */
-		j = txd[j].upper.fields.status & IXGBE_TXD_STAT_DD;	// XXX cpu_to_le32 ?
+		reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD;	// XXX cpu_to_le32 ?
 	}
-	if (j) {
-		int delta;
-
+	if (reclaim_tx) {
 		/*
 		 * Record completed transmissions.
 		 * We (re)use the driver's txr->next_to_clean to keep
 		 * track of the most recently completed transmission.
 		 *
-		 * The datasheet discourages the use of TDH to find out the
-		 * number of sent packets. We should rather check the DD
-		 * status bit in a packet descriptor. However, we only set
-		 * the "report status" bit for some descriptors (a kind of
-		 * interrupt mitigation), so we can only check on those.
-		 * For the time being we use TDH, as we do it infrequently
-		 * enough not to pose performance problems.
+		 * The datasheet discourages the use of TDH to find
+		 * out the number of sent packets, but we only set
+		 * REPORT_STATUS in a few slots so TDH is the only
+		 * good way.
 		 */
-	    if (ix_use_dd) {
-		struct ixgbe_legacy_tx_desc *txd =
-		    (struct ixgbe_legacy_tx_desc *)txr->tx_base;
-		u_int k1 = netmap_idx_k2n(kring, kring->nr_hwcur);
-		l = txr->next_to_clean;
-		delta = 0;
-		while (l != k1 &&
-		    txd[l].upper.fields.status & IXGBE_TXD_STAT_DD) {
-		    delta++;
-		    l = (l == lim) ? 0 : l + 1;
+		nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_IS_VF(adapter) ?
+				       IXGBE_VFTDH(kring->ring_id) : IXGBE_TDH(kring->ring_id));
+		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
+			D("TDH wrap %d", nic_i);
+			nic_i -= kring->nkr_num_slots;
 		}
-	    } else {
-		l = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
-		if (l >= kring->nkr_num_slots) { /* XXX can happen */
-			D("TDH wrap %d", l);
-			l -= kring->nkr_num_slots;
-		}
-		delta = l - txr->next_to_clean;
-	    }
-		if (delta) {
+		if (nic_i != txr->next_to_clean) {
 			/* some tx completed, increment avail */
-			if (delta < 0)
-				delta += kring->nkr_num_slots;
-			txr->next_to_clean = l;
-			kring->nr_hwavail += delta;
-			if (kring->nr_hwavail > lim)
-				goto ring_reset;
+			txr->next_to_clean = nic_i;
+			kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
 		}
 	}
-	/* update avail to what the kernel knows */
-	ring->avail = kring->nr_hwavail;
 
-	if (do_lock)
-		IXGBE_TX_UNLOCK(txr);
+	nm_txsync_finalize(kring);
+
 	return 0;
 }
 
@@ -431,75 +332,75 @@
 
 /*
  * Reconcile kernel and user view of the receive ring.
- * Same as for the txsync, this routine must be efficient and
- * avoid races in accessing the shared regions.
+ * Same as for the txsync, this routine must be efficient.
+ * The caller guarantees a single invocation, but races against
+ * the rest of the driver should be handled here.
  *
- * When called, userspace has read data from slots kring->nr_hwcur
- * up to ring->cur (excluded).
+ * On call, kring->rhead is the first packet that userspace wants
+ * to keep, and kring->rcur is the wakeup point.
+ * The kernel has previously reported packets up to kring->rtail.
  *
- * The last interrupt reported kring->nr_hwavail slots available
- * after kring->nr_hwcur.
- * We must subtract the newly consumed slots (cur - nr_hwcur)
- * from nr_hwavail, make the descriptors available for the next reads,
- * and set kring->nr_hwcur = ring->cur and ring->avail = kring->nr_hwavail.
- *
- * do_lock has a special meaning: please refer to txsync.
+ * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
+ * of whether or not we received an interrupt.
  */
 static int
-ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
 {
-	struct adapter *adapter = ifp->if_softc;
-	struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
-	struct netmap_adapter *na = NA(adapter->ifp);
-	struct netmap_kring *kring = &na->rx_rings[ring_nr];
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
 	struct netmap_ring *ring = kring->ring;
-	u_int j, l, n, lim = kring->nkr_num_slots - 1;
-	int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
-	u_int k = ring->cur, resvd = ring->reserved;
+	u_int nm_i;	/* index into the netmap ring */
+	u_int nic_i;	/* index into the NIC ring */
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = nm_rxsync_prologue(kring);
+	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
 
-	if (k > lim)
+	/* device-specific */
+	struct adapter *adapter = ifp->if_softc;
+	struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];
+
+	if (head > lim)
 		return netmap_ring_reinit(kring);
 
-	if (do_lock)
-		IXGBE_RX_LOCK(rxr);
 	/* XXX check sync modes */
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 			BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 	/*
-	 * First part, import newly received packets into the netmap ring.
+	 * First part: import newly received packets.
 	 *
-	 * j is the index of the next free slot in the netmap ring,
-	 * and l is the index of the next received packet in the NIC ring,
+	 * nm_i is the index of the next free slot in the netmap ring,
+	 * nic_i is the index of the next received packet in the NIC ring,
 	 * and they may differ in case if_init() has been called while
 	 * in netmap mode. For the receive ring we have
 	 *
-	 *	j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
-	 *	l = rxr->next_to_check;
+	 *	nic_i = rxr->next_to_check;
+	 *	nm_i = kring->nr_hwtail (previous)
 	 * and
-	 *	j == (l + kring->nkr_hwofs) % ring_size
+	 *	nm_i == (nic_i + kring->nkr_hwofs) % ring_size
 	 *
 	 * rxr->next_to_check is set to 0 on a ring reinit
 	 */
 	if (netmap_no_pendintr || force_update) {
-		int crclen = ix_crcstrip ? 0 : 4;
+		int crclen = (ix_crcstrip || IXGBE_IS_VF(adapter) ) ? 0 : 4;
 		uint16_t slot_flags = kring->nkr_slot_flags;
 
-		l = rxr->next_to_check;
-		j = netmap_idx_n2k(kring, l);
+		nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
+		nm_i = netmap_idx_n2k(kring, nic_i);
 
 		for (n = 0; ; n++) {
-			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
+			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
 			uint32_t staterr = le32toh(curr->wb.upper.status_error);
 
 			if ((staterr & IXGBE_RXD_STAT_DD) == 0)
 				break;
-			ring->slot[j].len = le16toh(curr->wb.upper.length) - crclen;
-			ring->slot[j].flags = slot_flags;
+			ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
+			ring->slot[nm_i].flags = slot_flags;
 			bus_dmamap_sync(rxr->ptag,
-			    rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+			    rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
 		if (n) { /* update the state variables */
 			if (netmap_no_pendintr && !force_update) {
@@ -507,49 +408,37 @@
 				ix_rx_miss ++;
 				ix_rx_miss_bufs += n;
 			}
-			rxr->next_to_check = l;
-			kring->nr_hwavail += n;
+			rxr->next_to_check = nic_i;
+			kring->nr_hwtail = nm_i;
 		}
 		kring->nr_kflags &= ~NKR_PENDINTR;
 	}
 
 	/*
-	 * Skip past packets that userspace has released
-	 * (from kring->nr_hwcur to ring->cur - ring->reserved excluded),
+	 * Second part: skip past packets that userspace has released.
+	 * (kring->nr_hwcur to kring->rhead excluded),
 	 * and make the buffers available for reception.
-	 * As usual j is the index in the netmap ring, l is the index
-	 * in the NIC ring, and j == (l + kring->nkr_hwofs) % ring_size
+	 * As usual nm_i is the index in the netmap ring,
+	 * nic_i is the index in the NIC ring, and
+	 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
 	 */
-	j = kring->nr_hwcur;
-	if (resvd > 0) {
-		if (resvd + ring->avail >= lim + 1) {
-			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
-			ring->reserved = resvd = 0; // XXX panic...
-		}
-		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
-	}
-	if (j != k) { /* userspace has released some packets. */
-		l = netmap_idx_k2n(kring, j);
-		for (n = 0; j != k; n++) {
-			/* collect per-slot info, with similar validations
-			 * and flag handling as in the txsync code.
-			 *
-			 * NOTE curr and rxbuf are indexed by l.
-			 * Also, this driver needs to update the physical
-			 * address in the NIC ring, but other drivers
-			 * may not have this requirement.
-			 */
-			struct netmap_slot *slot = &ring->slot[j];
-			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
-			struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[l];
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
 			uint64_t paddr;
-			void *addr = PNMB(slot, &paddr);
+			void *addr = PNMB(na, slot, &paddr);
 
-			if (addr == netmap_buffer_base) /* bad buf */
+			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
+			struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
+
+			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
 				goto ring_reset;
 
 			if (slot->flags & NS_BUF_CHANGED) {
-				netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
+				/* buffer has changed, reload map */
+				netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr);
 				slot->flags &= ~NS_BUF_CHANGED;
 			}
 			curr->wb.upper.status_error = 0;
@@ -556,29 +445,27 @@
 			curr->read.pkt_addr = htole64(paddr);
 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
 			    BUS_DMASYNC_PREREAD);
-			j = (j == lim) ? 0 : j + 1;
-			l = (l == lim) ? 0 : l + 1;
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
 		}
-		kring->nr_hwavail -= n;
-		kring->nr_hwcur = k;
+		kring->nr_hwcur = head;
+
 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
-		/* IMPORTANT: we must leave one free slot in the ring,
-		 * so move l back by one unit
+		/*
+		 * IMPORTANT: we must leave one free slot in the ring,
+		 * so move nic_i back by one unit
 		 */
-		l = (l == 0) ? lim : l - 1;
-		IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), l);
+		nic_i = nm_prev(nic_i, lim);
+		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, nic_i);
 	}
-	/* tell userspace that there are new packets */
-	ring->avail = kring->nr_hwavail - resvd;
 
-	if (do_lock)
-		IXGBE_RX_UNLOCK(rxr);
+	/* tell userspace that there might be new packets */
+	nm_rxsync_finalize(kring);
+
 	return 0;
 
 ring_reset:
-	if (do_lock)
-		IXGBE_RX_UNLOCK(rxr);
 	return netmap_ring_reinit(kring);
 }
 
@@ -590,7 +477,7 @@
  * netmap mode will be disabled and the driver will only
  * operate in standard mode.
  */
-static void
+void
 ixgbe_netmap_attach(struct adapter *adapter)
 {
 	struct netmap_adapter na;
@@ -598,14 +485,14 @@
 	bzero(&na, sizeof(na));
 
 	na.ifp = adapter->ifp;
-	na.separate_locks = 1;	/* this card has separate rx/tx locks */
+	na.na_flags = NAF_BDG_MAYSLEEP;
 	na.num_tx_desc = adapter->num_tx_desc;
 	na.num_rx_desc = adapter->num_rx_desc;
 	na.nm_txsync = ixgbe_netmap_txsync;
 	na.nm_rxsync = ixgbe_netmap_rxsync;
-	na.nm_lock = ixgbe_netmap_lock_wrapper;
 	na.nm_register = ixgbe_netmap_reg;
-	netmap_attach(&na, adapter->num_queues);
-}	
+	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
+	netmap_attach(&na);
+}
 
 /* end of file */
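
The second part of the new ixgbe_netmap_txsync() reduces the reclaim policy to a three-way decision: always reclaim when NAF_FORCE_RECLAIM is set, skip the expensive register read while the kring still has free slots, and otherwise reclaim only if a probed descriptor with REPORT_STATUS set has its DD bit raised. A stand-alone sketch of that decision, with local names rather than driver symbols:

#include <stdio.h>
#include <stdbool.h>

/*
 * Stand-alone sketch of the reclaim decision described above.
 * The names are local stand-ins, not driver or netmap symbols.
 */
static bool
should_reclaim(bool forced, bool kring_empty, bool probe_dd)
{
	if (forced)
		return true;		/* NAF_FORCE_RECLAIM was passed */
	if (!kring_empty)
		return false;		/* free slots left: skip the register read */
	return probe_dd;		/* only if the probed descriptor completed */
}

int
main(void)
{
	printf("forced:          %d\n", should_reclaim(true, false, false));
	printf("ring not empty:  %d\n", should_reclaim(false, false, true));
	printf("empty, DD set:   %d\n", should_reclaim(false, true, true));
	printf("empty, DD clear: %d\n", should_reclaim(false, true, false));
	return 0;
}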

Modified: trunk/sys/dev/netmap/netmap.c
===================================================================
--- trunk/sys/dev/netmap/netmap.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/netmap/netmap.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 /*
- * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -8,7 +9,7 @@
  *      notice, this list of conditions and the following disclaimer.
  *   2. Redistributions in binary form must reproduce the above copyright
  *      notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
+ *      documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -23,9 +24,10 @@
  * SUCH DAMAGE.
  */
 
-#define NM_BRIDGE
 
 /*
+ * $FreeBSD: stable/10/sys/dev/netmap/netmap.c 281706 2015-04-18 21:22:26Z rpaulo $
+ *
  * This module supports memory mapped access to network devices,
  * see netmap(4).
  *
@@ -52,57 +54,373 @@
  *    packets on the output interface.
  * 6. select() or poll() can be used to wait for events on individual
  *    transmit or receive queues (or all queues for a given interface).
+ *
+
+		SYNCHRONIZATION (USER)
+
+The netmap rings and data structures may be shared among multiple
+user threads or even independent processes.
+Any synchronization among those threads/processes is delegated
+to the threads themselves. Only one thread at a time can be in
+a system call on the same netmap ring. The OS does not enforce
+this and only guarantees against system crashes in case of
+invalid usage.
+
+		LOCKING (INTERNAL)
+
+Within the kernel, access to the netmap rings is protected as follows:
+
+- a spinlock on each ring, to handle producer/consumer races on
+  RX rings attached to the host stack (against multiple host
+  threads writing from the host stack to the same ring),
+  and on 'destination' rings attached to a VALE switch
+  (i.e. RX rings in VALE ports, and TX rings in NIC/host ports)
+  protecting multiple active senders for the same destination.
+
+- an atomic variable to guarantee that there is at most one
+  instance of *_*xsync() on the ring at any time.
+  For rings connected to user file
+  descriptors, an atomic_test_and_set() protects this, and the
+  lock on the ring is not actually used.
+  For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
+  is also used to prevent multiple executions (the driver might indeed
+  already guarantee this).
+  For NIC TX rings connected to a VALE switch, the lock arbitrates
+  access to the queue (both when allocating buffers and when pushing
+  them out).
+
+- *xsync() should be protected against initializations of the card.
+  On FreeBSD most devices have the reset routine protected by
+  a RING lock (ixgbe, igb, em) or core lock (re). lem is missing
+  the RING protection on rx_reset(), this should be added.
+
+  On linux there is an external lock on the tx path, which probably
+  also arbitrates access to the reset routine. XXX to be revised
+
+- a per-interface core_lock protecting access from the host stack
+  while interfaces may be detached from netmap mode.
+  XXX there should be no need for this lock if we detach the interfaces
+  only while they are down.
+
+
+--- VALE SWITCH ---
+
+NMG_LOCK() serializes all modifications to switches and ports.
+A switch cannot be deleted until all ports are gone.
+
+For each switch, an SX lock (RWlock on linux) protects
+deletion of ports. When configuring or deleting a new port, the
+lock is acquired in exclusive mode (after holding NMG_LOCK).
+When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
+The lock is held throughout the entire forwarding cycle,
+during which the thread may incur a page fault.
+Hence it is important that sleepable shared locks are used.
+
+On the rx ring, the per-port lock is grabbed initially to reserve
+a number of slots in the ring, then the lock is released,
+packets are copied from source to destination, and then
+the lock is acquired again and the receive ring is updated.
+(A similar thing is done on the tx ring for NIC and host stack
+ports attached to the switch)
+
  */
 
-#ifdef linux
-#include "bsd_glue.h"
-static netdev_tx_t linux_netmap_start(struct sk_buff *skb, struct net_device *dev);
-#endif /* linux */
 
-#ifdef __APPLE__
-#include "osx_glue.h"
-#endif /* __APPLE__ */
+/* --- internals ----
+ *
+ * Roadmap to the code that implements the above.
+ *
+ * > 1. a process/thread issues one or more open() on /dev/netmap, to create
+ * >    select()able file descriptor on which events are reported.
+ *
+ *  	Internally, we allocate a netmap_priv_d structure, that will be
+ *  	initialized on ioctl(NIOCREGIF).
+ *
+ *      os-specific:
+ *  	    FreeBSD: netmap_open (netmap_freebsd.c). The priv is
+ *  		     per-thread.
+ *  	    linux:   linux_netmap_open (netmap_linux.c). The priv is
+ *  		     per-open.
+ *
+ * > 2. on each descriptor, the process issues an ioctl() to identify
+ * >    the interface that should report events to the file descriptor.
+ *
+ * 	Implemented by netmap_ioctl(), NIOCREGIF case, with nmr->nr_cmd==0.
+ * 	Most important things happen in netmap_get_na() and
+ * 	netmap_do_regif(), called from there. Additional details can be
+ * 	found in the comments above those functions.
+ *
+ * 	In all cases, this action creates/takes-a-reference-to a
+ * 	netmap_*_adapter describing the port, and allocates a netmap_if
+ * 	and all necessary netmap rings, filling them with netmap buffers.
+ *
+ *      In this phase, the sync callbacks for each ring are set (these are used
+ *      in steps 5 and 6 below).  The callbacks depend on the type of adapter.
+ *      The adapter creation/initialization code puts them in the
+ * 	netmap_adapter (fields na->nm_txsync and na->nm_rxsync).  Then, they
+ * 	are copied from there to the netmap_kring's during netmap_do_regif(), by
+ * 	the nm_krings_create() callback.  All the nm_krings_create callbacks
+ * 	actually call netmap_krings_create() to perform this and the other
+ * 	common stuff. netmap_krings_create() also takes care of the host rings,
+ * 	if needed, by setting their sync callbacks appropriately.
+ *
+ * 	Additional actions depend on the kind of netmap_adapter that has been
+ * 	registered:
+ *
+ * 	- netmap_hw_adapter:  	     [netmap.c]
+ * 	     This is a system netdev/ifp with native netmap support.
+ * 	     The ifp is detached from the host stack by redirecting:
+ * 	       - transmissions (from the network stack) to netmap_transmit()
+ * 	       - receive notifications to the nm_notify() callback for
+ * 	         this adapter. The callback is normally netmap_notify(), unless
+ * 	         the ifp is attached to a bridge using bwrap, in which case it
+ * 	         is netmap_bwrap_intr_notify().
+ *
+ * 	- netmap_generic_adapter:      [netmap_generic.c]
+ * 	      A system netdev/ifp without native netmap support.
+ *
+ * 	(the decision about native/non native support is taken in
+ * 	 netmap_get_hw_na(), called by netmap_get_na())
+ *
+ * 	- netmap_vp_adapter 		[netmap_vale.c]
+ * 	      Returned by netmap_get_bdg_na().
+ * 	      This is a persistent or ephemeral VALE port. Ephemeral ports
+ * 	      are created on the fly if they don't already exist, and are
+ * 	      always attached to a bridge.
+ * 	      Persistent VALE ports must be created separately, and
+ * 	      then attached like normal NICs. The NIOCREGIF we are examining
+ * 	      will find them only if they had previously been created and
+ * 	      attached (see VALE_CTL below).
+ *
+ * 	- netmap_pipe_adapter 	      [netmap_pipe.c]
+ * 	      Returned by netmap_get_pipe_na().
+ * 	      Both pipe ends are created, if they didn't already exist.
+ *
+ * 	- netmap_monitor_adapter      [netmap_monitor.c]
+ * 	      Returned by netmap_get_monitor_na().
+ * 	      If successful, the nm_sync callbacks of the monitored adapter
+ * 	      will be intercepted by the returned monitor.
+ *
+ * 	- netmap_bwrap_adapter	      [netmap_vale.c]
+ * 	      Cannot be obtained in this way, see VALE_CTL below
+ *
+ *
+ * 	os-specific:
+ * 	    linux: we first go through linux_netmap_ioctl() to
+ * 	           adapt the FreeBSD interface to the linux one.
+ *
+ *
+ * > 3. on each descriptor, the process issues an mmap() request to
+ * >    map the shared memory region within the process' address space.
+ * >    The list of interesting queues is indicated by a location in
+ * >    the shared memory region.
+ *
+ *      os-specific:
+ *  	    FreeBSD: netmap_mmap_single (netmap_freebsd.c).
+ *  	    linux:   linux_netmap_mmap (netmap_linux.c).
+ *
+ * > 4. using the functions in the netmap(4) userspace API, a process
+ * >    can look up the occupation state of a queue, access memory buffers,
+ * >    and retrieve received packets or enqueue packets to transmit.
+ *
+ * 	these actions do not involve the kernel.
+ *
+ * > 5. using some ioctl()s the process can synchronize the userspace view
+ * >    of the queue with the actual status in the kernel. This includes both
+ * >    receiving the notification of new packets, and transmitting new
+ * >    packets on the output interface.
+ *
+ * 	These are implemented in netmap_ioctl(), NIOCTXSYNC and NIOCRXSYNC
+ * 	cases. They invoke the nm_sync callbacks on the netmap_kring
+ * 	structures, as initialized in step 2 and maybe later modified
+ * 	by a monitor. Monitors, however, will always call the original
+ * 	callback before doing anything else.
+ *
+ *
+ * > 6. select() or poll() can be used to wait for events on individual
+ * >    transmit or receive queues (or all queues for a given interface).
+ *
+ * 	Implemented in netmap_poll(). This will call the same nm_sync()
+ * 	callbacks as in step 5 above.
+ *
+ * 	os-specific:
+ * 		linux: we first go through linux_netmap_poll() to adapt
+ * 		       the FreeBSD interface to the linux one.
+ *
+ *
+ *  ----  VALE_CTL -----
+ *
+ *  VALE switches are controlled by issuing a NIOCREGIF with a non-null
+ *  nr_cmd in the nmreq structure. These subcommands are handled by
+ *  netmap_bdg_ctl() in netmap_vale.c. Persistent VALE ports are created
+ *  and destroyed by issuing the NETMAP_BDG_NEWIF and NETMAP_BDG_DELIF
+ *  subcommands, respectively.
+ *
+ *  Any network interface known to the system (including a persistent VALE
+ *  port) can be attached to a VALE switch by issuing the
+ *  NETMAP_BDG_ATTACH subcommand. After the attachment, persistent VALE ports
+ *  look exactly like ephemeral VALE ports (as created in step 2 above).  The
+ *  attachment of other interfaces, instead, requires the creation of a
+ *  netmap_bwrap_adapter.  Moreover, the attached interface must be put in
+ *  netmap mode. This may require the creation of a netmap_generic_adapter if
+ *  we have no native support for the interface, or if generic adapters have
+ *  been forced by sysctl.
+ *
+ *  Both persistent VALE ports and bwraps are handled by netmap_get_bdg_na(),
+ *  called by nm_bdg_ctl_attach(), and discriminated by the nm_bdg_attach()
+ *  callback.  In the case of the bwrap, the callback creates the
+ *  netmap_bwrap_adapter.  The initialization of the bwrap is then
+ *  completed by calling netmap_do_regif() on it, in the nm_bdg_ctl()
+ *  callback (netmap_bwrap_bdg_ctl in netmap_vale.c).
+ *  A generic adapter for the wrapped ifp will be created if needed, when
+ *  netmap_get_bdg_na() calls netmap_get_hw_na().
+ *
+ *
+ *  ---- DATAPATHS -----
+ *
+ *              -= SYSTEM DEVICE WITH NATIVE SUPPORT =-
+ *
+ *    na == NA(ifp) == netmap_hw_adapter created in DEVICE_netmap_attach()
+ *
+ *    - tx from netmap userspace:
+ *	 concurrently:
+ *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
+ *                kring->nm_sync() == DEVICE_netmap_txsync()
+ *           2) device interrupt handler
+ *                na->nm_notify()  == netmap_notify()
+ *    - rx from netmap userspace:
+ *       concurrently:
+ *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
+ *                kring->nm_sync() == DEVICE_netmap_rxsync()
+ *           2) device interrupt handler
+ *                na->nm_notify()  == netmap_notify()
+ *    - tx from host stack
+ *       concurrently:
+ *           1) host stack
+ *                netmap_transmit()
+ *                  na->nm_notify  == netmap_notify()
+ *           2) ioctl(NIOCRXSYNC)/netmap_poll() in process context
+ *                kring->nm_sync() == netmap_rxsync_from_host_compat
+ *                  netmap_rxsync_from_host(na, NULL, NULL)
+ *    - tx to host stack
+ *           ioctl(NIOCTXSYNC)/netmap_poll() in process context
+ *             kring->nm_sync() == netmap_txsync_to_host_compat
+ *               netmap_txsync_to_host(na)
+ *                 NM_SEND_UP()
+ *                   FreeBSD: na->if_input() == ?? XXX
+ *                   linux: netif_rx() with NM_MAGIC_PRIORITY_RX
+ *
+ *
+ *
+ *               -= SYSTEM DEVICE WITH GENERIC SUPPORT =-
+ *
+ *
+ *
+ *                           -= VALE PORT =-
+ *
+ *
+ *
+ *                           -= NETMAP PIPE =-
+ *
+ *
+ *
+ *  -= SYSTEM DEVICE WITH NATIVE SUPPORT, CONNECTED TO VALE, NO HOST RINGS =-
+ *
+ *
+ *
+ *  -= SYSTEM DEVICE WITH NATIVE SUPPORT, CONNECTED TO VALE, WITH HOST RINGS =-
+ *
+ *
+ *
+ *  -= SYSTEM DEVICE WITH GENERIC SUPPORT, CONNECTED TO VALE, NO HOST RINGS =-
+ *
+ *
+ *
+ *  -= SYSTEM DEVICE WITH GENERIC SUPPORT, CONNECTED TO VALE, WITH HOST RINGS =-
+ *
+ *
+ *
+ */
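The registration and sync steps described above map onto the classic
netmap(4) user API roughly as in the sketch below.  This is only an
illustrative sketch and not part of the change: error handling is omitted,
"em0" is a placeholder interface name, and the ioctl/macro names are
assumed to come from net/netmap.h and net/netmap_user.h.

    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <fcntl.h>
    #include <poll.h>
    #include <string.h>
    #include <net/netmap.h>
    #include <net/netmap_user.h>

    static void
    netmap_usage_sketch(void)
    {
    	struct nmreq req;
    	struct netmap_if *nifp;
    	struct netmap_ring *txr;
    	struct pollfd pfd;
    	char *mem;
    	int fd;

    	fd = open("/dev/netmap", O_RDWR);		/* step 1 */
    	memset(&req, 0, sizeof(req));
    	req.nr_version = NETMAP_API;
    	strncpy(req.nr_name, "em0", sizeof(req.nr_name) - 1);
    	ioctl(fd, NIOCREGIF, &req);			/* step 2: bind the port */
    	mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
    	    MAP_SHARED, fd, 0);				/* step 3: map shared memory */
    	nifp = NETMAP_IF(mem, req.nr_offset);
    	txr = NETMAP_TXRING(nifp, 0);			/* step 4: ring/buffer access */
    	(void)txr;	/* ... fill slots, advance txr->head and txr->cur ... */
    	ioctl(fd, NIOCTXSYNC, NULL);			/* step 5: flush the tx ring */
    	pfd.fd = fd;
    	pfd.events = POLLIN;
    	poll(&pfd, 1, 1000);				/* step 6: wait for rx traffic */
    }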
 
-#ifdef __FreeBSD__
+/*
+ * OS-specific code that is used only within this file.
+ * Other OS-specific code that must be accessed by drivers
+ * is present in netmap_kern.h
+ */
+
+#if defined(__FreeBSD__)
 #include <sys/cdefs.h> /* prerequisite */
-__MBSDID("$MidnightBSD$");
-
 #include <sys/types.h>
-#include <sys/module.h>
 #include <sys/errno.h>
 #include <sys/param.h>	/* defines used in kernel.h */
-#include <sys/jail.h>
 #include <sys/kernel.h>	/* types used in module initialization */
-#include <sys/conf.h>	/* cdevsw struct */
-#include <sys/uio.h>	/* uio struct */
+#include <sys/conf.h>	/* cdevsw struct, UID, GID */
+#include <sys/filio.h>	/* FIONBIO */
 #include <sys/sockio.h>
 #include <sys/socketvar.h>	/* struct socket */
 #include <sys/malloc.h>
-#include <sys/mman.h>	/* PROT_EXEC */
 #include <sys/poll.h>
-#include <sys/proc.h>
 #include <sys/rwlock.h>
-#include <vm/vm.h>	/* vtophys */
-#include <vm/pmap.h>	/* vtophys */
 #include <sys/socket.h> /* sockaddrs */
-#include <machine/bus.h>
 #include <sys/selinfo.h>
 #include <sys/sysctl.h>
+#include <sys/jail.h>
+#include <net/vnet.h>
 #include <net/if.h>
+#include <net/if_var.h>
 #include <net/bpf.h>		/* BIOCIMMEDIATE */
-#include <net/vnet.h>
 #include <machine/bus.h>	/* bus_dmamap_* */
+#include <sys/endian.h>
+#include <sys/refcount.h>
 
-MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
-#endif /* __FreeBSD__ */
 
+/* reduce conditional code */
+// linux API, used for the knlist in FreeBSD
+/* use a private mutex for the knlist */
+#define init_waitqueue_head(x) do {			\
+	struct mtx *m = &(x)->m;			\
+	mtx_init(m, "nm_kn_lock", NULL, MTX_DEF);	\
+	knlist_init_mtx(&(x)->si.si_note, m);		\
+    } while (0)
+
+#define OS_selrecord(a, b)	selrecord(a, &((b)->si))
+#define OS_selwakeup(a, b)	freebsd_selwakeup(a, b)
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#else
+
+#error	Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
 #include <net/netmap.h>
 #include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
 
-/* XXX the following variables must be deprecated and included in nm_mem */
+
+MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
+
+/*
+ * The following variables are used by the drivers and replicate
+ * fields in the global memory pool. They only refer to buffers
+ * used by physical interfaces.
+ */
 u_int netmap_total_buffers;
 u_int netmap_buf_size;
-char *netmap_buffer_base;	/* address of an invalid buffer */
+char *netmap_buffer_base;	/* also address of an invalid buffer */
 
 /* user-controlled variables */
 int netmap_verbose;
@@ -119,179 +437,213 @@
 int netmap_no_pendintr = 1;
 SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
     CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
+int netmap_txsync_retry = 2;
+SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
+    &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");
 
-int netmap_drop = 0;	/* debugging */
+int netmap_adaptive_io = 0;
+SYSCTL_INT(_dev_netmap, OID_AUTO, adaptive_io, CTLFLAG_RW,
+    &netmap_adaptive_io, 0 , "Adaptive I/O on paravirt");
+
 int netmap_flags = 0;	/* debug flags */
 int netmap_fwd = 0;	/* force transparent mode */
+int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */
 
-SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
+/*
+ * netmap_admode selects the netmap mode to use.
+ * Invalid values are reset to NETMAP_ADMODE_BEST
+ */
+enum { NETMAP_ADMODE_BEST = 0,	/* use native, fallback to generic */
+	NETMAP_ADMODE_NATIVE,	/* either native or none */
+	NETMAP_ADMODE_GENERIC,	/* force generic */
+	NETMAP_ADMODE_LAST };
+static int netmap_admode = NETMAP_ADMODE_BEST;
+
+int netmap_generic_mit = 100*1000;   /* Generic mitigation interval in nanoseconds. */
+int netmap_generic_ringsize = 1024;   /* Generic ringsize. */
+int netmap_generic_rings = 1;   /* number of queues in generic. */
+
 SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
 SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , "");
 
-#ifdef NM_BRIDGE /* support for netmap bridge */
+NMG_LOCK_T	netmap_global_lock;
 
+
+static void
+nm_kr_get(struct netmap_kring *kr)
+{
+	while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
+		tsleep(kr, 0, "NM_KR_GET", 4);
+}
+
+
 /*
- * system parameters.
- *
- * All switched ports have prefix NM_NAME.
- * The switch has a max of NM_BDG_MAXPORTS ports (often stored in a bitmap,
- * so a practical upper bound is 64).
- * Each tx ring is read-write, whereas rx rings are readonly (XXX not done yet).
- * The virtual interfaces use per-queue lock instead of core lock.
- * In the tx loop, we aggregate traffic in batches to make all operations
- * faster. The batch size is NM_BDG_BATCH
+ * mark the ring as stopped, and run through the locks
+ * to make sure other users get to see it.
  */
-#define	NM_NAME			"vale"	/* prefix for the interface */
-#define NM_BDG_MAXPORTS		16	/* up to 64 ? */
-#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
-#define NM_BDG_HASH		1024	/* forwarding table entries */
-#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
-#define	NM_BRIDGES		4	/* number of bridges */
+static void
+netmap_disable_ring(struct netmap_kring *kr)
+{
+	kr->nkr_stopped = 1;
+	nm_kr_get(kr);
+	mtx_lock(&kr->q_lock);
+	mtx_unlock(&kr->q_lock);
+	nm_kr_put(kr);
+}
 
+/* stop or enable a single tx ring */
+void
+netmap_set_txring(struct netmap_adapter *na, u_int ring_id, int stopped)
+{
+	if (stopped)
+		netmap_disable_ring(na->tx_rings + ring_id);
+	else
+		na->tx_rings[ring_id].nkr_stopped = 0;
+	/* notify that the stopped state has changed. This is currently
+	 * only used by bwrap to propagate the state to its own krings
+	 * (see netmap_bwrap_intr_notify).
+	 */
+	na->nm_notify(na, ring_id, NR_TX, NAF_DISABLE_NOTIFY);
+}
 
-int netmap_bridge = NM_BDG_BATCH; /* bridge batch size */
-SYSCTL_INT(_dev_netmap, OID_AUTO, bridge, CTLFLAG_RW, &netmap_bridge, 0 , "");
+/* stop or enable a single rx ring */
+void
+netmap_set_rxring(struct netmap_adapter *na, u_int ring_id, int stopped)
+{
+	if (stopped)
+		netmap_disable_ring(na->rx_rings + ring_id);
+	else
+		na->rx_rings[ring_id].nkr_stopped = 0;
+	/* notify that the stopped state has changed. This is currently
+	 * only used by bwrap to propagate the state to its own krings
+	 * (see netmap_bwrap_intr_notify).
+	 */
+	na->nm_notify(na, ring_id, NR_RX, NAF_DISABLE_NOTIFY);
+}
 
-#ifdef linux
 
-#define	refcount_acquire(_a)	atomic_add(1, (atomic_t *)_a)
-#define	refcount_release(_a)	atomic_dec_and_test((atomic_t *)_a)
+/* stop or enable all the rings of na */
+void
+netmap_set_all_rings(struct netmap_adapter *na, int stopped)
+{
+	int i;
+	u_int ntx, nrx;
 
-#else /* !linux */
+	if (!nm_netmap_on(na))
+		return;
 
-#ifdef __FreeBSD__
-#include <sys/endian.h>
-#include <sys/refcount.h>
-#endif /* __FreeBSD__ */
+	ntx = netmap_real_tx_rings(na);
+	nrx = netmap_real_rx_rings(na);
 
-#define prefetch(x)	__builtin_prefetch(x)
+	for (i = 0; i < ntx; i++) {
+		netmap_set_txring(na, i, stopped);
+	}
 
-#endif /* !linux */
+	for (i = 0; i < nrx; i++) {
+		netmap_set_rxring(na, i, stopped);
+	}
+}
 
 /*
- * These are used to handle reference counters for bridge ports.
+ * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
+ * to finish and prevents any new one from starting.  Call this before turning
+ * netmap mode off, or before removing the hardware rings (e.g., on module
+ * unload).  As a rule of thumb for linux drivers, this should be placed near
+ * each napi_disable().
  */
-#define	ADD_BDG_REF(ifp)	refcount_acquire(&NA(ifp)->na_bdg_refcount)
-#define	DROP_BDG_REF(ifp)	refcount_release(&NA(ifp)->na_bdg_refcount)
+void
+netmap_disable_all_rings(struct ifnet *ifp)
+{
+	netmap_set_all_rings(NA(ifp), 1 /* stopped */);
+}
 
-static void bdg_netmap_attach(struct ifnet *ifp);
-static int bdg_netmap_reg(struct ifnet *ifp, int onoff);
-/* per-tx-queue entry */
-struct nm_bdg_fwd {	/* forwarding entry for a bridge */
-	void *buf;
-	uint64_t dst;	/* dst mask */
-	uint32_t src;	/* src index ? */
-	uint16_t len;	/* src len */
-};
-
-struct nm_hash_ent {
-	uint64_t	mac;	/* the top 2 bytes are the epoch */
-	uint64_t	ports;
-};
-
 /*
- * Interfaces for a bridge are all in bdg_ports[].
- * The array has fixed size, an empty entry does not terminate
- * the search. But lookups only occur on attach/detach so we
- * don't mind if they are slow.
- *
- * The bridge is non blocking on the transmit ports.
- *
- * bdg_lock protects accesses to the bdg_ports array.
+ * Convenience function used in drivers.  Re-enables rxsync and txsync on the
+ * adapter's rings. In linux drivers, this should be placed near each
+ * napi_enable().
  */
-struct nm_bridge {
-	struct ifnet *bdg_ports[NM_BDG_MAXPORTS];
-	int n_ports;
-	uint64_t act_ports;
-	int freelist;	/* first buffer index */
-	NM_SELINFO_T si;	/* poll/select wait queue */
-	NM_LOCK_T bdg_lock;	/* protect the selinfo ? */
+void
+netmap_enable_all_rings(struct ifnet *ifp)
+{
+	netmap_set_all_rings(NA(ifp), 0 /* enabled */);
+}
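As a rough illustration of where the two helpers above would sit in a
driver's stop/start paths (foo_stop/foo_init are hypothetical names, used
only to show call placement next to the hardware ring teardown/setup):

    /* Hypothetical driver routines, only to show where the calls belong. */
    static void
    foo_stop(struct ifnet *ifp)
    {
    	netmap_disable_all_rings(ifp);	/* wait for pending *_sync() calls */
    	/* ... stop DMA and tear down the hardware rings ... */
    }

    static void
    foo_init(struct ifnet *ifp)
    {
    	/* ... reprogram the hardware rings ... */
    	netmap_enable_all_rings(ifp);	/* txsync/rxsync allowed again */
    }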
 
-	/* the forwarding table, MAC+ports */
-	struct nm_hash_ent ht[NM_BDG_HASH];
 
-	int namelen;	/* 0 means free */
-	char basename[IFNAMSIZ];
-};
-
-struct nm_bridge nm_bridges[NM_BRIDGES];
-
-#define BDG_LOCK(b)	mtx_lock(&(b)->bdg_lock)
-#define BDG_UNLOCK(b)	mtx_unlock(&(b)->bdg_lock)
-
 /*
- * NA(ifp)->bdg_port	port index
+ * generic bound_checking function
  */
+u_int
+nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
+{
+	u_int oldv = *v;
+	const char *op = NULL;
 
-// XXX only for multiples of 64 bytes, non overlapped.
-static inline void
-pkt_copy(void *_src, void *_dst, int l)
-{
-        uint64_t *src = _src;
-        uint64_t *dst = _dst;
-        if (unlikely(l >= 1024)) {
-                bcopy(src, dst, l);
-                return;
-        }
-        for (; likely(l > 0); l-=64) {
-                *dst++ = *src++;
-                *dst++ = *src++;
-                *dst++ = *src++;
-                *dst++ = *src++;
-                *dst++ = *src++;
-                *dst++ = *src++;
-                *dst++ = *src++;
-                *dst++ = *src++;
-        }
+	if (dflt < lo)
+		dflt = lo;
+	if (dflt > hi)
+		dflt = hi;
+	if (oldv < lo) {
+		*v = dflt;
+		op = "Bump";
+	} else if (oldv > hi) {
+		*v = hi;
+		op = "Clamp";
+	}
+	if (op && msg)
+		printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
+	return *v;
 }
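For example, a driver or the generic code might clamp a tunable before
using it; the bounds below are purely illustrative, only the variable is
one of the sysctl-backed globals declared earlier in this file:

    /* keep the generic ring size within a sane range, warning if adjusted */
    nm_bound_var(&netmap_generic_ringsize, 1024, 64, 16384, "generic ringsize");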
 
+
 /*
- * locate a bridge among the existing ones.
- * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
- * We assume that this is called with a name of at least NM_NAME chars.
+ * packet-dump function, user-supplied or static buffer.
+ * The destination buffer must be at least 30+4*len
  */
-static struct nm_bridge *
-nm_find_bridge(const char *name)
+const char *
+nm_dump_buf(char *p, int len, int lim, char *dst)
 {
-	int i, l, namelen, e;
-	struct nm_bridge *b = NULL;
+	static char _dst[8192];
+	int i, j, i0;
+	static char hex[] ="0123456789abcdef";
+	char *o;	/* output position */
 
-	namelen = strlen(NM_NAME);	/* base length */
-	l = strlen(name);		/* actual length */
-	for (i = namelen + 1; i < l; i++) {
-		if (name[i] == ':') {
-			namelen = i;
-			break;
+#define P_HI(x)	hex[((x) & 0xf0)>>4]
+#define P_LO(x)	hex[((x) & 0xf)]
+#define P_C(x)	((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
+	if (!dst)
+		dst = _dst;
+	if (lim <= 0 || lim > len)
+		lim = len;
+	o = dst;
+	sprintf(o, "buf 0x%p len %d lim %d\n", p, len, lim);
+	o += strlen(o);
+	/* hexdump routine */
+	for (i = 0; i < lim; ) {
+		sprintf(o, "%5d: ", i);
+		o += strlen(o);
+		memset(o, ' ', 48);
+		i0 = i;
+		for (j=0; j < 16 && i < lim; i++, j++) {
+			o[j*3] = P_HI(p[i]);
+			o[j*3+1] = P_LO(p[i]);
 		}
+		i = i0;
+		for (j=0; j < 16 && i < lim; i++, j++)
+			o[j + 48] = P_C(p[i]);
+		o[j+48] = '\n';
+		o += j+49;
 	}
-	if (namelen >= IFNAMSIZ)
-		namelen = IFNAMSIZ;
-	ND("--- prefix is '%.*s' ---", namelen, name);
-
-	/* use the first entry for locking */
-	BDG_LOCK(nm_bridges); // XXX do better
-	for (e = -1, i = 1; i < NM_BRIDGES; i++) {
-		b = nm_bridges + i;
-		if (b->namelen == 0)
-			e = i;	/* record empty slot */
-		else if (strncmp(name, b->basename, namelen) == 0) {
-			ND("found '%.*s' at %d", namelen, name, i);
-			break;
-		}
-	}
-	if (i == NM_BRIDGES) { /* all full */
-		if (e == -1) { /* no empty slot */
-			b = NULL;
-		} else {
-			b = nm_bridges + e;
-			strncpy(b->basename, name, namelen);
-			b->namelen = namelen;
-		}
-	}
-	BDG_UNLOCK(nm_bridges);
-	return b;
+	*o = '\0';
+#undef P_HI
+#undef P_LO
+#undef P_C
+	return dst;
 }
-#endif /* NM_BRIDGE */
 
 
 /*
@@ -298,16 +650,15 @@
  * Fetch configuration from the device, to cope with dynamic
  * reconfigurations after loading the module.
  */
-static int
+/* call with NMG_LOCK held */
+int
 netmap_update_config(struct netmap_adapter *na)
 {
-	struct ifnet *ifp = na->ifp;
 	u_int txr, txd, rxr, rxd;
 
 	txr = txd = rxr = rxd = 0;
-	if (na->nm_config) {
-		na->nm_config(ifp, &txr, &txd, &rxr, &rxd);
-	} else {
+	if (na->nm_config == NULL ||
+	    na->nm_config(na, &txr, &txd, &rxr, &rxd)) {
 		/* take whatever we had at init time */
 		txr = na->num_tx_rings;
 		txd = na->num_tx_desc;
@@ -318,15 +669,15 @@
 	if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
 	    na->num_rx_rings == rxr && na->num_rx_desc == rxd)
 		return 0; /* nothing changed */
-	if (netmap_verbose || na->refcount > 0) {
+	if (netmap_verbose || na->active_fds > 0) {
 		D("stored config %s: txring %d x %d, rxring %d x %d",
-			ifp->if_xname,
+			na->name,
 			na->num_tx_rings, na->num_tx_desc,
 			na->num_rx_rings, na->num_rx_desc);
 		D("new config %s: txring %d x %d, rxring %d x %d",
-			ifp->if_xname, txr, txd, rxr, rxd);
+			na->name, txr, txd, rxr, rxd);
 	}
-	if (na->refcount == 0) {
+	if (na->active_fds == 0) {
 		D("configuration changed (but fine)");
 		na->num_tx_rings = txr;
 		na->num_tx_desc = txd;
@@ -338,327 +689,418 @@
 	return 1;
 }
 
-/*------------- memory allocator -----------------*/
-#include "netmap_mem2.c"
-/*------------ end of memory allocator ----------*/
+/* kring->nm_sync callback for the host tx ring */
+static int
+netmap_txsync_to_host_compat(struct netmap_kring *kring, int flags)
+{
+	(void)flags; /* unused */
+	netmap_txsync_to_host(kring->na);
+	return 0;
+}
 
+/* kring->nm_sync callback for the host rx ring */
+static int
+netmap_rxsync_from_host_compat(struct netmap_kring *kring, int flags)
+{
+	(void)flags; /* unused */
+	netmap_rxsync_from_host(kring->na, NULL, NULL);
+	return 0;
+}
 
-/* Structure associated to each thread which registered an interface.
+
+
+/* create the krings array and initialize the fields common to all adapters.
+ * The array layout is this:
  *
- * The first 4 fields of this structure are written by NIOCREGIF and
- * read by poll() and NIOC?XSYNC.
- * There is low contention among writers (actually, a correct user program
- * should have no contention among writers) and among writers and readers,
- * so we use a single global lock to protect the structure initialization.
- * Since initialization involves the allocation of memory, we reuse the memory
- * allocator lock.
- * Read access to the structure is lock free. Readers must check that
- * np_nifp is not NULL before using the other fields.
- * If np_nifp is NULL initialization has not been performed, so they should
- * return an error to userlevel.
+ *                    +----------+
+ * na->tx_rings ----->|          | \
+ *                    |          |  } na->num_tx_rings
+ *                    |          | /
+ *                    +----------+
+ *                    |          |    host tx kring
+ * na->rx_rings ----> +----------+
+ *                    |          | \
+ *                    |          |  } na->num_rx_rings
+ *                    |          | /
+ *                    +----------+
+ *                    |          |    host rx kring
+ *                    +----------+
+ * na->tailroom ----->|          | \
+ *                    |          |  } tailroom bytes
+ *                    |          | /
+ *                    +----------+
  *
- * The ref_done field is used to regulate access to the refcount in the
- * memory allocator. The refcount must be incremented at most once for
- * each open("/dev/netmap"). The increment is performed by the first
- * function that calls netmap_get_memory() (currently called by
- * mmap(), NIOCGINFO and NIOCREGIF).
- * If the refcount is incremented, it is then decremented when the
- * private structure is destroyed.
+ * Note: for compatibility, host krings are created even when not needed.
+ * The tailroom space is currently used by vale ports for allocating leases.
  */
-struct netmap_priv_d {
-	struct netmap_if * volatile np_nifp;	/* netmap interface descriptor. */
+/* call with NMG_LOCK held */
+int
+netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
+{
+	u_int i, len, ndesc;
+	struct netmap_kring *kring;
+	u_int ntx, nrx;
 
-	struct ifnet	*np_ifp;	/* device for which we hold a reference */
-	int		np_ringid;	/* from the ioctl */
-	u_int		np_qfirst, np_qlast;	/* range of rings to scan */
-	uint16_t	np_txpoll;
+	/* account for the (possibly fake) host rings */
+	ntx = na->num_tx_rings + 1;
+	nrx = na->num_rx_rings + 1;
 
-	unsigned long	ref_done;	/* use with NMA_LOCK held */
-};
+	len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom;
 
+	na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (na->tx_rings == NULL) {
+		D("Cannot allocate krings");
+		return ENOMEM;
+	}
+	na->rx_rings = na->tx_rings + ntx;
 
-static int
-netmap_get_memory(struct netmap_priv_d* p)
-{
-	int error = 0;
-	NMA_LOCK();
-	if (!p->ref_done) {
-		error = netmap_memory_finalize();
-		if (!error)
-			p->ref_done = 1;
+	/*
+	 * All fields in krings are 0 except the ones initialized below,
+	 * but better be explicit on important kring fields.
+	 */
+	ndesc = na->num_tx_desc;
+	for (i = 0; i < ntx; i++) { /* Transmit rings */
+		kring = &na->tx_rings[i];
+		bzero(kring, sizeof(*kring));
+		kring->na = na;
+		kring->ring_id = i;
+		kring->nkr_num_slots = ndesc;
+		if (i < na->num_tx_rings) {
+			kring->nm_sync = na->nm_txsync;
+		} else if (i == na->num_tx_rings) {
+			kring->nm_sync = netmap_txsync_to_host_compat;
+		}
+		/*
+		 * IMPORTANT: Always keep one slot empty.
+		 */
+		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
+		kring->rtail = kring->nr_hwtail = ndesc - 1;
+		snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", na->name, i);
+		ND("ktx %s h %d c %d t %d",
+			kring->name, kring->rhead, kring->rcur, kring->rtail);
+		mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF);
+		init_waitqueue_head(&kring->si);
 	}
-	NMA_UNLOCK();
-	return error;
+
+	ndesc = na->num_rx_desc;
+	for (i = 0; i < nrx; i++) { /* Receive rings */
+		kring = &na->rx_rings[i];
+		bzero(kring, sizeof(*kring));
+		kring->na = na;
+		kring->ring_id = i;
+		kring->nkr_num_slots = ndesc;
+		if (i < na->num_rx_rings) {
+			kring->nm_sync = na->nm_rxsync;
+		} else if (i == na->num_rx_rings) {
+			kring->nm_sync = netmap_rxsync_from_host_compat;
+		}
+		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
+		kring->rtail = kring->nr_hwtail = 0;
+		snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", na->name, i);
+		ND("krx %s h %d c %d t %d",
+			kring->name, kring->rhead, kring->rcur, kring->rtail);
+		mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF);
+		init_waitqueue_head(&kring->si);
+	}
+	init_waitqueue_head(&na->tx_si);
+	init_waitqueue_head(&na->rx_si);
+
+	na->tailroom = na->rx_rings + nrx;
+
+	return 0;
 }
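A minimal nm_krings_create callback for an adapter that needs no extra
per-kring state could simply forward to the helper above.  This is a
sketch only: hardware adapters in this file additionally set up the host
rx mbuf queue, and VALE ports pass a non-zero tailroom for their leases.

    static int
    foo_netmap_krings_create(struct netmap_adapter *na)
    {
    	/* no tailroom needed for this hypothetical adapter */
    	return netmap_krings_create(na, 0);
    }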
 
-/*
- * File descriptor's private data destructor.
- *
- * Call nm_register(ifp,0) to stop netmap mode on the interface and
- * revert to normal operation. We expect that np_ifp has not gone.
- */
-/* call with NMA_LOCK held */
+
+#ifdef __FreeBSD__
 static void
-netmap_dtor_locked(void *data)
+netmap_knlist_destroy(NM_SELINFO_T *si)
 {
-	struct netmap_priv_d *priv = data;
-	struct ifnet *ifp = priv->np_ifp;
-	struct netmap_adapter *na = NA(ifp);
-	struct netmap_if *nifp = priv->np_nifp;
+	/* XXX kqueue(9) needed; these will mirror knlist_init. */
+	knlist_delete(&si->si.si_note, curthread, 0 /* not locked */ );
+	knlist_destroy(&si->si.si_note);
+	/* now we don't need the mutex anymore */
+	mtx_destroy(&si->m);
+}
+#endif /* __FreeBSD__ */
 
-	na->refcount--;
-	if (na->refcount <= 0) {	/* last instance */
-		u_int i, j, lim;
 
-		if (netmap_verbose)
-			D("deleting last instance for %s", ifp->if_xname);
-		/*
-		 * there is a race here with *_netmap_task() and
-		 * netmap_poll(), which don't run under NETMAP_REG_LOCK.
-		 * na->refcount == 0 && na->ifp->if_capenable & IFCAP_NETMAP
-		 * (aka NETMAP_DELETING(na)) are a unique marker that the
-		 * device is dying.
-		 * Before destroying stuff we sleep a bit, and then complete
-		 * the job. NIOCREG should realize the condition and
-		 * loop until they can continue; the other routines
-		 * should check the condition at entry and quit if
-		 * they cannot run.
-		 */
-		na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
-		tsleep(na, 0, "NIOCUNREG", 4);
-		na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
-		na->nm_register(ifp, 0); /* off, clear IFCAP_NETMAP */
-		/* Wake up any sleeping threads. netmap_poll will
-		 * then return POLLERR
-		 */
-		for (i = 0; i < na->num_tx_rings + 1; i++)
-			selwakeuppri(&na->tx_rings[i].si, PI_NET);
-		for (i = 0; i < na->num_rx_rings + 1; i++)
-			selwakeuppri(&na->rx_rings[i].si, PI_NET);
-		selwakeuppri(&na->tx_si, PI_NET);
-		selwakeuppri(&na->rx_si, PI_NET);
-		/* release all buffers */
-		for (i = 0; i < na->num_tx_rings + 1; i++) {
-			struct netmap_ring *ring = na->tx_rings[i].ring;
-			lim = na->tx_rings[i].nkr_num_slots;
-			for (j = 0; j < lim; j++)
-				netmap_free_buf(nifp, ring->slot[j].buf_idx);
-			/* knlist_destroy(&na->tx_rings[i].si.si_note); */
-			mtx_destroy(&na->tx_rings[i].q_lock);
-		}
-		for (i = 0; i < na->num_rx_rings + 1; i++) {
-			struct netmap_ring *ring = na->rx_rings[i].ring;
-			lim = na->rx_rings[i].nkr_num_slots;
-			for (j = 0; j < lim; j++)
-				netmap_free_buf(nifp, ring->slot[j].buf_idx);
-			/* knlist_destroy(&na->rx_rings[i].si.si_note); */
-			mtx_destroy(&na->rx_rings[i].q_lock);
-		}
-		/* XXX kqueue(9) needed; these will mirror knlist_init. */
-		/* knlist_destroy(&na->tx_si.si_note); */
-		/* knlist_destroy(&na->rx_si.si_note); */
-		netmap_free_rings(na);
-		wakeup(na);
+/* undo the actions performed by netmap_krings_create */
+/* call with NMG_LOCK held */
+void
+netmap_krings_delete(struct netmap_adapter *na)
+{
+	struct netmap_kring *kring = na->tx_rings;
+
+	/* we rely on the krings layout described above */
+	for ( ; kring != na->tailroom; kring++) {
+		mtx_destroy(&kring->q_lock);
+		netmap_knlist_destroy(&kring->si);
 	}
-	netmap_if_free(nifp);
+	free(na->tx_rings, M_DEVBUF);
+	na->tx_rings = na->rx_rings = na->tailroom = NULL;
 }
 
+
+/*
+ * Destructor for NIC ports. They also have an mbuf queue
+ * on the rings connected to the host so we need to purge
+ * them first.
+ */
+/* call with NMG_LOCK held */
 static void
-nm_if_rele(struct ifnet *ifp)
+netmap_hw_krings_delete(struct netmap_adapter *na)
 {
-#ifndef NM_BRIDGE
-	if_rele(ifp);
-#else /* NM_BRIDGE */
-	int i, full;
-	struct nm_bridge *b;
+	struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue;
 
-	if (strncmp(ifp->if_xname, NM_NAME, sizeof(NM_NAME) - 1)) {
-		if_rele(ifp);
-		return;
-	}
-	if (!DROP_BDG_REF(ifp))
-		return;
-	b = ifp->if_bridge;
-	BDG_LOCK(nm_bridges);
-	BDG_LOCK(b);
-	ND("want to disconnect %s from the bridge", ifp->if_xname);
-	full = 0;
-	for (i = 0; i < NM_BDG_MAXPORTS; i++) {
-		if (b->bdg_ports[i] == ifp) {
-			b->bdg_ports[i] = NULL;
-			bzero(ifp, sizeof(*ifp));
-			free(ifp, M_DEVBUF);
-			break;
-		}
-		else if (b->bdg_ports[i] != NULL)
-			full = 1;
-	}
-	BDG_UNLOCK(b);
-	if (full == 0) {
-		ND("freeing bridge %d", b - nm_bridges);
-		b->namelen = 0;
-	}
-	BDG_UNLOCK(nm_bridges);
-	if (i == NM_BDG_MAXPORTS)
-		D("ouch, cannot find ifp to remove");
-#endif /* NM_BRIDGE */
+	ND("destroy sw mbq with len %d", mbq_len(q));
+	mbq_purge(q);
+	mbq_safe_destroy(q);
+	netmap_krings_delete(na);
 }
 
-static void
-netmap_dtor(void *data)
+
+/* create a new netmap_if for a newly registered fd.
+ * If this is the first registration of the adapter,
+ * also create the netmap rings and their in-kernel view,
+ * the netmap krings.
+ */
+/* call with NMG_LOCK held */
+static struct netmap_if*
+netmap_if_new(struct netmap_adapter *na)
 {
-	struct netmap_priv_d *priv = data;
-	struct ifnet *ifp = priv->np_ifp;
+	struct netmap_if *nifp;
 
-	NMA_LOCK();
-	if (ifp) {
-		struct netmap_adapter *na = NA(ifp);
+	if (netmap_update_config(na)) {
+		/* configuration mismatch, report and fail */
+		return NULL;
+	}
 
-		na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
-		netmap_dtor_locked(data);
-		na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
+	if (na->active_fds)	/* already registered */
+		goto final;
 
-		nm_if_rele(ifp); /* might also destroy *na */
+	/* create and init the krings arrays.
+	 * Depending on the adapter, this may also create
+	 * the netmap rings themselves
+	 */
+	if (na->nm_krings_create(na))
+		return NULL;
+
+	/* create all missing netmap rings */
+	if (netmap_mem_rings_create(na))
+		goto cleanup;
+
+final:
+
+	/* in all cases, create a new netmap if */
+	nifp = netmap_mem_if_new(na);
+	if (nifp == NULL)
+		goto cleanup;
+
+	return (nifp);
+
+cleanup:
+
+	if (na->active_fds == 0) {
+		netmap_mem_rings_delete(na);
+		na->nm_krings_delete(na);
 	}
-	if (priv->ref_done) {
-		netmap_memory_deref();
-	}
-	NMA_UNLOCK();
-	bzero(priv, sizeof(*priv));	/* XXX for safety */
-	free(priv, M_DEVBUF);
+
+	return NULL;
 }
 
-#ifdef __FreeBSD__
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pager.h>
-#include <vm/uma.h>
 
-static struct cdev_pager_ops saved_cdev_pager_ops;
-
+/* grab a reference to the memory allocator, if we don't have one already.  The
+ * reference is taken from the netmap_adapter registered with the priv.
+ */
+/* call with NMG_LOCK held */
 static int
-netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
-    vm_ooffset_t foff, struct ucred *cred, u_short *color)
+netmap_get_memory_locked(struct netmap_priv_d* p)
 {
-	if (netmap_verbose)
-		D("first mmap for %p", handle);
-	return saved_cdev_pager_ops.cdev_pg_ctor(handle,
-			size, prot, foff, cred, color);
+	struct netmap_mem_d *nmd;
+	int error = 0;
+
+	if (p->np_na == NULL) {
+		if (!netmap_mmap_unreg)
+			return ENODEV;
+		/* for compatibility with older versions of the API
+ 		 * we use the global allocator when no interface has been
+ 		 * registered
+ 		 */
+		nmd = &nm_mem;
+	} else {
+		nmd = p->np_na->nm_mem;
+	}
+	if (p->np_mref == NULL) {
+		error = netmap_mem_finalize(nmd, p->np_na);
+		if (!error)
+			p->np_mref = nmd;
+	} else if (p->np_mref != nmd) {
+		/* a virtual port has been registered, but previous
+ 		 * syscalls already used the global allocator.
+ 		 * We cannot continue
+ 		 */
+		error = ENODEV;
+	}
+	return error;
 }
 
-static void
-netmap_dev_pager_dtor(void *handle)
+
+/* call with NMG_LOCK *not* held */
+int
+netmap_get_memory(struct netmap_priv_d* p)
 {
-	saved_cdev_pager_ops.cdev_pg_dtor(handle);
-	ND("ready to release memory for %p", handle);
+	int error;
+	NMG_LOCK();
+	error = netmap_get_memory_locked(p);
+	NMG_UNLOCK();
+	return error;
 }
 
 
-static struct cdev_pager_ops netmap_cdev_pager_ops = {
-        .cdev_pg_ctor = netmap_dev_pager_ctor,
-        .cdev_pg_dtor = netmap_dev_pager_dtor,
-        .cdev_pg_fault = NULL,
-};
-
+/* call with NMG_LOCK held */
 static int
-netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
-	vm_size_t objsize,  vm_object_t *objp, int prot)
+netmap_have_memory_locked(struct netmap_priv_d* p)
 {
-	vm_object_t obj;
+	return p->np_mref != NULL;
+}
 
-	ND("cdev %p foff %jd size %jd objp %p prot %d", cdev,
-	    (intmax_t )*foff, (intmax_t )objsize, objp, prot);
-	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
-            curthread->td_ucred);
-	ND("returns obj %p", obj);
-	if (obj == NULL)
-		return EINVAL;
-	if (saved_cdev_pager_ops.cdev_pg_fault == NULL) {
-		ND("initialize cdev_pager_ops");
-		saved_cdev_pager_ops = *(obj->un_pager.devp.ops);
-		netmap_cdev_pager_ops.cdev_pg_fault =
-			saved_cdev_pager_ops.cdev_pg_fault;
-	};
-	obj->un_pager.devp.ops = &netmap_cdev_pager_ops;
-	*objp = obj;
-	return 0;
+
+/* call with NMG_LOCK held */
+static void
+netmap_drop_memory_locked(struct netmap_priv_d* p)
+{
+	if (p->np_mref) {
+		netmap_mem_deref(p->np_mref, p->np_na);
+		p->np_mref = NULL;
+	}
 }
-#endif /* __FreeBSD__ */
 
 
 /*
- * mmap(2) support for the "netmap" device.
- *
- * Expose all the memory previously allocated by our custom memory
- * allocator: this way the user has only to issue a single mmap(2), and
- * can work on all the data structures flawlessly.
- *
- * Return 0 on success, -1 otherwise.
+ * Call nm_register(na, 0) to stop netmap mode on the interface and
+ * revert to normal operation.
+ * The second argument is the nifp to work on. In some cases it is
+ * not attached yet to the netmap_priv_d so we need to pass it as
+ * a separate argument.
  */
-
-#ifdef __FreeBSD__
-static int
-netmap_mmap(__unused struct cdev *dev,
-#if __FreeBSD_version < 900000
-		vm_offset_t offset, vm_paddr_t *paddr, int nprot
-#else
-		vm_ooffset_t offset, vm_paddr_t *paddr, int nprot,
-		__unused vm_memattr_t *memattr
-#endif
-	)
+/* call with NMG_LOCK held */
+static void
+netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
 {
-	int error = 0;
-	struct netmap_priv_d *priv;
+	struct netmap_adapter *na = priv->np_na;
 
-	if (nprot & PROT_EXEC)
-		return (-1);	// XXX -1 or EINVAL ?
+	NMG_LOCK_ASSERT();
+	na->active_fds--;
+	if (na->active_fds <= 0) {	/* last instance */
 
-	error = devfs_get_cdevpriv((void **)&priv);
-	if (error == EBADF) {	/* called on fault, memory is initialized */
-		ND(5, "handling fault at ofs 0x%x", offset);
-		error = 0;
-	} else if (error == 0)	/* make sure memory is set */
-		error = netmap_get_memory(priv);
-	if (error)
-		return (error);
+		if (netmap_verbose)
+			D("deleting last instance for %s", na->name);
+		/*
+		 * (TO CHECK) This function is only called
+		 * when the last reference to this file descriptor goes
+		 * away. This means we cannot have any pending poll()
+		 * or interrupt routine operating on the structure.
+		 * XXX The file may be closed in a thread while
+		 * another thread is using it.
+		 * Linux keeps the file opened until the last reference
+		 * by any outstanding ioctl/poll or mmap is gone.
+		 * FreeBSD does not track mmap()s (but we do) and
+		 * wakes up any sleeping poll(). Need to check what
+		 * happens if the close() occurs while a concurrent
+		 * syscall is running.
+		 */
+		na->nm_register(na, 0); /* off, clear flags */
+		/* Wake up any sleeping threads. netmap_poll will
+		 * then return POLLERR
+		 * XXX The wake up now must happen during *_down(), when
+		 * we order all activities to stop. -gl
+		 */
+		netmap_knlist_destroy(&na->tx_si);
+		netmap_knlist_destroy(&na->rx_si);
 
-	ND("request for offset 0x%x", (uint32_t)offset);
-	*paddr = netmap_ofstophys(offset);
+		/* delete rings and buffers */
+		netmap_mem_rings_delete(na);
+		na->nm_krings_delete(na);
+	}
+	/* delete the nifp */
+	netmap_mem_if_delete(na, nifp);
+}
 
-	return (*paddr ? 0 : ENOMEM);
+/* call with NMG_LOCK held */
+static __inline int
+nm_tx_si_user(struct netmap_priv_d *priv)
+{
+	return (priv->np_na != NULL &&
+		(priv->np_txqlast - priv->np_txqfirst > 1));
 }
 
-static int
-netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+/* call with NMG_LOCK held */
+static __inline int
+nm_rx_si_user(struct netmap_priv_d *priv)
 {
-	if (netmap_verbose)
-		D("dev %p fflag 0x%x devtype %d td %p",
-			dev, fflag, devtype, td);
-	return 0;
+	return (priv->np_na != NULL &&
+		(priv->np_rxqlast - priv->np_rxqfirst > 1));
 }
 
-static int
-netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+
+/*
+ * Destructor of the netmap_priv_d, called when the fd has
+ * no active open() and mmap(). Also called in error paths.
+ *
+ * returns 1 if this is the last instance and we can free priv
+ */
+/* call with NMG_LOCK held */
+int
+netmap_dtor_locked(struct netmap_priv_d *priv)
 {
-	struct netmap_priv_d *priv;
-	int error;
+	struct netmap_adapter *na = priv->np_na;
 
-	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
-			      M_NOWAIT | M_ZERO);
-	if (priv == NULL)
-		return ENOMEM;
+#ifdef __FreeBSD__
+	/*
+	 * np_refcount is the number of active mmaps on
+	 * this file descriptor
+	 */
+	if (--priv->np_refcount > 0) {
+		return 0;
+	}
+#endif /* __FreeBSD__ */
+	if (!na) {
+	    return 1; //XXX is it correct?
+	}
+	netmap_do_unregif(priv, priv->np_nifp);
+	priv->np_nifp = NULL;
+	netmap_drop_memory_locked(priv);
+	if (priv->np_na) {
+		if (nm_tx_si_user(priv))
+			na->tx_si_users--;
+		if (nm_rx_si_user(priv))
+			na->rx_si_users--;
+		netmap_adapter_put(na);
+		priv->np_na = NULL;
+	}
+	return 1;
+}
 
-	error = devfs_set_cdevpriv(priv, netmap_dtor);
-	if (error)
-	        return error;
 
-	return 0;
+/* call with NMG_LOCK *not* held */
+void
+netmap_dtor(void *data)
+{
+	struct netmap_priv_d *priv = data;
+	int last_instance;
+
+	NMG_LOCK();
+	last_instance = netmap_dtor_locked(priv);
+	NMG_UNLOCK();
+	if (last_instance) {
+		bzero(priv, sizeof(*priv));	/* for safety */
+		free(priv, M_DEVBUF);
+	}
 }
-#endif /* __FreeBSD__ */
 
 
+
+
 /*
  * Handlers for synchronization of the queues from/to the host.
  * Netmap has two operating modes:
@@ -677,100 +1119,98 @@
  * might take a while before releasing the buffer.
  */
 
+
 /*
  * pass a chain of buffers to the host stack as coming from 'dst'
+ * We do not need to lock because the queue is private.
  */
 static void
-netmap_send_up(struct ifnet *dst, struct mbuf *head)
+netmap_send_up(struct ifnet *dst, struct mbq *q)
 {
 	struct mbuf *m;
 
 	/* send packets up, outside the lock */
-	while ((m = head) != NULL) {
-		head = head->m_nextpkt;
-		m->m_nextpkt = NULL;
+	while ((m = mbq_dequeue(q)) != NULL) {
 		if (netmap_verbose & NM_VERB_HOST)
 			D("sending up pkt %p size %d", m, MBUF_LEN(m));
 		NM_SEND_UP(dst, m);
 	}
+	mbq_destroy(q);
 }
 
-struct mbq {
-	struct mbuf *head;
-	struct mbuf *tail;
-	int count;
-};
 
 /*
  * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
- * Run from hwcur to cur - reserved
+ * Take packets from hwcur to ring->head marked NS_FORWARD (or forced)
+ * and pass them up. Drop remaining packets in the unlikely event
+ * of an mbuf shortage.
  */
 static void
 netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
 {
-	/* Take packets from hwcur to cur-reserved and pass them up.
-	 * In case of no buffers we give up. At the end of the loop,
-	 * the queue is drained in all cases.
-	 * XXX handle reserved
-	 */
-	int k = kring->ring->cur - kring->ring->reserved;
-	u_int n, lim = kring->nkr_num_slots - 1;
-	struct mbuf *m, *tail = q->tail;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->ring->head;
+	u_int n;
+	struct netmap_adapter *na = kring->na;
 
-	if (k < 0)
-		k = k + kring->nkr_num_slots;
-	for (n = kring->nr_hwcur; n != k;) {
+	for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) {
+		struct mbuf *m;
 		struct netmap_slot *slot = &kring->ring->slot[n];
 
-		n = (n == lim) ? 0 : n + 1;
 		if ((slot->flags & NS_FORWARD) == 0 && !force)
 			continue;
-		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE) {
-			D("bad pkt at %d len %d", n, slot->len);
+		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE(na)) {
+			RD(5, "bad pkt at %d len %d", n, slot->len);
 			continue;
 		}
 		slot->flags &= ~NS_FORWARD; // XXX needed ?
-		m = m_devget(NMB(slot), slot->len, 0, kring->na->ifp, NULL);
+		/* XXX TODO: adapt to the case of a multisegment packet */
+		m = m_devget(NMB(na, slot), slot->len, 0, na->ifp, NULL);
 
 		if (m == NULL)
 			break;
-		if (tail)
-			tail->m_nextpkt = m;
-		else
-			q->head = m;
-		tail = m;
-		q->count++;
-		m->m_nextpkt = NULL;
+		mbq_enqueue(q, m);
 	}
-	q->tail = tail;
 }
 
+
 /*
- * called under main lock to send packets from the host to the NIC
- * The host ring has packets from nr_hwcur to (cur - reserved)
- * to be sent down. We scan the tx rings, which have just been
- * flushed so nr_hwcur == cur. Pushing packets down means
- * increment cur and decrement avail.
- * XXX to be verified
+ * Send to the NIC rings packets marked NS_FORWARD between
+ * kring->nr_hwcur and kring->rhead.
+ * Called under kring->rx_queue.lock on the sw rx ring.
  */
-static void
+static u_int
 netmap_sw_to_nic(struct netmap_adapter *na)
 {
 	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
-	struct netmap_kring *k1 = &na->tx_rings[0];
-	int i, howmany, src_lim, dst_lim;
+	struct netmap_slot *rxslot = kring->ring->slot;
+	u_int i, rxcur = kring->nr_hwcur;
+	u_int const head = kring->rhead;
+	u_int const src_lim = kring->nkr_num_slots - 1;
+	u_int sent = 0;
 
-	howmany = kring->nr_hwavail;	/* XXX otherwise cur - reserved - nr_hwcur */
+	/* scan rings to find space, then fill as much as possible */
+	for (i = 0; i < na->num_tx_rings; i++) {
+		struct netmap_kring *kdst = &na->tx_rings[i];
+		struct netmap_ring *rdst = kdst->ring;
+		u_int const dst_lim = kdst->nkr_num_slots - 1;
 
-	src_lim = kring->nkr_num_slots;
-	for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) {
-		ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
-		dst_lim = k1->nkr_num_slots;
-		while (howmany > 0 && k1->ring->avail > 0) {
+		/* XXX do we trust ring or kring->rcur,rtail ? */
+		for (; rxcur != head && !nm_ring_empty(rdst);
+		     rxcur = nm_next(rxcur, src_lim) ) {
 			struct netmap_slot *src, *dst, tmp;
-			src = &kring->ring->slot[kring->nr_hwcur];
-			dst = &k1->ring->slot[k1->ring->cur];
+			u_int dst_cur = rdst->cur;
+
+			src = &rxslot[rxcur];
+			if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
+				continue;
+
+			sent++;
+
+			dst = &rdst->slot[dst_cur];
+
 			tmp = *src;
+
 			src->buf_idx = dst->buf_idx;
 			src->flags = NS_BUF_CHANGED;
 
@@ -777,60 +1217,53 @@
 			dst->buf_idx = tmp.buf_idx;
 			dst->len = tmp.len;
 			dst->flags = NS_BUF_CHANGED;
-			ND("out len %d buf %d from %d to %d",
-				dst->len, dst->buf_idx,
-				kring->nr_hwcur, k1->ring->cur);
 
-			if (++kring->nr_hwcur >= src_lim)
-				kring->nr_hwcur = 0;
-			howmany--;
-			kring->nr_hwavail--;
-			if (++k1->ring->cur >= dst_lim)
-				k1->ring->cur = 0;
-			k1->ring->avail--;
+			rdst->cur = nm_next(dst_cur, dst_lim);
 		}
-		kring->ring->cur = kring->nr_hwcur; // XXX
-		k1++;
+		/* if (sent) XXX txsync ? */
 	}
+	return sent;
 }
 
+
 /*
- * netmap_sync_to_host() passes packets up. We are called from a
+ * netmap_txsync_to_host() passes packets up. We are called from a
  * system call in user process context, and the only contention
  * can be among multiple user threads erroneously calling
  * this routine concurrently.
  */
-static void
-netmap_sync_to_host(struct netmap_adapter *na)
+void
+netmap_txsync_to_host(struct netmap_adapter *na)
 {
 	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
 	struct netmap_ring *ring = kring->ring;
-	u_int k, lim = kring->nkr_num_slots - 1;
-	struct mbq q = { NULL, NULL };
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
+	struct mbq q;
 
-	k = ring->cur;
-	if (k > lim) {
-		netmap_ring_reinit(kring);
-		return;
-	}
-	// na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
-
-	/* Take packets from hwcur to cur and pass them up.
+	/* Take packets from hwcur to head and pass them up.
+	 * force cur = head, since netmap_grab_packets() stops at head.
 	 * In case of no buffers we give up. At the end of the loop,
 	 * the queue is drained in all cases.
 	 */
-	netmap_grab_packets(kring, &q, 1);
-	kring->nr_hwcur = k;
-	kring->nr_hwavail = ring->avail = lim;
-	// na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
+	mbq_init(&q);
+	ring->cur = head;
+	netmap_grab_packets(kring, &q, 1 /* force */);
+	ND("have %d pkts in queue", mbq_len(&q));
+	kring->nr_hwcur = head;
+	kring->nr_hwtail = head + lim;
+	if (kring->nr_hwtail > lim)
+		kring->nr_hwtail -= lim + 1;
+	nm_txsync_finalize(kring);
 
-	netmap_send_up(na->ifp, q.head);
+	netmap_send_up(na->ifp, &q);
 }
 
+
 /*
  * rxsync backend for packets coming from the host stack.
- * They have been put in the queue by netmap_start() so we
- * need to protect access to the kring using a lock.
+ * They have been put in kring->rx_queue by netmap_transmit().
+ * We protect access to the kring using kring->rx_queue.lock
  *
  * This routine also does the selrecord if called from the poll handler
  * (we know because td != NULL).
@@ -837,133 +1270,429 @@
  *
  * NOTE: on linux, selrecord() is defined as a macro and uses pwait
  *     as an additional hidden argument.
+ * returns the number of packets delivered to tx queues in
+ * transparent mode, or a negative value if error
  */
-static void
-netmap_sync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
+int
+netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
 {
 	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
 	struct netmap_ring *ring = kring->ring;
-	u_int j, n, lim = kring->nkr_num_slots;
-	u_int k = ring->cur, resvd = ring->reserved;
+	u_int nm_i, n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
+	int ret = 0;
+	struct mbq *q = &kring->rx_queue;
 
 	(void)pwait;	/* disable unused warnings */
-	na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
-	if (k >= lim) {
-		netmap_ring_reinit(kring);
-		return;
+	(void)td;
+
+	mbq_lock(q);
+
+	/* First part: import newly received packets */
+	n = mbq_len(q);
+	if (n) { /* grab packets from the queue */
+		struct mbuf *m;
+		uint32_t stop_i;
+
+		nm_i = kring->nr_hwtail;
+		stop_i = nm_prev(nm_i, lim);
+		while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
+			int len = MBUF_LEN(m);
+			struct netmap_slot *slot = &ring->slot[nm_i];
+
+			m_copydata(m, 0, len, NMB(na, slot));
+			ND("nm %d len %d", nm_i, len);
+			if (netmap_verbose)
+                                D("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));
+
+			slot->len = len;
+			slot->flags = kring->nkr_slot_flags;
+			nm_i = nm_next(nm_i, lim);
+			m_freem(m);
+		}
+		kring->nr_hwtail = nm_i;
 	}
-	/* new packets are already set in nr_hwavail */
-	/* skip past packets that userspace has released */
-	j = kring->nr_hwcur;
-	if (resvd > 0) {
-		if (resvd + ring->avail >= lim + 1) {
-			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
-			ring->reserved = resvd = 0; // XXX panic...
+
+	/*
+	 * Second part: skip past packets that userspace has released.
+	 */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) { /* something was released */
+		if (netmap_fwd || kring->ring->flags & NR_FORWARD)
+			ret = netmap_sw_to_nic(na);
+		kring->nr_hwcur = head;
+	}
+
+	nm_rxsync_finalize(kring);
+
+	/* access copies of cur,tail in the kring */
+	if (kring->rcur == kring->rtail && td) /* no bufs available */
+		OS_selrecord(td, &kring->si);
+
+	mbq_unlock(q);
+	return ret;
+}
+
+
+/* Get a netmap adapter for the port.
+ *
+ * If it is possible to satisfy the request, return 0
+ * with *na containing the netmap adapter found.
+ * Otherwise return an error code, with *na containing NULL.
+ *
+ * When the port is attached to a bridge, we always return
+ * EBUSY.
+ * Otherwise, if the port is already bound to a file descriptor,
+ * then we unconditionally return the existing adapter into *na.
+ * In all the other cases, we return (into *na) either native,
+ * generic or NULL, according to the following table:
+ *
+ *					native_support
+ * active_fds   dev.netmap.admode         YES     NO
+ * -------------------------------------------------------
+ *    >0              *                 NA(ifp) NA(ifp)
+ *
+ *     0        NETMAP_ADMODE_BEST      NATIVE  GENERIC
+ *     0        NETMAP_ADMODE_NATIVE    NATIVE   NULL
+ *     0        NETMAP_ADMODE_GENERIC   GENERIC GENERIC
+ *
+ */
+
+int
+netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
+{
+	/* generic support */
+	int i = netmap_admode;	/* Take a snapshot. */
+	int error = 0;
+	struct netmap_adapter *prev_na;
+	struct netmap_generic_adapter *gna;
+
+	*na = NULL; /* default */
+
+	/* reset in case of invalid value */
+	if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST)
+		i = netmap_admode = NETMAP_ADMODE_BEST;
+
+	if (NETMAP_CAPABLE(ifp)) {
+		prev_na = NA(ifp);
+		/* If an adapter already exists, return it if
+		 * there are active file descriptors or if
+		 * netmap is not forced to use generic
+		 * adapters.
+		 */
+		if (NETMAP_OWNED_BY_ANY(prev_na)
+			|| i != NETMAP_ADMODE_GENERIC
+			|| prev_na->na_flags & NAF_FORCE_NATIVE
+#ifdef WITH_PIPES
+			/* ugly, but we cannot allow an adapter switch
+			 * if some pipe is referring to this one
+			 */
+			|| prev_na->na_next_pipe > 0
+#endif
+		) {
+			*na = prev_na;
+			return 0;
 		}
-		k = (k >= resvd) ? k - resvd : k + lim - resvd;
-        }
-	if (j != k) {
-		n = k >= j ? k - j : k + lim - j;
-		kring->nr_hwavail -= n;
-		kring->nr_hwcur = k;
 	}
-	k = ring->avail = kring->nr_hwavail - resvd;
-	if (k == 0 && td)
-		selrecord(td, &kring->si);
-	if (k && (netmap_verbose & NM_VERB_HOST))
-		D("%d pkts from stack", k);
-	na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
+
+	/* If there isn't native support and netmap is not allowed
+	 * to use generic adapters, we cannot satisfy the request.
+	 */
+	if (!NETMAP_CAPABLE(ifp) && i == NETMAP_ADMODE_NATIVE)
+		return EOPNOTSUPP;
+
+	/* Otherwise, create a generic adapter and return it,
+	 * saving the previously used netmap adapter, if any.
+	 *
+	 * Note that here 'prev_na', if not NULL, MUST be a
+	 * native adapter, and CANNOT be a generic one. This is
+	 * true because generic adapters are created on demand, and
+	 * destroyed when not used anymore. Therefore, if the adapter
+	 * currently attached to an interface 'ifp' is generic, it
+	 * must be that
+	 * (NA(ifp)->active_fds > 0 || NETMAP_OWNED_BY_KERN(NA(ifp))).
+	 * Consequently, if NA(ifp) is generic, we will enter one of
+	 * the branches above. This ensures that we never override
+	 * a generic adapter with another generic adapter.
+	 */
+	prev_na = NA(ifp);
+	error = generic_netmap_attach(ifp);
+	if (error)
+		return error;
+
+	*na = NA(ifp);
+	gna = (struct netmap_generic_adapter*)NA(ifp);
+	gna->prev = prev_na; /* save old na */
+	if (prev_na != NULL) {
+		ifunit_ref(ifp->if_xname);
+		// XXX add a refcount ?
+		netmap_adapter_get(prev_na);
+	}
+	ND("Created generic NA %p (prev %p)", gna, gna->prev);
+
+	return 0;
 }
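Since netmap_admode is exported as dev.netmap.admode (see the SYSCTL_INT
earlier in this hunk), the adapter selection policy can be changed from
userspace.  A minimal sketch, assuming the enum values above (where
NETMAP_ADMODE_GENERIC == 2):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
    	int mode = 2;	/* NETMAP_ADMODE_GENERIC: force emulated adapters */

    	if (sysctlbyname("dev.netmap.admode", NULL, NULL,
    	    &mode, sizeof(mode)) != 0) {
    		perror("sysctlbyname");
    		return (1);
    	}
    	return (0);
    }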
 
 
 /*
- * get a refcounted reference to an interface.
- * Return ENXIO if the interface does not exist, EINVAL if netmap
- * is not supported by the interface.
- * If successful, hold a reference.
+ * MUST BE CALLED UNDER NMG_LOCK()
+ *
+ * Get a refcounted reference to a netmap adapter attached
+ * to the interface specified by nmr.
+ * This is always called in the execution of an ioctl().
+ *
+ * Return ENXIO if the interface specified by the request does
+ * not exist, ENOTSUP if netmap is not supported by the interface,
+ * EBUSY if the interface is already attached to a bridge,
+ * EINVAL if parameters are invalid, ENOMEM if needed resources
+ * could not be allocated.
+ * If successful, hold a reference to the netmap adapter.
+ *
+ * No reference is kept on the real interface, which may then
+ * disappear at any time.
  */
-static int
-get_ifp(const char *name, struct ifnet **ifp)
+int
+netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
 {
-#ifdef NM_BRIDGE
-	struct ifnet *iter = NULL;
+	struct ifnet *ifp = NULL;
+	int error = 0;
+	struct netmap_adapter *ret = NULL;
 
-	do {
-		struct nm_bridge *b;
-		int i, l, cand = -1;
+	*na = NULL;     /* default return value */
 
-		if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1))
-			break;
-		b = nm_find_bridge(name);
-		if (b == NULL) {
-			D("no bridges available for '%s'", name);
-			return (ENXIO);
+	NMG_LOCK_ASSERT();
+
+	/* we cascade through all possible types of netmap adapter.
+	 * All netmap_get_*_na() functions return an error and an na,
+	 * with the following combinations:
+	 *
+	 * error    na
+	 *   0	   NULL		type doesn't match
+	 *  !0	   NULL		type matches, but na creation/lookup failed
+	 *   0	  !NULL		type matches and na created/found
+	 *  !0    !NULL		impossible
+	 */
+
+	/* try to see if this is a monitor port */
+	error = netmap_get_monitor_na(nmr, na, create);
+	if (error || *na != NULL)
+		return error;
+
+	/* try to see if this is a pipe port */
+	error = netmap_get_pipe_na(nmr, na, create);
+	if (error || *na != NULL)
+		return error;
+
+	/* try to see if this is a bridge port */
+	error = netmap_get_bdg_na(nmr, na, create);
+	if (error)
+		return error;
+
+	if (*na != NULL) /* valid match in netmap_get_bdg_na() */
+		goto pipes;
+
+	/*
+	 * This must be a hardware na, lookup the name in the system.
+	 * Note that by hardware we actually mean "it shows up in ifconfig".
+	 * This may still be a tap, a veth/epair, or even a
+	 * persistent VALE port.
+	 */
+	ifp = ifunit_ref(nmr->nr_name);
+	if (ifp == NULL) {
+	        return ENXIO;
+	}
+
+	error = netmap_get_hw_na(ifp, &ret);
+	if (error)
+		goto out;
+
+	*na = ret;
+	netmap_adapter_get(ret);
+
+pipes:
+	/*
+	 * If we are opening a pipe whose parent was not in netmap mode,
+	 * we have to allocate the pipe array now.
+	 * XXX get rid of this clumsiness (2014-03-15)
+	 */
+	error = netmap_pipe_alloc(*na, nmr);
+
+out:
+	if (error && ret != NULL)
+		netmap_adapter_put(ret);
+
+	if (ifp)
+		if_rele(ifp); /* allow live unloading of drivers modules */
+
+	return error;
+}
+
+
+/*
+ * validate parameters on entry for *_txsync()
+ * Returns ring->head if ok, or something >= kring->nkr_num_slots
+ * in case of error.
+ *
+ * rhead, rcur and rtail=hwtail are stored from previous round.
+ * hwcur is the next packet to send to the ring.
+ *
+ * We want
+ *    hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail
+ *
+ * hwcur, rhead, rtail and hwtail are reliable
+ */
+u_int
+nm_txsync_prologue(struct netmap_kring *kring)
+{
+	struct netmap_ring *ring = kring->ring;
+	u_int head = ring->head; /* read only once */
+	u_int cur = ring->cur; /* read only once */
+	u_int n = kring->nkr_num_slots;
+
+	ND(5, "%s kcur %d ktail %d head %d cur %d tail %d",
+		kring->name,
+		kring->nr_hwcur, kring->nr_hwtail,
+		ring->head, ring->cur, ring->tail);
+#if 1 /* kernel sanity checks; but we can trust the kring. */
+	if (kring->nr_hwcur >= n || kring->rhead >= n ||
+	    kring->rtail >= n ||  kring->nr_hwtail >= n)
+		goto error;
+#endif /* kernel sanity checks */
+	/*
+	 * user sanity checks. We only use 'cur',
+	 * A, B, ... are possible positions for cur:
+	 *
+	 *  0    A  cur   B  tail  C  n-1
+	 *  0    D  tail  E  cur   F  n-1
+	 *
+	 * B, F, D are valid. A, C, E are wrong
+	 */
+	if (kring->rtail >= kring->rhead) {
+		/* want rhead <= head <= rtail */
+		if (head < kring->rhead || head > kring->rtail)
+			goto error;
+		/* and also head <= cur <= rtail */
+		if (cur < head || cur > kring->rtail)
+			goto error;
+	} else { /* here rtail < rhead */
+		/* we need head outside rtail .. rhead */
+		if (head > kring->rtail && head < kring->rhead)
+			goto error;
+
+		/* two cases now: head <= rtail or head >= rhead  */
+		if (head <= kring->rtail) {
+			/* want head <= cur <= rtail */
+			if (cur < head || cur > kring->rtail)
+				goto error;
+		} else { /* head >= rhead */
+			/* cur must be outside rtail..head */
+			if (cur > kring->rtail && cur < head)
+				goto error;
 		}
-		/* XXX locking */
-		BDG_LOCK(b);
-		/* lookup in the local list of ports */
-		for (i = 0; i < NM_BDG_MAXPORTS; i++) {
-			iter = b->bdg_ports[i];
-			if (iter == NULL) {
-				if (cand == -1)
-					cand = i; /* potential insert point */
-				continue;
-			}
-			if (!strcmp(iter->if_xname, name)) {
-				ADD_BDG_REF(iter);
-				ND("found existing interface");
-				BDG_UNLOCK(b);
-				break;
-			}
+	}
+	if (ring->tail != kring->rtail) {
+		RD(5, "tail overwritten was %d need %d",
+			ring->tail, kring->rtail);
+		ring->tail = kring->rtail;
+	}
+	kring->rhead = head;
+	kring->rcur = cur;
+	return head;
+
+error:
+	RD(5, "%s kring error: hwcur %d rcur %d hwtail %d cur %d tail %d",
+		kring->name,
+		kring->nr_hwcur,
+		kring->rcur, kring->nr_hwtail,
+		cur, ring->tail);
+	return n;
+}
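
The wrap-around checks above boil down to testing whether an index falls in a
circular interval on the ring. A standalone simplification follows; it captures
only the interval test, not the full hwcur/head/cur ordering.

#include <assert.h>
#include <stdint.h>

/*
 * Return 1 if x lies in the circular interval [lo, hi] on a ring,
 * handling the wrapped case lo > hi the same way as the rhead/rtail
 * branches in nm_txsync_prologue().
 */
static int
in_between(uint32_t lo, uint32_t x, uint32_t hi)
{
	if (lo <= hi)				/* no wrap: plain interval */
		return (x >= lo && x <= hi);
	/* wrapped: valid positions are outside (hi, lo) */
	return (x >= lo || x <= hi);
}

int
main(void)
{
	/* example: 8-slot ring, rhead = 6, rtail = 2 (wrapped) */
	assert(in_between(6, 7, 2));	/* just after rhead: valid */
	assert(in_between(6, 1, 2));	/* wrapped past slot 0: valid */
	assert(!in_between(6, 4, 2));	/* between rtail and rhead: invalid */
	return 0;
}
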
+
+
+/*
+ * validate parameters on entry for *_rxsync()
+ * Returns ring->head if ok, kring->nkr_num_slots on error.
+ *
+ * For a valid configuration,
+ * hwcur <= head <= cur <= tail <= hwtail
+ *
+ * We only consider head and cur.
+ * hwcur and hwtail are reliable.
+ *
+ */
+u_int
+nm_rxsync_prologue(struct netmap_kring *kring)
+{
+	struct netmap_ring *ring = kring->ring;
+	uint32_t const n = kring->nkr_num_slots;
+	uint32_t head, cur;
+
+	ND("%s kc %d kt %d h %d c %d t %d",
+		kring->name,
+		kring->nr_hwcur, kring->nr_hwtail,
+		ring->head, ring->cur, ring->tail);
+	/*
+	 * Before storing the new values, we should check they do not
+	 * move backwards. However:
+	 * - head is not an issue because the previous value is hwcur;
+	 * - cur could in principle go back, however it does not matter
+	 *   because we are processing a brand new rxsync()
+	 */
+	cur = kring->rcur = ring->cur;	/* read only once */
+	head = kring->rhead = ring->head;	/* read only once */
+#if 1 /* kernel sanity checks */
+	if (kring->nr_hwcur >= n || kring->nr_hwtail >= n)
+		goto error;
+#endif /* kernel sanity checks */
+	/* user sanity checks */
+	if (kring->nr_hwtail >= kring->nr_hwcur) {
+		/* want hwcur <= rhead <= hwtail */
+		if (head < kring->nr_hwcur || head > kring->nr_hwtail)
+			goto error;
+		/* and also rhead <= rcur <= hwtail */
+		if (cur < head || cur > kring->nr_hwtail)
+			goto error;
+	} else {
+		/* we need rhead outside hwtail..hwcur */
+		if (head < kring->nr_hwcur && head > kring->nr_hwtail)
+			goto error;
+		/* two cases now: head <= hwtail or head >= hwcur  */
+		if (head <= kring->nr_hwtail) {
+			/* want head <= cur <= hwtail */
+			if (cur < head || cur > kring->nr_hwtail)
+				goto error;
+		} else {
+			/* cur must be outside hwtail..head */
+			if (cur < head && cur > kring->nr_hwtail)
+				goto error;
 		}
-		if (i < NM_BDG_MAXPORTS) /* already unlocked */
-			break;
-		if (cand == -1) {
-			D("bridge full, cannot create new port");
-no_port:
-			BDG_UNLOCK(b);
-			*ifp = NULL;
-			return EINVAL;
-		}
-		ND("create new bridge port %s", name);
-		/* space for forwarding list after the ifnet */
-		l = sizeof(*iter) +
-			 sizeof(struct nm_bdg_fwd)*NM_BDG_BATCH ;
-		iter = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
-		if (!iter)
-			goto no_port;
-		strcpy(iter->if_xname, name);
-		bdg_netmap_attach(iter);
-		b->bdg_ports[cand] = iter;
-		iter->if_bridge = b;
-		ADD_BDG_REF(iter);
-		BDG_UNLOCK(b);
-		ND("attaching virtual bridge %p", b);
-	} while (0);
-	*ifp = iter;
-	if (! *ifp)
-#endif /* NM_BRIDGE */
-	*ifp = ifunit_ref(name);
-	if (*ifp == NULL)
-		return (ENXIO);
-	/* can do this if the capability exists and if_pspare[0]
-	 * points to the netmap descriptor.
-	 */
-	if (NETMAP_CAPABLE(*ifp))
-		return 0;	/* valid pointer, we hold the refcount */
-	nm_if_rele(*ifp);
-	return EINVAL;	// not NETMAP capable
+	}
+	if (ring->tail != kring->rtail) {
+		RD(5, "%s tail overwritten was %d need %d",
+			kring->name,
+			ring->tail, kring->rtail);
+		ring->tail = kring->rtail;
+	}
+	return head;
+
+error:
+	RD(5, "kring error: hwcur %d rcur %d hwtail %d head %d cur %d tail %d",
+		kring->nr_hwcur,
+		kring->rcur, kring->nr_hwtail,
+		kring->rhead, kring->rcur, ring->tail);
+	return n;
 }
 
 
 /*
  * Error routine called when txsync/rxsync detects an error.
- * Can't do much more than resetting cur = hwcur, avail = hwavail.
+ * Can't do much more than resetting head = cur = hwcur, tail = hwtail
  * Return 1 on reinit.
  *
  * This routine is only called by the upper half of the kernel.
  * It only reads hwcur (which is changed only by the upper half, too)
- * and hwavail (which may be changed by the lower half, but only on
+ * and hwtail (which may be changed by the lower half, but only on
  * a tx ring and only to increase it, so any error will be recovered
  * on the next call). For the above, we don't strictly need to call
  * it under lock.
@@ -975,91 +1704,302 @@
 	u_int i, lim = kring->nkr_num_slots - 1;
 	int errors = 0;
 
-	RD(10, "called for %s", kring->na->ifp->if_xname);
+	// XXX KASSERT nm_kr_tryget
+	RD(10, "called for %s", kring->name);
+	// XXX probably wrong to trust userspace
+	kring->rhead = ring->head;
+	kring->rcur  = ring->cur;
+	kring->rtail = ring->tail;
+
 	if (ring->cur > lim)
 		errors++;
+	if (ring->head > lim)
+		errors++;
+	if (ring->tail > lim)
+		errors++;
 	for (i = 0; i <= lim; i++) {
 		u_int idx = ring->slot[i].buf_idx;
 		u_int len = ring->slot[i].len;
 		if (idx < 2 || idx >= netmap_total_buffers) {
-			if (!errors++)
-				D("bad buffer at slot %d idx %d len %d ", i, idx, len);
+			RD(5, "bad index at slot %d idx %d len %d ", i, idx, len);
 			ring->slot[i].buf_idx = 0;
 			ring->slot[i].len = 0;
-		} else if (len > NETMAP_BUF_SIZE) {
+		} else if (len > NETMAP_BUF_SIZE(kring->na)) {
 			ring->slot[i].len = 0;
-			if (!errors++)
-				D("bad len %d at slot %d idx %d",
-					len, i, idx);
+			RD(5, "bad len at slot %d idx %d len %d", i, idx, len);
 		}
 	}
 	if (errors) {
-		int pos = kring - kring->na->tx_rings;
-		int n = kring->na->num_tx_rings + 1;
-
 		RD(10, "total %d errors", errors);
-		errors++;
-		RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
-			kring->na->ifp->if_xname,
-			pos < n ?  "TX" : "RX", pos < n ? pos : pos - n,
+		RD(10, "%s reinit, cur %d -> %d tail %d -> %d",
+			kring->name,
 			ring->cur, kring->nr_hwcur,
-			ring->avail, kring->nr_hwavail);
-		ring->cur = kring->nr_hwcur;
-		ring->avail = kring->nr_hwavail;
+			ring->tail, kring->nr_hwtail);
+		ring->head = kring->rhead = kring->nr_hwcur;
+		ring->cur  = kring->rcur  = kring->nr_hwcur;
+		ring->tail = kring->rtail = kring->nr_hwtail;
 	}
 	return (errors ? 1 : 0);
 }
 
+/* interpret the ringid and flags fields of an nmreq, by translating them
+ * into a pair of intervals of ring indices:
+ *
+ * [priv->np_txqfirst, priv->np_txqlast) and
+ * [priv->np_rxqfirst, priv->np_rxqlast)
+ *
+ */
+int
+netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
+{
+	struct netmap_adapter *na = priv->np_na;
+	u_int j, i = ringid & NETMAP_RING_MASK;
+	u_int reg = flags & NR_REG_MASK;
 
+	if (reg == NR_REG_DEFAULT) {
+		/* convert from old ringid to flags */
+		if (ringid & NETMAP_SW_RING) {
+			reg = NR_REG_SW;
+		} else if (ringid & NETMAP_HW_RING) {
+			reg = NR_REG_ONE_NIC;
+		} else {
+			reg = NR_REG_ALL_NIC;
+		}
+		D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
+	}
+	switch (reg) {
+	case NR_REG_ALL_NIC:
+	case NR_REG_PIPE_MASTER:
+	case NR_REG_PIPE_SLAVE:
+		priv->np_txqfirst = 0;
+		priv->np_txqlast = na->num_tx_rings;
+		priv->np_rxqfirst = 0;
+		priv->np_rxqlast = na->num_rx_rings;
+		ND("%s %d %d", "ALL/PIPE",
+			priv->np_rxqfirst, priv->np_rxqlast);
+		break;
+	case NR_REG_SW:
+	case NR_REG_NIC_SW:
+		if (!(na->na_flags & NAF_HOST_RINGS)) {
+			D("host rings not supported");
+			return EINVAL;
+		}
+		priv->np_txqfirst = (reg == NR_REG_SW ?
+			na->num_tx_rings : 0);
+		priv->np_txqlast = na->num_tx_rings + 1;
+		priv->np_rxqfirst = (reg == NR_REG_SW ?
+			na->num_rx_rings : 0);
+		priv->np_rxqlast = na->num_rx_rings + 1;
+		ND("%s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
+			priv->np_rxqfirst, priv->np_rxqlast);
+		break;
+	case NR_REG_ONE_NIC:
+		if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
+			D("invalid ring id %d", i);
+			return EINVAL;
+		}
+		/* if not enough rings, use the first one */
+		j = i;
+		if (j >= na->num_tx_rings)
+			j = 0;
+		priv->np_txqfirst = j;
+		priv->np_txqlast = j + 1;
+		j = i;
+		if (j >= na->num_rx_rings)
+			j = 0;
+		priv->np_rxqfirst = j;
+		priv->np_rxqlast = j + 1;
+		break;
+	default:
+		D("invalid regif type %d", reg);
+		return EINVAL;
+	}
+	priv->np_flags = (flags & ~NR_REG_MASK) | reg;
+
+	if (netmap_verbose) {
+		D("%s: tx [%d,%d) rx [%d,%d) id %d",
+			na->name,
+			priv->np_txqfirst,
+			priv->np_txqlast,
+			priv->np_rxqfirst,
+			priv->np_rxqlast,
+			i);
+	}
+	return 0;
+}
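
The NR_REG_ONE_NIC branch above maps a ring id i to the half-open interval
[i, i+1), falling back to ring 0 for the direction that has fewer rings (the
kernel code additionally rejects ids that exceed both counts). A standalone
sketch of that arithmetic, with illustrative names:

#include <stdio.h>

struct range { unsigned first, last; };

/* select a single ring: [i, i+1), or [0, 1) if i is out of range */
static struct range
one_nic(unsigned i, unsigned num_rings)
{
	struct range r;
	unsigned j = (i >= num_rings) ? 0 : i;

	r.first = j;
	r.last = j + 1;
	return r;
}

int
main(void)
{
	/* adapter with 4 tx rings and 2 rx rings, request ring id 3 */
	struct range tx = one_nic(3, 4), rx = one_nic(3, 2);

	printf("tx [%u,%u) rx [%u,%u)\n", tx.first, tx.last, rx.first, rx.last);
	/* prints: tx [3,4) rx [0,1) */
	return 0;
}
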
+
+
 /*
  * Set the ring ID. For devices with a single queue, a request
  * for all rings is the same as a single ring.
  */
 static int
-netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
+netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
 {
-	struct ifnet *ifp = priv->np_ifp;
-	struct netmap_adapter *na = NA(ifp);
-	u_int i = ringid & NETMAP_RING_MASK;
-	/* initially (np_qfirst == np_qlast) we don't want to lock */
-	int need_lock = (priv->np_qfirst != priv->np_qlast);
-	int lim = na->num_rx_rings;
+	struct netmap_adapter *na = priv->np_na;
+	int error;
 
-	if (na->num_tx_rings > lim)
-		lim = na->num_tx_rings;
-	if ( (ringid & NETMAP_HW_RING) && i >= lim) {
-		D("invalid ring id %d", i);
-		return (EINVAL);
+	error = netmap_interp_ringid(priv, ringid, flags);
+	if (error) {
+		return error;
 	}
-	if (need_lock)
-		na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
-	priv->np_ringid = ringid;
-	if (ringid & NETMAP_SW_RING) {
-		priv->np_qfirst = NETMAP_SW_RING;
-		priv->np_qlast = 0;
-	} else if (ringid & NETMAP_HW_RING) {
-		priv->np_qfirst = i;
-		priv->np_qlast = i + 1;
-	} else {
-		priv->np_qfirst = 0;
-		priv->np_qlast = NETMAP_HW_RING ;
-	}
+
 	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
-	if (need_lock)
-		na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
-    if (netmap_verbose) {
-	if (ringid & NETMAP_SW_RING)
-		D("ringid %s set to SW RING", ifp->if_xname);
-	else if (ringid & NETMAP_HW_RING)
-		D("ringid %s set to HW RING %d", ifp->if_xname,
-			priv->np_qfirst);
-	else
-		D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim);
-    }
+
+	/* optimization: count the users registered for more than
+	 * one ring, which are the ones sleeping on the global queue.
+	 * The default netmap_notify() callback will then
+	 * avoid signaling the global queue if nobody is using it
+	 */
+	if (nm_tx_si_user(priv))
+		na->tx_si_users++;
+	if (nm_rx_si_user(priv))
+		na->rx_si_users++;
 	return 0;
 }
 
 /*
+ * possibly move the interface to netmap-mode.
+ * If success it returns a pointer to netmap_if, otherwise NULL.
+ * This must be called with NMG_LOCK held.
+ *
+ * The following na callbacks are called in the process:
+ *
+ * na->nm_config()			[by netmap_update_config]
+ * (get current number and size of rings)
+ *
+ *  	We have a generic one for linux (netmap_linux_config).
+ *  	The bwrap has to override this, since it has to forward
+ *  	the request to the wrapped adapter (netmap_bwrap_config).
+ *
+ *    	XXX netmap_if_new calls this again (2014-03-15)
+ *
+ * na->nm_krings_create()		[by netmap_if_new]
+ * (create and init the krings array)
+ *
+ * 	One of the following:
+ *
+ *	* netmap_hw_krings_create, 			(hw ports)
+ *		creates the standard layout for the krings
+ * 		and adds the mbq (used for the host rings).
+ *
+ * 	* netmap_vp_krings_create			(VALE ports)
+ * 		add leases and scratchpads
+ *
+ * 	* netmap_pipe_krings_create			(pipes)
+ * 		create the krings and rings of both ends and
+ * 		cross-link them
+ *
+ *      * netmap_monitor_krings_create 			(monitors)
+ *      	avoid allocating the mbq
+ *
+ *      * netmap_bwrap_krings_create			(bwraps)
+ *      	create both the bwrap krings array,
+ *      	the krings array of the wrapped adapter, and
+ *      	(if needed) the fake array for the host adapter
+ *
+ * na->nm_register(, 1)
+ * (put the adapter in netmap mode)
+ *
+ * 	This may be one of the following:
+ * 	(XXX these should be either all *_register or all *_reg 2014-03-15)
+ *
+ * 	* netmap_hw_register				(hw ports)
+ * 		checks that the ifp is still there, then calls
+ * 		the hardware specific callback;
+ *
+ * 	* netmap_vp_reg					(VALE ports)
+ *		If the port is connected to a bridge,
+ *		set the NAF_NETMAP_ON flag under the
+ *		bridge write lock.
+ *
+ *	* netmap_pipe_reg				(pipes)
+ *		inform the other pipe end that it is no
+ *		longer responsible for the lifetime of this
+ *		pipe end
+ *
+ *	* netmap_monitor_reg				(monitors)
+ *		intercept the sync callbacks of the monitored
+ *		rings
+ *
+ *	* netmap_bwrap_register				(bwraps)
+ *		cross-link the bwrap and hwna rings,
+ *		forward the request to the hwna, override
+ *		the hwna notify callback (so that frames coming
+ *		from outside go through the bridge).
+ *
+ * XXX maybe netmap_if_new() should be merged with this (2014-03-15).
+ *
+ */
+struct netmap_if *
+netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
+	uint16_t ringid, uint32_t flags, int *err)
+{
+	struct netmap_if *nifp = NULL;
+	int error, need_mem = 0;
+
+	NMG_LOCK_ASSERT();
+	/* ring configuration may have changed, fetch from the card */
+	netmap_update_config(na);
+	priv->np_na = na;     /* store the reference */
+	error = netmap_set_ringid(priv, ringid, flags);
+	if (error)
+		goto out;
+	/* ensure allocators are ready */
+	need_mem = !netmap_have_memory_locked(priv);
+	if (need_mem) {
+		error = netmap_get_memory_locked(priv);
+		ND("get_memory returned %d", error);
+		if (error)
+			goto out;
+	}
+	/* Allocate a netmap_if and, if necessary, all the netmap_ring's */
+	nifp = netmap_if_new(na);
+	if (nifp == NULL) { /* allocation failed */
+		error = ENOMEM;
+		goto out;
+	}
+	na->active_fds++;
+	if (!nm_netmap_on(na)) {
+		/* Netmap not active, set the card in netmap mode
+		 * and make it use the shared buffers.
+		 */
+		/* cache the allocator info in the na */
+		na->na_lut = netmap_mem_get_lut(na->nm_mem);
+		ND("%p->na_lut == %p", na, na->na_lut);
+		na->na_lut_objtotal = netmap_mem_get_buftotal(na->nm_mem);
+		na->na_lut_objsize = netmap_mem_get_bufsize(na->nm_mem);
+		error = na->nm_register(na, 1); /* mode on */
+		if (error) {
+			netmap_do_unregif(priv, nifp);
+			nifp = NULL;
+		}
+	}
+out:
+	*err = error;
+	if (error) {
+		/* we should drop the allocator, but only
+		 * if we were the ones who grabbed it
+		 */
+		if (need_mem)
+			netmap_drop_memory_locked(priv);
+		priv->np_na = NULL;
+	}
+	if (nifp != NULL) {
+		/*
+		 * advertise that the interface is ready by setting priv->np_nifp.
+		 * The barrier is needed because readers (poll and *SYNC)
+		 * check for priv->np_nifp != NULL without locking
+		 */
+		wmb(); /* make sure previous writes are visible to all CPUs */
+		priv->np_nifp = nifp;
+	}
+	return nifp;
+}
+
+
+
+/*
  * ioctl(2) support for the "netmap" device.
  *
  * Following a list of accepted commands:
@@ -1066,41 +2006,40 @@
  * - NIOCGINFO
  * - SIOCGIFADDR	just for convenience
  * - NIOCREGIF
- * - NIOCUNREGIF
  * - NIOCTXSYNC
  * - NIOCRXSYNC
  *
  * Return 0 on success, errno otherwise.
  */
-static int
+int
 netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
 	int fflag, struct thread *td)
 {
 	struct netmap_priv_d *priv = NULL;
-	struct ifnet *ifp;
 	struct nmreq *nmr = (struct nmreq *) data;
-	struct netmap_adapter *na;
+	struct netmap_adapter *na = NULL;
 	int error;
-	u_int i, lim;
+	u_int i, qfirst, qlast;
 	struct netmap_if *nifp;
+	struct netmap_kring *krings;
 
 	(void)dev;	/* UNUSED */
 	(void)fflag;	/* UNUSED */
-#ifdef linux
-#define devfs_get_cdevpriv(pp)				\
-	({ *(struct netmap_priv_d **)pp = ((struct file *)td)->private_data; 	\
-		(*pp ? 0 : ENOENT); })
 
-/* devfs_set_cdevpriv cannot fail on linux */
-#define devfs_set_cdevpriv(p, fn)				\
-	({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); })
-
-
-#define devfs_clear_cdevpriv()	do {				\
-		netmap_dtor(priv); ((struct file *)td)->private_data = 0;	\
-	} while (0)
-#endif /* linux */
-
+	if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
+		/* truncate name */
+		nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';
+		if (nmr->nr_version != NETMAP_API) {
+			D("API mismatch for %s got %d need %d",
+				nmr->nr_name,
+				nmr->nr_version, NETMAP_API);
+			nmr->nr_version = NETMAP_API;
+		}
+		if (nmr->nr_version < NETMAP_MIN_API ||
+		    nmr->nr_version > NETMAP_MAX_API) {
+			return EINVAL;
+		}
+	}
 	CURVNET_SET(TD_TO_VNET(td));
 
 	error = devfs_get_cdevpriv((void **)&priv);
@@ -1111,144 +2050,116 @@
 		return (error == ENOENT ? ENXIO : error);
 	}
 
-	nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';	/* truncate name */
 	switch (cmd) {
 	case NIOCGINFO:		/* return capabilities etc */
-		if (nmr->nr_version != NETMAP_API) {
-			D("API mismatch got %d have %d",
-				nmr->nr_version, NETMAP_API);
-			nmr->nr_version = NETMAP_API;
-			error = EINVAL;
+		if (nmr->nr_cmd == NETMAP_BDG_LIST) {
+			error = netmap_bdg_ctl(nmr, NULL);
 			break;
 		}
-		/* update configuration */
-		error = netmap_get_memory(priv);
-		ND("get_memory returned %d", error);
-		if (error)
-			break;
-		/* memsize is always valid */
-		nmr->nr_memsize = nm_mem.nm_totalsize;
-		nmr->nr_offset = 0;
-		nmr->nr_rx_rings = nmr->nr_tx_rings = 0;
-		nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
-		if (nmr->nr_name[0] == '\0')	/* just get memory info */
-			break;
-		error = get_ifp(nmr->nr_name, &ifp); /* get a refcount */
-		if (error)
-			break;
-		na = NA(ifp); /* retrieve netmap_adapter */
-		netmap_update_config(na);
-		nmr->nr_rx_rings = na->num_rx_rings;
-		nmr->nr_tx_rings = na->num_tx_rings;
-		nmr->nr_rx_slots = na->num_rx_desc;
-		nmr->nr_tx_slots = na->num_tx_desc;
-		nm_if_rele(ifp);	/* return the refcount */
+
+		NMG_LOCK();
+		do {
+			/* memsize is always valid */
+			struct netmap_mem_d *nmd = &nm_mem;
+			u_int memflags;
+
+			if (nmr->nr_name[0] != '\0') {
+				/* get a refcount */
+				error = netmap_get_na(nmr, &na, 1 /* create */);
+				if (error)
+					break;
+				nmd = na->nm_mem; /* get memory allocator */
+			}
+
+			error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags,
+				&nmr->nr_arg2);
+			if (error)
+				break;
+			if (na == NULL) /* only memory info */
+				break;
+			nmr->nr_offset = 0;
+			nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
+			netmap_update_config(na);
+			nmr->nr_rx_rings = na->num_rx_rings;
+			nmr->nr_tx_rings = na->num_tx_rings;
+			nmr->nr_rx_slots = na->num_rx_desc;
+			nmr->nr_tx_slots = na->num_tx_desc;
+			netmap_adapter_put(na);
+		} while (0);
+		NMG_UNLOCK();
 		break;
 
 	case NIOCREGIF:
-		if (nmr->nr_version != NETMAP_API) {
-			nmr->nr_version = NETMAP_API;
+		/* possibly attach/detach NIC and VALE switch */
+		i = nmr->nr_cmd;
+		if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
+				|| i == NETMAP_BDG_VNET_HDR
+				|| i == NETMAP_BDG_NEWIF
+				|| i == NETMAP_BDG_DELIF) {
+			error = netmap_bdg_ctl(nmr, NULL);
+			break;
+		} else if (i != 0) {
+			D("nr_cmd must be 0 not %d", i);
 			error = EINVAL;
 			break;
 		}
-		/* ensure allocators are ready */
-		error = netmap_get_memory(priv);
-		ND("get_memory returned %d", error);
-		if (error)
-			break;
 
 		/* protect access to priv from concurrent NIOCREGIF */
-		NMA_LOCK();
-		if (priv->np_ifp != NULL) {	/* thread already registered */
-			error = netmap_set_ringid(priv, nmr->nr_ringid);
-			NMA_UNLOCK();
-			break;
-		}
-		/* find the interface and a reference */
-		error = get_ifp(nmr->nr_name, &ifp); /* keep reference */
-		if (error) {
-			NMA_UNLOCK();
-			break;
-		}
-		na = NA(ifp); /* retrieve netmap adapter */
+		NMG_LOCK();
+		do {
+			u_int memflags;
 
-		for (i = 10; i > 0; i--) {
-			na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
-			if (!NETMAP_DELETING(na))
+			if (priv->np_na != NULL) {	/* thread already registered */
+				error = EBUSY;
 				break;
-			na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
-			tsleep(na, 0, "NIOCREGIF", hz/10);
-		}
-		if (i == 0) {
-			D("too many NIOCREGIF attempts, give up");
-			error = EINVAL;
-			nm_if_rele(ifp);	/* return the refcount */
-			NMA_UNLOCK();
-			break;
-		}
+			}
+			/* find the interface and a reference */
+			error = netmap_get_na(nmr, &na, 1 /* create */); /* keep reference */
+			if (error)
+				break;
+			if (NETMAP_OWNED_BY_KERN(na)) {
+				netmap_adapter_put(na);
+				error = EBUSY;
+				break;
+			}
+			nifp = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags, &error);
+			if (!nifp) {    /* reg. failed, release priv and ref */
+				netmap_adapter_put(na);
+				priv->np_nifp = NULL;
+				break;
+			}
+			priv->np_td = td; // XXX kqueue, debugging only
 
-		/* ring configuration may have changed, fetch from the card */
-		netmap_update_config(na);
-		priv->np_ifp = ifp;	/* store the reference */
-		error = netmap_set_ringid(priv, nmr->nr_ringid);
-		if (error)
-			goto error;
-		nifp = netmap_if_new(nmr->nr_name, na);
-		if (nifp == NULL) { /* allocation failed */
-			error = ENOMEM;
-		} else if (ifp->if_capenable & IFCAP_NETMAP) {
-			/* was already set */
-		} else {
-			/* Otherwise set the card in netmap mode
-			 * and make it use the shared buffers.
-			 */
-			for (i = 0 ; i < na->num_tx_rings + 1; i++)
-				mtx_init(&na->tx_rings[i].q_lock, "nm_txq_lock", MTX_NETWORK_LOCK, MTX_DEF);
-			for (i = 0 ; i < na->num_rx_rings + 1; i++) {
-				mtx_init(&na->rx_rings[i].q_lock, "nm_rxq_lock", MTX_NETWORK_LOCK, MTX_DEF);
-			}
-			error = na->nm_register(ifp, 1); /* mode on */
+			/* return the offset of the netmap_if object */
+			nmr->nr_rx_rings = na->num_rx_rings;
+			nmr->nr_tx_rings = na->num_tx_rings;
+			nmr->nr_rx_slots = na->num_rx_desc;
+			nmr->nr_tx_slots = na->num_tx_desc;
+			error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags,
+				&nmr->nr_arg2);
 			if (error) {
-				netmap_dtor_locked(priv);
-				netmap_if_free(nifp);
+				netmap_adapter_put(na);
+				break;
 			}
-		}
+			if (memflags & NETMAP_MEM_PRIVATE) {
+				*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
+			}
+			priv->np_txsi = (priv->np_txqlast - priv->np_txqfirst > 1) ?
+				&na->tx_si : &na->tx_rings[priv->np_txqfirst].si;
+			priv->np_rxsi = (priv->np_rxqlast - priv->np_rxqfirst > 1) ?
+				&na->rx_si : &na->rx_rings[priv->np_rxqfirst].si;
 
-		if (error) {	/* reg. failed, release priv and ref */
-error:
-			na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
-			nm_if_rele(ifp);	/* return the refcount */
-			priv->np_ifp = NULL;
-			priv->np_nifp = NULL;
-			NMA_UNLOCK();
-			break;
-		}
-
-		na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
-
-		/* the following assignment is a commitment.
-		 * Readers (i.e., poll and *SYNC) check for
-		 * np_nifp != NULL without locking
-		 */
-		wmb(); /* make sure previous writes are visible to all CPUs */
-		priv->np_nifp = nifp;
-		NMA_UNLOCK();
-
-		/* return the offset of the netmap_if object */
-		nmr->nr_rx_rings = na->num_rx_rings;
-		nmr->nr_tx_rings = na->num_tx_rings;
-		nmr->nr_rx_slots = na->num_rx_desc;
-		nmr->nr_tx_slots = na->num_tx_desc;
-		nmr->nr_memsize = nm_mem.nm_totalsize;
-		nmr->nr_offset = netmap_if_offset(nifp);
+			if (nmr->nr_arg3) {
+				D("requested %d extra buffers", nmr->nr_arg3);
+				nmr->nr_arg3 = netmap_extra_alloc(na,
+					&nifp->ni_bufs_head, nmr->nr_arg3);
+				D("got %d extra buffers", nmr->nr_arg3);
+			}
+			nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
+		} while (0);
+		NMG_UNLOCK();
 		break;
 
-	case NIOCUNREGIF:
-		// XXX we have no data here ?
-		D("deprecated, data is %p", nmr);
-		error = EINVAL;
-		break;
-
 	case NIOCTXSYNC:
 	case NIOCRXSYNC:
 		nifp = priv->np_nifp;
@@ -1257,52 +2168,69 @@
 			error = ENXIO;
 			break;
 		}
-		rmb(); /* make sure following reads are not from cache */
+		mb(); /* make sure following reads are not from cache */
 
+		na = priv->np_na;      /* we have a reference */
 
-		ifp = priv->np_ifp;	/* we have a reference */
-
-		if (ifp == NULL) {
-			D("Internal error: nifp != NULL && ifp == NULL");
+		if (na == NULL) {
+			D("Internal error: nifp != NULL && na == NULL");
 			error = ENXIO;
 			break;
 		}
 
-		na = NA(ifp); /* retrieve netmap adapter */
-		if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */
-			if (cmd == NIOCTXSYNC)
-				netmap_sync_to_host(na);
-			else
-				netmap_sync_from_host(na, NULL, NULL);
+		if (!nm_netmap_on(na)) {
+			error = ENXIO;
 			break;
 		}
-		/* find the last ring to scan */
-		lim = priv->np_qlast;
-		if (lim == NETMAP_HW_RING)
-			lim = (cmd == NIOCTXSYNC) ?
-			    na->num_tx_rings : na->num_rx_rings;
 
-		for (i = priv->np_qfirst; i < lim; i++) {
+		if (cmd == NIOCTXSYNC) {
+			krings = na->tx_rings;
+			qfirst = priv->np_txqfirst;
+			qlast = priv->np_txqlast;
+		} else {
+			krings = na->rx_rings;
+			qfirst = priv->np_rxqfirst;
+			qlast = priv->np_rxqlast;
+		}
+
+		for (i = qfirst; i < qlast; i++) {
+			struct netmap_kring *kring = krings + i;
+			if (nm_kr_tryget(kring)) {
+				error = EBUSY;
+				goto out;
+			}
 			if (cmd == NIOCTXSYNC) {
-				struct netmap_kring *kring = &na->tx_rings[i];
 				if (netmap_verbose & NM_VERB_TXSYNC)
 					D("pre txsync ring %d cur %d hwcur %d",
 					    i, kring->ring->cur,
 					    kring->nr_hwcur);
-				na->nm_txsync(ifp, i, 1 /* do lock */);
+				if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
+					netmap_ring_reinit(kring);
+				} else {
+					kring->nm_sync(kring, NAF_FORCE_RECLAIM);
+				}
 				if (netmap_verbose & NM_VERB_TXSYNC)
 					D("post txsync ring %d cur %d hwcur %d",
 					    i, kring->ring->cur,
 					    kring->nr_hwcur);
 			} else {
-				na->nm_rxsync(ifp, i, 1 /* do lock */);
+				kring->nm_sync(kring, NAF_FORCE_READ);
 				microtime(&na->rx_rings[i].ring->ts);
 			}
+			nm_kr_put(kring);
 		}
 
 		break;
 
+	case NIOCCONFIG:
+		error = netmap_bdg_config(nmr);
+		break;
 #ifdef __FreeBSD__
+	case FIONBIO:
+	case FIOASYNC:
+		ND("FIONBIO/FIOASYNC are no-ops");
+		break;
+
 	case BIOCIMMEDIATE:
 	case BIOCGHDRCMPLT:
 	case BIOCSHDRCMPLT:
@@ -1312,15 +2240,18 @@
 
 	default:	/* allow device-specific ioctls */
 	    {
-		struct socket so;
-		bzero(&so, sizeof(so));
-		error = get_ifp(nmr->nr_name, &ifp); /* keep reference */
-		if (error)
-			break;
-		so.so_vnet = ifp->if_vnet;
-		// so->so_proto not null.
-		error = ifioctl(&so, cmd, data, td);
-		nm_if_rele(ifp);
+		struct ifnet *ifp = ifunit_ref(nmr->nr_name);
+		if (ifp == NULL) {
+			error = ENXIO;
+		} else {
+			struct socket so;
+
+			bzero(&so, sizeof(so));
+			so.so_vnet = ifp->if_vnet;
+			// so->so_proto not null.
+			error = ifioctl(&so, cmd, data, td);
+			if_rele(ifp);
+		}
 		break;
 	    }
 
@@ -1329,6 +2260,7 @@
 		error = EOPNOTSUPP;
 #endif /* linux */
 	}
+out:
 
 	CURVNET_RESTORE();
 	return (error);
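
For background on the command list handled above, the classic userspace
sequence that reaches this handler is open("/dev/netmap"), NIOCREGIF, then an
mmap of the region described in the reply. A minimal sketch, with error
handling omitted and "em0" only as an example interface name:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/netmap.h>
#include <net/netmap_user.h>

int
main(void)
{
	struct nmreq nmr;
	struct netmap_if *nifp;
	void *mem;
	int fd = open("/dev/netmap", O_RDWR);

	memset(&nmr, 0, sizeof(nmr));
	strncpy(nmr.nr_name, "em0", sizeof(nmr.nr_name) - 1);
	nmr.nr_version = NETMAP_API;
	ioctl(fd, NIOCREGIF, &nmr);	/* bind all hardware rings of em0 */

	/* map the shared memory region described by the reply */
	mem = mmap(NULL, nmr.nr_memsize, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, 0);
	nifp = NETMAP_IF(mem, nmr.nr_offset);
	(void)nifp;	/* rings would be reached via NETMAP_TXRING(nifp, i) */

	printf("%u tx rings, %u rx rings\n",
	    (unsigned)nmr.nr_tx_rings, (unsigned)nmr.nr_rx_rings);

	munmap(mem, nmr.nr_memsize);
	close(fd);
	return 0;
}
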
@@ -1341,7 +2273,7 @@
  * Can be called for one or more queues.
  * Return true the event mask corresponding to ready events.
  * If there are no ready events, do a selrecord on either individual
- * selfd or on the global one.
+ * selinfo or on the global one.
  * Device-dependent parts (locking and sync of tx/rx rings)
  * are done through callbacks.
  *
@@ -1349,22 +2281,40 @@
  * The first one is remapped to pwait as selrecord() uses the name as an
  * hidden argument.
  */
-static int
+int
 netmap_poll(struct cdev *dev, int events, struct thread *td)
 {
 	struct netmap_priv_d *priv = NULL;
 	struct netmap_adapter *na;
-	struct ifnet *ifp;
 	struct netmap_kring *kring;
-	u_int core_lock, i, check_all, want_tx, want_rx, revents = 0;
-	u_int lim_tx, lim_rx, host_forwarded = 0;
-	struct mbq q = { NULL, NULL, 0 };
-	enum {NO_CL, NEED_CL, LOCKED_CL }; /* see below */
+	u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0;
+	struct mbq q;		/* packets from hw queues to host stack */
 	void *pwait = dev;	/* linux compatibility */
+	int is_kevent = 0;
 
+	/*
+	 * In order to avoid nested locks, we need to "double check"
+	 * txsync and rxsync if we decide to do a selrecord().
+	 * retry_tx (and retry_rx, later) prevent looping forever.
+	 */
+	int retry_tx = 1, retry_rx = 1;
+
 	(void)pwait;
+	mbq_init(&q);
 
-	if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
+	/*
+	 * XXX kevent has curthread->tp_fop == NULL,
+	 * XXX kevent has curthread->td_fpop == NULL,
+	 * so devfs_get_cdevpriv() fails. We circumvent this by passing
+	 * priv as the first argument, which is also useful to avoid
+	 * the selrecord() calls, which are not necessary in that case.
+	if (devfs_get_cdevpriv((void **)&priv) != 0) {
+		is_kevent = 1;
+		if (netmap_verbose)
+			D("called from kevent");
+		priv = (struct netmap_priv_d *)dev;
+	}
+	if (priv == NULL)
 		return POLLERR;
 
 	if (priv->np_nifp == NULL) {
@@ -1373,114 +2323,50 @@
 	}
 	rmb(); /* make sure following reads are not from cache */
 
-	ifp = priv->np_ifp;
-	// XXX check for deleting() ?
-	if ( (ifp->if_capenable & IFCAP_NETMAP) == 0)
+	na = priv->np_na;
+
+	if (!nm_netmap_on(na))
 		return POLLERR;
 
 	if (netmap_verbose & 0x8000)
-		D("device %s events 0x%x", ifp->if_xname, events);
+		D("device %s events 0x%x", na->name, events);
 	want_tx = events & (POLLOUT | POLLWRNORM);
 	want_rx = events & (POLLIN | POLLRDNORM);
 
-	na = NA(ifp); /* retrieve netmap adapter */
 
-	lim_tx = na->num_tx_rings;
-	lim_rx = na->num_rx_rings;
-	/* how many queues we are scanning */
-	if (priv->np_qfirst == NETMAP_SW_RING) {
-		if (priv->np_txpoll || want_tx) {
-			/* push any packets up, then we are always ready */
-			kring = &na->tx_rings[lim_tx];
-			netmap_sync_to_host(na);
-			revents |= want_tx;
-		}
-		if (want_rx) {
-			kring = &na->rx_rings[lim_rx];
-			if (kring->ring->avail == 0)
-				netmap_sync_from_host(na, td, dev);
-			if (kring->ring->avail > 0) {
-				revents |= want_rx;
-			}
-		}
-		return (revents);
-	}
-
-	/* if we are in transparent mode, check also the host rx ring */
-	kring = &na->rx_rings[lim_rx];
-	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
-			&& want_rx
-			&& (netmap_fwd || kring->ring->flags & NR_FORWARD) ) {
-		if (kring->ring->avail == 0)
-			netmap_sync_from_host(na, td, dev);
-		if (kring->ring->avail > 0)
-			revents |= want_rx;
-	}
-
 	/*
-	 * check_all is set if the card has more than one queue and
-	 * the client is polling all of them. If true, we sleep on
-	 * the "global" selfd, otherwise we sleep on individual selfd
-	 * (we can only sleep on one of them per direction).
-	 * The interrupt routine in the driver should always wake on
-	 * the individual selfd, and also on the global one if the card
-	 * has more than one ring.
+	 * check_all_{tx|rx} are set if the card has more than one queue AND
+	 * the file descriptor is bound to all of them. If so, we sleep on
+	 * the "global" selinfo, otherwise we sleep on individual selinfo
+	 * (FreeBSD only allows two selinfo's per file descriptor).
+	 * The interrupt routine in the driver wakes one or the other
+	 * (or both) depending on which clients are active.
 	 *
-	 * If the card has only one lock, we just use that.
-	 * If the card has separate ring locks, we just use those
-	 * unless we are doing check_all, in which case the whole
-	 * loop is wrapped by the global lock.
-	 * We acquire locks only when necessary: if poll is called
-	 * when buffers are available, we can just return without locks.
-	 *
 	 * rxsync() is only called if we run out of buffers on a POLLIN.
 	 * txsync() is called if we run out of buffers on POLLOUT, or
 	 * there are pending packets to send. The latter can be disabled
 	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
 	 */
-	check_all = (priv->np_qlast == NETMAP_HW_RING) && (lim_tx > 1 || lim_rx > 1);
+	check_all_tx = nm_tx_si_user(priv);
+	check_all_rx = nm_rx_si_user(priv);
 
 	/*
-	 * core_lock indicates what to do with the core lock.
-	 * The core lock is used when either the card has no individual
-	 * locks, or it has individual locks but we are cheking all
-	 * rings so we need the core lock to avoid missing wakeup events.
-	 *
-	 * It has three possible states:
-	 * NO_CL	we don't need to use the core lock, e.g.
-	 *		because we are protected by individual locks.
-	 * NEED_CL	we need the core lock. In this case, when we
-	 *		call the lock routine, move to LOCKED_CL
-	 *		to remember to release the lock once done.
-	 * LOCKED_CL	core lock is set, so we need to release it.
-	 */
-	core_lock = (check_all || !na->separate_locks) ? NEED_CL : NO_CL;
-#ifdef NM_BRIDGE
-	/* the bridge uses separate locks */
-	if (na->nm_register == bdg_netmap_reg) {
-		ND("not using core lock for %s", ifp->if_xname);
-		core_lock = NO_CL;
-	}
-#endif /* NM_BRIDGE */
-	if (priv->np_qlast != NETMAP_HW_RING) {
-		lim_tx = lim_rx = priv->np_qlast;
-	}
-
-	/*
-	 * We start with a lock free round which is good if we have
-	 * data available. If this fails, then lock and call the sync
+	 * We start with a lock free round which is cheap if we have
+	 * slots available. If this fails, then lock and call the sync
 	 * routines.
 	 */
-	for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
+	for (i = priv->np_rxqfirst; want_rx && i < priv->np_rxqlast; i++) {
 		kring = &na->rx_rings[i];
-		if (kring->ring->avail > 0) {
+		/* XXX compare ring->cur and kring->tail */
+		if (!nm_ring_empty(kring->ring)) {
 			revents |= want_rx;
 			want_rx = 0;	/* also breaks the loop */
 		}
 	}
-	for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) {
+	for (i = priv->np_txqfirst; want_tx && i < priv->np_txqlast; i++) {
 		kring = &na->tx_rings[i];
-		if (kring->ring->avail > 0) {
+		/* XXX compare ring->cur and kring->tail */
+		if (!nm_ring_empty(kring->ring)) {
 			revents |= want_tx;
 			want_tx = 0;	/* also breaks the loop */
 		}
@@ -1487,64 +2373,94 @@
 	}
 
 	/*
-	 * If we to push packets out (priv->np_txpoll) or want_tx is
-	 * still set, we do need to run the txsync calls (on all rings,
-	 * to avoid that the tx rings stall).
+	 * If we want to push packets out (priv->np_txpoll) or
+	 * want_tx is still set, we must issue txsync calls
+	 * (on all rings, to avoid that the tx rings stall).
+	 * XXX should also check cur != hwcur on the tx rings.
+	 * Fortunately, normal tx mode has np_txpoll set.
 	 */
 	if (priv->np_txpoll || want_tx) {
+		/*
+		 * The first round checks if anyone is ready, if not
+		 * do a selrecord and another round to handle races.
+		 * want_tx goes to 0 if any space is found, and is
+		 * used to skip rings with no pending transmissions.
+		 */
 flush_tx:
-		for (i = priv->np_qfirst; i < lim_tx; i++) {
+		for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
+			int found = 0;
+
 			kring = &na->tx_rings[i];
-			/*
-			 * Skip the current ring if want_tx == 0
-			 * (we have already done a successful sync on
-			 * a previous ring) AND kring->cur == kring->hwcur
-			 * (there are no pending transmissions for this ring).
-			 */
 			if (!want_tx && kring->ring->cur == kring->nr_hwcur)
 				continue;
-			if (core_lock == NEED_CL) {
-				na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
-				core_lock = LOCKED_CL;
+			/* only one thread does txsync */
+			if (nm_kr_tryget(kring)) {
+				/* either busy or stopped
+				 * XXX if the ring is stopped, sleeping would
+				 * be better. In current code, however, we only
+				 * stop the rings for brief intervals (2014-03-14)
+				 */
+				if (netmap_verbose)
+					RD(2, "%p lost race on txring %d, ok",
+					    priv, i);
+				continue;
 			}
-			if (na->separate_locks)
-				na->nm_lock(ifp, NETMAP_TX_LOCK, i);
-			if (netmap_verbose & NM_VERB_TXSYNC)
-				D("send %d on %s %d",
-					kring->ring->cur,
-					ifp->if_xname, i);
-			if (na->nm_txsync(ifp, i, 0 /* no lock */))
+			if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
+				netmap_ring_reinit(kring);
 				revents |= POLLERR;
+			} else {
+				if (kring->nm_sync(kring, 0))
+					revents |= POLLERR;
+			}
 
-			/* Check avail/call selrecord only if called with POLLOUT */
-			if (want_tx) {
-				if (kring->ring->avail > 0) {
-					/* stop at the first ring. We don't risk
-					 * starvation.
-					 */
-					revents |= want_tx;
-					want_tx = 0;
-				} else if (!check_all)
-					selrecord(td, &kring->si);
+			/*
+			 * If we found new slots, notify potential
+			 * listeners on the same ring.
+			 * Since we just did a txsync, look at the copies
+			 * of cur,tail in the kring.
+			 */
+			found = kring->rcur != kring->rtail;
+			nm_kr_put(kring);
+			if (found) { /* notify other listeners */
+				revents |= want_tx;
+				want_tx = 0;
+				na->nm_notify(na, i, NR_TX, 0);
 			}
-			if (na->separate_locks)
-				na->nm_lock(ifp, NETMAP_TX_UNLOCK, i);
 		}
+		if (want_tx && retry_tx && !is_kevent) {
+			OS_selrecord(td, check_all_tx ?
+			    &na->tx_si : &na->tx_rings[priv->np_txqfirst].si);
+			retry_tx = 0;
+			goto flush_tx;
+		}
 	}
 
 	/*
-	 * now if want_rx is still set we need to lock and rxsync.
+	 * If want_rx is still set scan receive rings.
 	 * Do it on all rings because otherwise we starve.
 	 */
 	if (want_rx) {
-		for (i = priv->np_qfirst; i < lim_rx; i++) {
+		int send_down = 0; /* transparent mode */
+		/* two rounds here for race avoidance */
+do_retry_rx:
+		for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
+			int found = 0;
+
 			kring = &na->rx_rings[i];
-			if (core_lock == NEED_CL) {
-				na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
-				core_lock = LOCKED_CL;
+
+			if (nm_kr_tryget(kring)) {
+				if (netmap_verbose)
+					RD(2, "%p lost race on rxring %d, ok",
+					    priv, i);
+				continue;
 			}
-			if (na->separate_locks)
-				na->nm_lock(ifp, NETMAP_RX_LOCK, i);
+
+			/*
+			 * transparent mode support: collect packets
+			 * from the rxring(s).
+			 * XXX NR_FORWARD should only be read on
+			 * physical or NIC ports
+			 */
 			if (netmap_fwd ||kring->ring->flags & NR_FORWARD) {
 				ND(10, "forwarding some buffers up %d to %d",
 				    kring->nr_hwcur, kring->ring->cur);
@@ -1551,97 +2467,195 @@
 				netmap_grab_packets(kring, &q, netmap_fwd);
 			}
 
-			if (na->nm_rxsync(ifp, i, 0 /* no lock */))
+			if (kring->nm_sync(kring, 0))
 				revents |= POLLERR;
 			if (netmap_no_timestamp == 0 ||
 					kring->ring->flags & NR_TIMESTAMP) {
 				microtime(&kring->ring->ts);
 			}
-
-			if (kring->ring->avail > 0)
+			/* after an rxsync we can use kring->rcur, rtail */
+			found = kring->rcur != kring->rtail;
+			nm_kr_put(kring);
+			if (found) {
 				revents |= want_rx;
-			else if (!check_all)
-				selrecord(td, &kring->si);
-			if (na->separate_locks)
-				na->nm_lock(ifp, NETMAP_RX_UNLOCK, i);
+				retry_rx = 0;
+				na->nm_notify(na, i, NR_RX, 0);
+			}
 		}
-	}
-	if (check_all && revents == 0) { /* signal on the global queue */
-		if (want_tx)
-			selrecord(td, &na->tx_si);
-		if (want_rx)
-			selrecord(td, &na->rx_si);
-	}
 
-	/* forward host to the netmap ring */
-	kring = &na->rx_rings[lim_rx];
-	if (kring->nr_hwavail > 0)
-		ND("host rx %d has %d packets", lim_rx, kring->nr_hwavail);
-	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
-			&& (netmap_fwd || kring->ring->flags & NR_FORWARD)
-			 && kring->nr_hwavail > 0 && !host_forwarded) {
-		if (core_lock == NEED_CL) {
-			na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
-			core_lock = LOCKED_CL;
+		/* transparent mode XXX only during first pass ? */
+		if (na->na_flags & NAF_HOST_RINGS) {
+			kring = &na->rx_rings[na->num_rx_rings];
+			if (check_all_rx
+			    && (netmap_fwd || kring->ring->flags & NR_FORWARD)) {
+				/* XXX fix to use kring fields */
+				if (nm_ring_empty(kring->ring))
+					send_down = netmap_rxsync_from_host(na, td, dev);
+				if (!nm_ring_empty(kring->ring))
+					revents |= want_rx;
+			}
 		}
-		netmap_sw_to_nic(na);
-		host_forwarded = 1; /* prevent another pass */
-		want_rx = 0;
-		goto flush_tx;
+
+		if (retry_rx && !is_kevent)
+			OS_selrecord(td, check_all_rx ?
+			    &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si);
+		if (send_down > 0 || retry_rx) {
+			retry_rx = 0;
+			if (send_down)
+				goto flush_tx; /* and retry_rx */
+			else
+				goto do_retry_rx;
+		}
 	}
 
-	if (core_lock == LOCKED_CL)
-		na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
-	if (q.head)
-		netmap_send_up(na->ifp, q.head);
+	/*
+	 * Transparent mode: marked bufs on rx rings between
+	 * kring->nr_hwcur and ring->head
+	 * are passed to the other endpoint.
+	 *
+	 * In this mode we also scan the sw rxring, which in
+	 * turn passes packets up.
+	 *
+	 * XXX Transparent mode at the moment requires binding all
+	 * rings to a single file descriptor.
+	 */
 
+	if (q.head && na->ifp != NULL)
+		netmap_send_up(na->ifp, &q);
+
 	return (revents);
 }
 
-/*------- driver support routines ------*/
 
-/*
- * default lock wrapper.
+/*-------------------- driver support routines -------------------*/
+
+static int netmap_hw_krings_create(struct netmap_adapter *);
+
+/* default notify callback */
+static int
+netmap_notify(struct netmap_adapter *na, u_int n_ring,
+	enum txrx tx, int flags)
+{
+	struct netmap_kring *kring;
+
+	if (tx == NR_TX) {
+		kring = na->tx_rings + n_ring;
+		OS_selwakeup(&kring->si, PI_NET);
+		/* optimization: avoid a wake up on the global
+		 * queue if nobody has registered for more
+		 * than one ring
+		 */
+		if (na->tx_si_users > 0)
+			OS_selwakeup(&na->tx_si, PI_NET);
+	} else {
+		kring = na->rx_rings + n_ring;
+		OS_selwakeup(&kring->si, PI_NET);
+		/* optimization: same as above */
+		if (na->rx_si_users > 0)
+			OS_selwakeup(&na->rx_si, PI_NET);
+	}
+	return 0;
+}
+
+
+/* called by all routines that create netmap_adapters.
+ * Attach na to the ifp (if any) and provide defaults
+ * for optional callbacks. Defaults assume that we
+ * are creating a hardware netmap_adapter.
  */
-static void
-netmap_lock_wrapper(struct ifnet *dev, int what, u_int queueid)
+int
+netmap_attach_common(struct netmap_adapter *na)
 {
-	struct netmap_adapter *na = NA(dev);
+	struct ifnet *ifp = na->ifp;
 
-	switch (what) {
-#ifdef linux	/* some system do not need lock on register */
-	case NETMAP_REG_LOCK:
-	case NETMAP_REG_UNLOCK:
-		break;
-#endif /* linux */
+	if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
+		D("%s: invalid rings tx %d rx %d",
+			na->name, na->num_tx_rings, na->num_rx_rings);
+		return EINVAL;
+	}
+	/* ifp is NULL for virtual adapters (bwrap, non-persistent VALE ports,
+	 * pipes, monitors). For bwrap we actually have a non-null ifp for
+	 * use by the external modules, but that is set after this
+	 * function has been called.
+	 * XXX this is ugly, maybe split this function in two (2014-03-14)
+	 */
+	if (ifp != NULL) {
+		WNA(ifp) = na;
 
-	case NETMAP_CORE_LOCK:
-		mtx_lock(&na->core_lock);
-		break;
+	/* the following is only needed for na that use the host port.
+	 * XXX do we have something similar for linux ?
+	 */
+#ifdef __FreeBSD__
+		na->if_input = ifp->if_input; /* for netmap_send_up */
+#endif /* __FreeBSD__ */
 
-	case NETMAP_CORE_UNLOCK:
-		mtx_unlock(&na->core_lock);
-		break;
+		NETMAP_SET_CAPABLE(ifp);
+	}
+	if (na->nm_krings_create == NULL) {
+		/* we assume that we have been called by a driver,
+		 * since other port types all provide their own
+		 * nm_krings_create
+		 */
+		na->nm_krings_create = netmap_hw_krings_create;
+		na->nm_krings_delete = netmap_hw_krings_delete;
+	}
+	if (na->nm_notify == NULL)
+		na->nm_notify = netmap_notify;
+	na->active_fds = 0;
 
-	case NETMAP_TX_LOCK:
-		mtx_lock(&na->tx_rings[queueid].q_lock);
-		break;
+	if (na->nm_mem == NULL)
+		/* use the global allocator */
+		na->nm_mem = &nm_mem;
+	if (na->nm_bdg_attach == NULL)
+		/* no special nm_bdg_attach callback. On VALE
+		 * attach, we need to interpose a bwrap
+		 */
+		na->nm_bdg_attach = netmap_bwrap_attach;
+	return 0;
+}
 
-	case NETMAP_TX_UNLOCK:
-		mtx_unlock(&na->tx_rings[queueid].q_lock);
-		break;
 
-	case NETMAP_RX_LOCK:
-		mtx_lock(&na->rx_rings[queueid].q_lock);
-		break;
+/* standard cleanup, called by all destructors */
+void
+netmap_detach_common(struct netmap_adapter *na)
+{
+	if (na->ifp != NULL)
+		WNA(na->ifp) = NULL; /* XXX do we need this? */
 
-	case NETMAP_RX_UNLOCK:
-		mtx_unlock(&na->rx_rings[queueid].q_lock);
-		break;
+	if (na->tx_rings) { /* XXX should not happen */
+		D("freeing leftover tx_rings");
+		na->nm_krings_delete(na);
 	}
+	netmap_pipe_dealloc(na);
+	if (na->na_flags & NAF_MEM_OWNER)
+		netmap_mem_private_delete(na->nm_mem);
+	bzero(na, sizeof(*na));
+	free(na, M_DEVBUF);
 }
 
+/* Wrapper for the register callback provided by hardware drivers.
+ * na->ifp == NULL means that the driver module has been
+ * unloaded, so we cannot call into it.
+ * Note that module unloading, in our patched linux drivers,
+ * happens under NMG_LOCK and after having stopped all the
+ * nic rings (see netmap_detach). This provides sufficient
+ * protection for the other driver-provided callbacks
+ * (i.e., nm_config and nm_*xsync), which therefore don't need
+ * to be wrapped.
+ */
+static int
+netmap_hw_register(struct netmap_adapter *na, int onoff)
+{
+	struct netmap_hw_adapter *hwna =
+		(struct netmap_hw_adapter*)na;
 
+	if (na->ifp == NULL)
+		return onoff ? ENXIO : 0;
+
+	return hwna->nm_hw_register(na, onoff);
+}
+
+
 /*
  * Initialize a ``netmap_adapter`` object created by driver on attach.
  * We allocate a block of memory with room for a struct netmap_adapter
@@ -1649,63 +2663,121 @@
  * of hardware rings):
  * krings	0..N-1	are for the hardware queues.
  * kring	N	is for the host stack queue
- * kring	N+1	is only used for the selinfo for all queues.
+ * kring	N+1	is only used for the selinfo for all queues. // XXX still true ?
  * Return 0 on success, ENOMEM otherwise.
- *
- * By default the receive and transmit adapter ring counts are both initialized
- * to num_queues.  na->num_tx_rings can be set for cards with different tx/rx
- * setups.
  */
 int
-netmap_attach(struct netmap_adapter *arg, int num_queues)
+netmap_attach(struct netmap_adapter *arg)
 {
-	struct netmap_adapter *na = NULL;
+	struct netmap_hw_adapter *hwna = NULL;
+	// XXX when is arg == NULL ?
 	struct ifnet *ifp = arg ? arg->ifp : NULL;
 
 	if (arg == NULL || ifp == NULL)
 		goto fail;
-	na = malloc(sizeof(*na), M_DEVBUF, M_NOWAIT | M_ZERO);
-	if (na == NULL)
+	hwna = malloc(sizeof(*hwna), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (hwna == NULL)
 		goto fail;
-	WNA(ifp) = na;
-	*na = *arg; /* copy everything, trust the driver to not pass junk */
-	NETMAP_SET_CAPABLE(ifp);
-	if (na->num_tx_rings == 0)
-		na->num_tx_rings = num_queues;
-	na->num_rx_rings = num_queues;
-	na->refcount = na->na_single = na->na_multi = 0;
-	/* Core lock initialized here, others after netmap_if_new. */
-	mtx_init(&na->core_lock, "netmap core lock", MTX_NETWORK_LOCK, MTX_DEF);
-	if (na->nm_lock == NULL) {
-		ND("using default locks for %s", ifp->if_xname);
-		na->nm_lock = netmap_lock_wrapper;
+	hwna->up = *arg;
+	hwna->up.na_flags |= NAF_HOST_RINGS;
+	strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
+	hwna->nm_hw_register = hwna->up.nm_register;
+	hwna->up.nm_register = netmap_hw_register;
+	if (netmap_attach_common(&hwna->up)) {
+		free(hwna, M_DEVBUF);
+		goto fail;
 	}
+	netmap_adapter_get(&hwna->up);
 
 #ifdef linux
-	if (!ifp->netdev_ops) {
-		D("ouch, we cannot override netdev_ops");
-		goto fail;
+	if (ifp->netdev_ops) {
+		/* prepare a clone of the netdev ops */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
+		hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
+#else
+		hwna->nm_ndo = *ifp->netdev_ops;
+#endif
 	}
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
-	/* if needed, prepare a clone of the entire netdev ops */
-	na->nm_ndo = *ifp->netdev_ops;
-#endif /* 2.6.28 and above */
-	na->nm_ndo.ndo_start_xmit = linux_netmap_start;
+	hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
+	if (ifp->ethtool_ops) {
+		hwna->nm_eto = *ifp->ethtool_ops;
+	}
+	hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
+#ifdef ETHTOOL_SCHANNELS
+	hwna->nm_eto.set_channels = linux_netmap_set_channels;
+#endif
+	if (arg->nm_config == NULL) {
+		hwna->up.nm_config = netmap_linux_config;
+	}
 #endif /* linux */
 
-	D("success for %s", ifp->if_xname);
+#ifdef __FreeBSD__
+	if_printf(ifp, "netmap queues/slots: TX %d/%d, RX %d/%d\n",
+	    hwna->up.num_tx_rings, hwna->up.num_tx_desc,
+	    hwna->up.num_rx_rings, hwna->up.num_rx_desc);
+#else
+	D("success for %s tx %d/%d rx %d/%d queues/slots",
+		hwna->up.name,
+		hwna->up.num_tx_rings, hwna->up.num_tx_desc,
+		hwna->up.num_rx_rings, hwna->up.num_rx_desc
+		);
+#endif
 	return 0;
 
 fail:
-	D("fail, arg %p ifp %p na %p", arg, ifp, na);
-	netmap_detach(ifp);
-	return (na ? EINVAL : ENOMEM);
+	D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
+	if (ifp)
+		netmap_detach(ifp);
+	return (hwna ? EINVAL : ENOMEM);
 }
 
 
+void
+NM_DBG(netmap_adapter_get)(struct netmap_adapter *na)
+{
+	if (!na) {
+		return;
+	}
+
+	refcount_acquire(&na->na_refcount);
+}
+
+
+/* returns 1 iff the netmap_adapter is destroyed */
+int
+NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
+{
+	if (!na)
+		return 1;
+
+	if (!refcount_release(&na->na_refcount))
+		return 0;
+
+	if (na->nm_dtor)
+		na->nm_dtor(na);
+
+	netmap_detach_common(na);
+
+	return 1;
+}
+
+/* nm_krings_create callback for all hardware native adapters */
+int
+netmap_hw_krings_create(struct netmap_adapter *na)
+{
+	int ret = netmap_krings_create(na, 0);
+	if (ret == 0) {
+		/* initialize the mbq for the sw rx ring */
+		mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue);
+		ND("initialized sw rx queue %d", na->num_rx_rings);
+	}
+	return ret;
+}
+
+
+
 /*
- * Free the allocated memory linked to the given ``netmap_adapter``
- * object.
+ * Called on module unload by the netmap-enabled drivers
  */
 void
 netmap_detach(struct ifnet *ifp)
@@ -1715,15 +2787,19 @@
 	if (!na)
 		return;
 
-	mtx_destroy(&na->core_lock);
-
-	if (na->tx_rings) { /* XXX should not happen */
-		D("freeing leftover tx_rings");
-		free(na->tx_rings, M_DEVBUF);
+	NMG_LOCK();
+	netmap_disable_all_rings(ifp);
+	if (!netmap_adapter_put(na)) {
+		/* someone is still using the adapter,
+		 * tell them that the interface is gone
+		 */
+		na->ifp = NULL;
+		// XXX also clear NAF_NATIVE_ON ?
+		na->na_flags &= ~NAF_NETMAP_ON;
+		/* give them a chance to notice */
+		netmap_enable_all_rings(ifp);
 	}
-	bzero(na, sizeof(*na));
-	WNA(ifp) = NULL;
-	free(na, M_DEVBUF);
+	NMG_UNLOCK();
 }
 
 
@@ -1730,54 +2806,78 @@
 /*
  * Intercept packets from the network stack and pass them
  * to netmap as incoming packets on the 'software' ring.
- * We are not locked when called.
+ *
+ * We only store packets in a bounded mbq and then copy them
+ * in the relevant rxsync routine.
+ *
+ * We rely on the OS to make sure that the ifp and na do not go
+ * away (typically the caller checks for IFF_DRV_RUNNING or the like).
+ * In nm_register() or whenever there is a reinitialization,
+ * we make sure to make the mode change visible here.
  */
 int
-netmap_start(struct ifnet *ifp, struct mbuf *m)
+netmap_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	struct netmap_adapter *na = NA(ifp);
-	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
-	u_int i, len = MBUF_LEN(m);
-	u_int error = EBUSY, lim = kring->nkr_num_slots - 1;
-	struct netmap_slot *slot;
+	struct netmap_kring *kring;
+	u_int len = MBUF_LEN(m);
+	u_int error = ENOBUFS;
+	struct mbq *q;
+	int space;
 
-	if (netmap_verbose & NM_VERB_HOST)
-		D("%s packet %d len %d from the stack", ifp->if_xname,
-			kring->nr_hwcur + kring->nr_hwavail, len);
-	if (len > NETMAP_BUF_SIZE) { /* too long for us */
-		D("%s from_host, drop packet size %d > %d", ifp->if_xname,
-			len, NETMAP_BUF_SIZE);
-		m_freem(m);
-		return EINVAL;
+	// XXX [Linux] we do not need this lock
+	// if we follow the down/configure/up protocol -gl
+	// mtx_lock(&na->core_lock);
+
+	if (!nm_netmap_on(na)) {
+		D("%s not in netmap mode anymore", na->name);
+		error = ENXIO;
+		goto done;
 	}
-	na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
-	if (kring->nr_hwavail >= lim) {
-		if (netmap_verbose)
-			D("stack ring %s full\n", ifp->if_xname);
-		goto done;	/* no space */
+
+	kring = &na->rx_rings[na->num_rx_rings];
+	q = &kring->rx_queue;
+
+	// XXX reconsider long packets if we handle fragments
+	if (len > NETMAP_BUF_SIZE(na)) { /* too long for us */
+		D("%s from_host, drop packet size %d > %d", na->name,
+			len, NETMAP_BUF_SIZE(na));
+		goto done;
 	}
 
-	/* compute the insert position */
-	i = kring->nr_hwcur + kring->nr_hwavail;
-	if (i > lim)
-		i -= lim + 1;
-	slot = &kring->ring->slot[i];
-	m_copydata(m, 0, len, NMB(slot));
-	slot->len = len;
-	slot->flags = kring->nkr_slot_flags;
-	kring->nr_hwavail++;
-	if (netmap_verbose  & NM_VERB_HOST)
-		D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_rings);
-	selwakeuppri(&kring->si, PI_NET);
-	error = 0;
+	/* protect against rxsync_from_host(), netmap_sw_to_nic()
+	 * and maybe other instances of netmap_transmit (the latter
+	 * not possible on Linux).
+	 * Also avoid overflowing the queue.
+	 */
+	mbq_lock(q);
+
+	space = kring->nr_hwtail - kring->nr_hwcur;
+	if (space < 0)
+		space += kring->nkr_num_slots;
+	if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX
+		RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p",
+			na->name, kring->nr_hwcur, kring->nr_hwtail, mbq_len(q),
+			len, m);
+	} else {
+		mbq_enqueue(q, m);
+		ND(10, "%s %d bufs in queue len %d m %p",
+			na->name, mbq_len(q), len, m);
+		/* notify outside the lock */
+		m = NULL;
+		error = 0;
+	}
+	mbq_unlock(q);
+
 done:
-	na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
-
-	/* release the mbuf in either cases of success or failure. As an
-	 * alternative, put the mbuf in a free list and free the list
-	 * only when really necessary.
+	if (m)
+		m_freem(m);
+	/* unconditionally wake up listeners */
+	na->nm_notify(na, na->num_rx_rings, NR_RX, 0);
+	/* this is normally netmap_notify(), but for nics
+	 * connected to a bridge it is netmap_bwrap_intr_notify(),
+	 * that possibly forwards the frames through the switch
 	 */
-	m_freem(m);
 
 	return (error);
 }
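
The queue-full test above relies on the standard circular-buffer occupancy
computation, (hwtail - hwcur) modulo the ring size. In isolation:

#include <assert.h>

/* number of occupied slots between hwcur and hwtail on a circular ring */
static int
ring_space(int hwcur, int hwtail, int num_slots)
{
	int space = hwtail - hwcur;

	if (space < 0)
		space += num_slots;
	return space;
}

int
main(void)
{
	assert(ring_space(2, 5, 8) == 3);	/* no wrap */
	assert(ring_space(6, 1, 8) == 3);	/* wrapped: slots 6, 7, 0 */
	assert(ring_space(4, 4, 8) == 0);	/* empty */
	return 0;
}
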
@@ -1786,42 +2886,58 @@
 /*
  * netmap_reset() is called by the driver routines when reinitializing
  * a ring. The driver is in charge of locking to protect the kring.
- * If netmap mode is not set just return NULL.
+ * If native netmap mode is not set just return NULL.
  */
 struct netmap_slot *
-netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
+netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
 	u_int new_cur)
 {
 	struct netmap_kring *kring;
 	int new_hwofs, lim;
 
-	if (na == NULL)
-		return NULL;	/* no netmap support here */
-	if (!(na->ifp->if_capenable & IFCAP_NETMAP))
+	if (!nm_native_on(na)) {
+		ND("interface not in native netmap mode");
 		return NULL;	/* nothing to reinitialize */
+	}
 
+	/* XXX note- in the new scheme, we are not guaranteed to be
+	 * under lock (e.g. when called on a device reset).
+	 * In this case, we should set a flag and do not trust too
+	 * much the values. In practice: TODO
+	 * - set a RESET flag somewhere in the kring
+	 * - do the processing in a conservative way
+	 * - let the *sync() fixup at the end.
+	 */
 	if (tx == NR_TX) {
 		if (n >= na->num_tx_rings)
 			return NULL;
 		kring = na->tx_rings + n;
+		// XXX check whether we should use hwcur or rcur
 		new_hwofs = kring->nr_hwcur - new_cur;
 	} else {
 		if (n >= na->num_rx_rings)
 			return NULL;
 		kring = na->rx_rings + n;
-		new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
+		new_hwofs = kring->nr_hwtail - new_cur;
 	}
 	lim = kring->nkr_num_slots - 1;
 	if (new_hwofs > lim)
 		new_hwofs -= lim + 1;
 
-	/* Alwayws set the new offset value and realign the ring. */
+	/* Always set the new offset value and realign the ring. */
+	if (netmap_verbose)
+	    D("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
+		na->name,
+		tx == NR_TX ? "TX" : "RX", n,
+		kring->nkr_hwofs, new_hwofs,
+		kring->nr_hwtail,
+		tx == NR_TX ? lim : kring->nr_hwtail);
 	kring->nkr_hwofs = new_hwofs;
-	if (tx == NR_TX)
-		kring->nr_hwavail = kring->nkr_num_slots - 1;
-	ND(10, "new hwofs %d on %s %s[%d]",
-			kring->nkr_hwofs, na->ifp->if_xname,
-			tx == NR_TX ? "TX" : "RX", n);
+	if (tx == NR_TX) {
+		kring->nr_hwtail = kring->nr_hwcur + lim;
+		if (kring->nr_hwtail > lim)
+			kring->nr_hwtail -= lim + 1;
+	}
 
 #if 0 // def linux
 	/* XXX check that the mappings are correct */
@@ -1834,721 +2950,151 @@
 
 #endif /* linux */
 	/*
-	 * Wakeup on the individual and global lock
+	 * Wakeup on the individual and global selwait
 	 * We do the wakeup here, but the ring is not yet reconfigured.
 	 * However, we are under lock so there are no races.
 	 */
-	selwakeuppri(&kring->si, PI_NET);
-	selwakeuppri(tx == NR_TX ? &na->tx_si : &na->rx_si, PI_NET);
+	na->nm_notify(na, n, tx, 0);
 	return kring->ring->slot;
 }
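
For context, a native driver typically calls netmap_reset() from its ring-initialization path to learn whether the ring is in netmap mode and, if so, which buffers to program into the hardware descriptors. The following is only a sketch of that pattern; the softc, ring and descriptor names (mydrv_*, rxr->desc, buf_paddr) are hypothetical and not taken from any driver in this commit.

/* Hypothetical driver ring-init path (sketch only). */
static void
mydrv_init_rx_ring(struct mydrv_softc *sc, struct mydrv_rx_ring *rxr)
{
	struct netmap_adapter *na = NA(sc->ifp);
	struct netmap_slot *slot;
	int i;

	/* NULL if the interface is not in native netmap mode */
	slot = netmap_reset(na, NR_RX, rxr->me, 0);

	for (i = 0; i < rxr->num_desc; i++) {
		if (slot != NULL) {
			/* map the hardware index to the netmap slot,
			 * taking nkr_hwofs into account
			 */
			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], i);
			uint64_t paddr;

			PNMB(na, slot + si, &paddr);
			rxr->desc[i].addr = htole64(paddr);
		} else {
			/* regular (non-netmap) buffer setup */
			rxr->desc[i].addr = htole64(rxr->buf_paddr[i]);
		}
	}
}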
 
 
 /*
- * Default functions to handle rx/tx interrupts
- * we have 4 cases:
- * 1 ring, single lock:
- *	lock(core); wake(i=0); unlock(core)
- * N rings, single lock:
- *	lock(core); wake(i); wake(N+1) unlock(core)
- * 1 ring, separate locks: (i=0)
- *	lock(i); wake(i); unlock(i)
- * N rings, separate locks:
- *	lock(i); wake(i); unlock(i); lock(core) wake(N+1) unlock(core)
- * work_done is non-null on the RX path.
+ * Dispatch rx/tx interrupts to the netmap rings.
  *
- * The 'q' argument also includes flag to tell whether the queue is
- * already locked on enter, and whether it should remain locked on exit.
- * This helps adapting to different defaults in drivers and OSes.
+ * "work_done" is non-null on the RX path, NULL for the TX path.
+ * We rely on the OS to make sure that there is only one active
+ * instance per queue, and that there is appropriate locking.
+ *
+ * The 'notify' routine depends on what the ring is attached to.
+ * - for a netmap file descriptor, do a selwakeup on the individual
+ *   waitqueue, plus one on the global one if needed
+ *   (see netmap_notify)
+ * - for a nic connected to a switch, call the proper forwarding routine
+ *   (see netmap_bwrap_intr_notify)
  */
-int
-netmap_rx_irq(struct ifnet *ifp, int q, int *work_done)
+void
+netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done)
 {
-	struct netmap_adapter *na;
-	struct netmap_kring *r;
-	NM_SELINFO_T *main_wq;
-	int locktype, unlocktype, lock;
+	struct netmap_adapter *na = NA(ifp);
+	struct netmap_kring *kring;
 
-	if (!(ifp->if_capenable & IFCAP_NETMAP))
-		return 0;
+	q &= NETMAP_RING_MASK;
 
-	lock = q & (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT);
-	q = q & NETMAP_RING_MASK;
-
-	ND(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
-	na = NA(ifp);
-	if (na->na_flags & NAF_SKIP_INTR) {
-		ND("use regular interrupt");
-		return 0;
+	if (netmap_verbose) {
+	        RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
 	}
 
 	if (work_done) { /* RX path */
 		if (q >= na->num_rx_rings)
-			return 0;	// not a physical queue
-		r = na->rx_rings + q;
-		r->nr_kflags |= NKR_PENDINTR;
-		main_wq = (na->num_rx_rings > 1) ? &na->rx_si : NULL;
-		locktype = NETMAP_RX_LOCK;
-		unlocktype = NETMAP_RX_UNLOCK;
+			return;	// not a physical queue
+		kring = na->rx_rings + q;
+		kring->nr_kflags |= NKR_PENDINTR;	// XXX atomic ?
+		na->nm_notify(na, q, NR_RX, 0);
+		*work_done = 1; /* do not fire napi again */
 	} else { /* TX path */
 		if (q >= na->num_tx_rings)
-			return 0;	// not a physical queue
-		r = na->tx_rings + q;
-		main_wq = (na->num_tx_rings > 1) ? &na->tx_si : NULL;
-		work_done = &q; /* dummy */
-		locktype = NETMAP_TX_LOCK;
-		unlocktype = NETMAP_TX_UNLOCK;
+			return;	// not a physical queue
+		kring = na->tx_rings + q;
+		na->nm_notify(na, q, NR_TX, 0);
 	}
-	if (na->separate_locks) {
-		if (!(lock & NETMAP_LOCKED_ENTER))
-			na->nm_lock(ifp, locktype, q);
-		selwakeuppri(&r->si, PI_NET);
-		na->nm_lock(ifp, unlocktype, q);
-		if (main_wq) {
-			na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
-			selwakeuppri(main_wq, PI_NET);
-			na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
-		}
-		/* lock the queue again if requested */
-		if (lock & NETMAP_LOCKED_EXIT)
-			na->nm_lock(ifp, locktype, q);
-	} else {
-		if (!(lock & NETMAP_LOCKED_ENTER))
-			na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
-		selwakeuppri(&r->si, PI_NET);
-		if (main_wq)
-			selwakeuppri(main_wq, PI_NET);
-		if (!(lock & NETMAP_LOCKED_EXIT))
-			na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
-	}
-	*work_done = 1; /* do not fire napi again */
-	return 1;
 }
 
 
-#ifdef linux	/* linux-specific routines */
-
 /*
- * Remap linux arguments into the FreeBSD call.
- * - pwait is the poll table, passed as 'dev';
- *   If pwait == NULL someone else already woke up before. We can report
- *   events but they are filtered upstream.
- *   If pwait != NULL, then pwait->key contains the list of events.
- * - events is computed from pwait as above.
- * - file is passed as 'td';
+ * Default functions to handle rx/tx interrupts from a physical device.
+ * "work_done" is non-null on the RX path, NULL for the TX path.
+ *
+ * If the card is not in netmap mode, simply return 0,
+ * so that the caller proceeds with regular processing.
+ * Otherwise call netmap_common_irq() and return 1.
+ *
+ * If the card is connected to a netmap file descriptor,
+ * do a selwakeup on the individual queue, plus one on the global one
+ * if needed (multiqueue card _and_ there are multiqueue listeners),
+ * and return 1.
+ *
+ * Finally, if called on rx from an interface connected to a switch,
+ * calls the proper forwarding routine, and return 1.
  */
-static u_int
-linux_netmap_poll(struct file * file, struct poll_table_struct *pwait)
+int
+netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
-	int events = POLLIN | POLLOUT; /* XXX maybe... */
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
-	int events = pwait ? pwait->key : POLLIN | POLLOUT;
-#else /* in 3.4.0 field 'key' was renamed to '_key' */
-	int events = pwait ? pwait->_key : POLLIN | POLLOUT;
-#endif
-	return netmap_poll((void *)pwait, events, (void *)file);
-}
+	struct netmap_adapter *na = NA(ifp);
 
-static int
-linux_netmap_mmap(struct file *f, struct vm_area_struct *vma)
-{
-	int lut_skip, i, j;
-	int user_skip = 0;
-	struct lut_entry *l_entry;
-	int error = 0;
-	unsigned long off, tomap;
 	/*
-	 * vma->vm_start: start of mapping user address space
-	 * vma->vm_end: end of the mapping user address space
-	 * vma->vm_pfoff: offset of first page in the device
+	 * XXX emulated netmap mode sets NAF_SKIP_INTR so
+	 * we still use the regular driver even though the previous
+	 * check fails. It is unclear whether we should use
+	 * nm_native_on() here.
 	 */
+	if (!nm_netmap_on(na))
+		return 0;
 
-	// XXX security checks
-
-	error = netmap_get_memory(f->private_data);
-	ND("get_memory returned %d", error);
-	if (error)
-	    return -error;
-
-	off = vma->vm_pgoff << PAGE_SHIFT; /* offset in bytes */
-	tomap = vma->vm_end - vma->vm_start;
-	for (i = 0; i < NETMAP_POOLS_NR; i++) {  /* loop through obj_pools */
-		const struct netmap_obj_pool *p = &nm_mem.pools[i];
-		/*
-		 * In each pool memory is allocated in clusters
-		 * of size _clustsize, each containing clustentries
-		 * entries. For each object k we already store the
-		 * vtophys mapping in lut[k] so we use that, scanning
-		 * the lut[] array in steps of clustentries,
-		 * and we map each cluster (not individual pages,
-		 * it would be overkill -- XXX slow ? 20130415).
-		 */
-
-		/*
-		 * We interpret vm_pgoff as an offset into the whole
-		 * netmap memory, as if all clusters where contiguous.
-		 */
-		for (lut_skip = 0, j = 0; j < p->_numclusters; j++, lut_skip += p->clustentries) {
-			unsigned long paddr, mapsize;
-			if (p->_clustsize <= off) {
-				off -= p->_clustsize;
-				continue;
-			}
-			l_entry = &p->lut[lut_skip]; /* first obj in the cluster */
-			paddr = l_entry->paddr + off;
-			mapsize = p->_clustsize - off;
-			off = 0;
-			if (mapsize > tomap)
-				mapsize = tomap;
-			ND("remap_pfn_range(%lx, %lx, %lx)",
-				vma->vm_start + user_skip,
-				paddr >> PAGE_SHIFT, mapsize);
-			if (remap_pfn_range(vma, vma->vm_start + user_skip,
-					paddr >> PAGE_SHIFT, mapsize,
-					vma->vm_page_prot))
-				return -EAGAIN; // XXX check return value
-			user_skip += mapsize;
-			tomap -= mapsize;
-			if (tomap == 0)
-				goto done;
-		}
+	if (na->na_flags & NAF_SKIP_INTR) {
+		ND("use regular interrupt");
+		return 0;
 	}
-done:
 
-	return 0;
+	netmap_common_irq(ifp, q, work_done);
+	return 1;
 }
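
A driver usually hands its RX completion events to netmap first and skips the regular mbuf path when netmap claims them; a minimal sketch of that call site (all mydrv_* names are placeholders):

/* Hypothetical RX cleanup routine called from the driver interrupt. */
static void
mydrv_rxeof(struct mydrv_rx_ring *rxr)
{
	struct ifnet *ifp = rxr->sc->ifp;
	u_int work_done = 0;

	/* returns 1 (and sets work_done) when netmap consumed the event */
	if (netmap_rx_irq(ifp, rxr->me, &work_done))
		return;

	/* ... regular mbuf receive processing ... */
}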
 
-static netdev_tx_t
-linux_netmap_start(struct sk_buff *skb, struct net_device *dev)
-{
-	netmap_start(dev, skb);
-	return (NETDEV_TX_OK);
-}
 
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)	// XXX was 38
-#define LIN_IOCTL_NAME	.ioctl
-int
-linux_netmap_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long data /* arg */)
-#else
-#define LIN_IOCTL_NAME	.unlocked_ioctl
-long
-linux_netmap_ioctl(struct file *file, u_int cmd, u_long data /* arg */)
-#endif
-{
-	int ret;
-	struct nmreq nmr;
-	bzero(&nmr, sizeof(nmr));
-
-	if (data && copy_from_user(&nmr, (void *)data, sizeof(nmr) ) != 0)
-		return -EFAULT;
-	ret = netmap_ioctl(NULL, cmd, (caddr_t)&nmr, 0, (void *)file);
-	if (data && copy_to_user((void*)data, &nmr, sizeof(nmr) ) != 0)
-		return -EFAULT;
-	return -ret;
-}
-
-
-static int
-netmap_release(struct inode *inode, struct file *file)
-{
-	(void)inode;	/* UNUSED */
-	if (file->private_data)
-		netmap_dtor(file->private_data);
-	return (0);
-}
-
-static int
-linux_netmap_open(struct inode *inode, struct file *file)
-{
-	struct netmap_priv_d *priv;
-	(void)inode;	/* UNUSED */
-
-	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
-			      M_NOWAIT | M_ZERO);
-	if (priv == NULL)
-		return -ENOMEM;
-
-	file->private_data = priv;
-
-	return (0);
-}
-
-static struct file_operations netmap_fops = {
-    .open = linux_netmap_open,
-    .mmap = linux_netmap_mmap,
-    LIN_IOCTL_NAME = linux_netmap_ioctl,
-    .poll = linux_netmap_poll,
-    .release = netmap_release,
-};
-
-static struct miscdevice netmap_cdevsw = {	/* same name as FreeBSD */
-	MISC_DYNAMIC_MINOR,
-	"netmap",
-	&netmap_fops,
-};
-
-static int netmap_init(void);
-static void netmap_fini(void);
-
-/* Errors have negative values on linux */
-static int linux_netmap_init(void)
-{
-	return -netmap_init();
-}
-
-module_init(linux_netmap_init);
-module_exit(netmap_fini);
-/* export certain symbols to other modules */
-EXPORT_SYMBOL(netmap_attach);		// driver attach routines
-EXPORT_SYMBOL(netmap_detach);		// driver detach routines
-EXPORT_SYMBOL(netmap_ring_reinit);	// ring init on error
-EXPORT_SYMBOL(netmap_buffer_lut);
-EXPORT_SYMBOL(netmap_total_buffers);	// index check
-EXPORT_SYMBOL(netmap_buffer_base);
-EXPORT_SYMBOL(netmap_reset);		// ring init routines
-EXPORT_SYMBOL(netmap_buf_size);
-EXPORT_SYMBOL(netmap_rx_irq);		// default irq handler
-EXPORT_SYMBOL(netmap_no_pendintr);	// XXX mitigation - should go away
-
-
-MODULE_AUTHOR("http://info.iet.unipi.it/~luigi/netmap/");
-MODULE_DESCRIPTION("The netmap packet I/O framework");
-MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */
-
-#else /* __FreeBSD__ */
-
-static struct cdevsw netmap_cdevsw = {
-	.d_version = D_VERSION,
-	.d_name = "netmap",
-	.d_open = netmap_open,
-	.d_mmap = netmap_mmap,
-	.d_mmap_single = netmap_mmap_single,
-	.d_ioctl = netmap_ioctl,
-	.d_poll = netmap_poll,
-	.d_close = netmap_close,
-};
-#endif /* __FreeBSD__ */
-
-#ifdef NM_BRIDGE
 /*
- *---- support for virtual bridge -----
- */
-
-/* ----- FreeBSD if_bridge hash function ------- */
-
-/*
- * The following hash function is adapted from "Hash Functions" by Bob Jenkins
- * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
+ * Module loader and unloader
  *
- * http://www.burtleburtle.net/bob/hash/spooky.html
+ * netmap_init() creates the /dev/netmap device and initializes
+ * all global variables. Returns 0 on success, errno on failure
+ * (in practice, failure is not expected).
+ *
+ * netmap_fini() destroys everything.
  */
-#define mix(a, b, c)                                                    \
-do {                                                                    \
-        a -= b; a -= c; a ^= (c >> 13);                                 \
-        b -= c; b -= a; b ^= (a << 8);                                  \
-        c -= a; c -= b; c ^= (b >> 13);                                 \
-        a -= b; a -= c; a ^= (c >> 12);                                 \
-        b -= c; b -= a; b ^= (a << 16);                                 \
-        c -= a; c -= b; c ^= (b >> 5);                                  \
-        a -= b; a -= c; a ^= (c >> 3);                                  \
-        b -= c; b -= a; b ^= (a << 10);                                 \
-        c -= a; c -= b; c ^= (b >> 15);                                 \
-} while (/*CONSTCOND*/0)
 
-static __inline uint32_t
-nm_bridge_rthash(const uint8_t *addr)
-{
-        uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
+static struct cdev *netmap_dev; /* /dev/netmap character device. */
+extern struct cdevsw netmap_cdevsw;
 
-        b += addr[5] << 8;
-        b += addr[4];
-        a += addr[3] << 24;
-        a += addr[2] << 16;
-        a += addr[1] << 8;
-        a += addr[0];
 
-        mix(a, b, c);
-#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
-        return (c & BRIDGE_RTHASH_MASK);
-}
-
-#undef mix
-
-
-static int
-bdg_netmap_reg(struct ifnet *ifp, int onoff)
+void
+netmap_fini(void)
 {
-	int i, err = 0;
-	struct nm_bridge *b = ifp->if_bridge;
-
-	BDG_LOCK(b);
-	if (onoff) {
-		/* the interface must be already in the list.
-		 * only need to mark the port as active
-		 */
-		ND("should attach %s to the bridge", ifp->if_xname);
-		for (i=0; i < NM_BDG_MAXPORTS; i++)
-			if (b->bdg_ports[i] == ifp)
-				break;
-		if (i == NM_BDG_MAXPORTS) {
-			D("no more ports available");
-			err = EINVAL;
-			goto done;
-		}
-		ND("setting %s in netmap mode", ifp->if_xname);
-		ifp->if_capenable |= IFCAP_NETMAP;
-		NA(ifp)->bdg_port = i;
-		b->act_ports |= (1<<i);
-		b->bdg_ports[i] = ifp;
-	} else {
-		/* should be in the list, too -- remove from the mask */
-		ND("removing %s from netmap mode", ifp->if_xname);
-		ifp->if_capenable &= ~IFCAP_NETMAP;
-		i = NA(ifp)->bdg_port;
-		b->act_ports &= ~(1<<i);
-	}
-done:
-	BDG_UNLOCK(b);
-	return err;
+	// XXX destroy_bridges() ?
+	if (netmap_dev)
+		destroy_dev(netmap_dev);
+	netmap_mem_fini();
+	NMG_LOCK_DESTROY();
+	printf("netmap: unloaded module.\n");
 }
 
 
-static int
-nm_bdg_flush(struct nm_bdg_fwd *ft, int n, struct ifnet *ifp)
-{
-	int i, ifn;
-	uint64_t all_dst, dst;
-	uint32_t sh, dh;
-	uint64_t mysrc = 1 << NA(ifp)->bdg_port;
-	uint64_t smac, dmac;
-	struct netmap_slot *slot;
-	struct nm_bridge *b = ifp->if_bridge;
-
-	ND("prepare to send %d packets, act_ports 0x%x", n, b->act_ports);
-	/* only consider valid destinations */
-	all_dst = (b->act_ports & ~mysrc);
-	/* first pass: hash and find destinations */
-	for (i = 0; likely(i < n); i++) {
-		uint8_t *buf = ft[i].buf;
-		dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
-		smac = le64toh(*(uint64_t *)(buf + 4));
-		smac >>= 16;
-		if (unlikely(netmap_verbose)) {
-		    uint8_t *s = buf+6, *d = buf;
-		    D("%d len %4d %02x:%02x:%02x:%02x:%02x:%02x -> %02x:%02x:%02x:%02x:%02x:%02x",
-			i,
-			ft[i].len,
-			s[0], s[1], s[2], s[3], s[4], s[5],
-			d[0], d[1], d[2], d[3], d[4], d[5]);
-		}
-		/*
-		 * The hash is somewhat expensive, there might be some
-		 * worthwhile optimizations here.
-		 */
-		if ((buf[6] & 1) == 0) { /* valid src */
-		    	uint8_t *s = buf+6;
-			sh = nm_bridge_rthash(buf+6); // XXX hash of source
-			/* update source port forwarding entry */
-			b->ht[sh].mac = smac;	/* XXX expire ? */
-			b->ht[sh].ports = mysrc;
-			if (netmap_verbose)
-			    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
-				s[0], s[1], s[2], s[3], s[4], s[5], NA(ifp)->bdg_port);
-		}
-		dst = 0;
-		if ( (buf[0] & 1) == 0) { /* unicast */
-		    	uint8_t *d = buf;
-			dh = nm_bridge_rthash(buf); // XXX hash of dst
-			if (b->ht[dh].mac == dmac) {	/* found dst */
-				dst = b->ht[dh].ports;
-				if (netmap_verbose)
-				    D("dst %02x:%02x:%02x:%02x:%02x:%02x to port %x",
-					d[0], d[1], d[2], d[3], d[4], d[5], (uint32_t)(dst >> 16));
-			}
-		}
-		if (dst == 0)
-			dst = all_dst;
-		dst &= all_dst; /* only consider valid ports */
-		if (unlikely(netmap_verbose))
-			D("pkt goes to ports 0x%x", (uint32_t)dst);
-		ft[i].dst = dst;
-	}
-
-	/* second pass, scan interfaces and forward */
-	all_dst = (b->act_ports & ~mysrc);
-	for (ifn = 0; all_dst; ifn++) {
-		struct ifnet *dst_ifp = b->bdg_ports[ifn];
-		struct netmap_adapter *na;
-		struct netmap_kring *kring;
-		struct netmap_ring *ring;
-		int j, lim, sent, locked;
-
-		if (!dst_ifp)
-			continue;
-		ND("scan port %d %s", ifn, dst_ifp->if_xname);
-		dst = 1 << ifn;
-		if ((dst & all_dst) == 0)	/* skip if not set */
-			continue;
-		all_dst &= ~dst;	/* clear current node */
-		na = NA(dst_ifp);
-
-		ring = NULL;
-		kring = NULL;
-		lim = sent = locked = 0;
-		/* inside, scan slots */
-		for (i = 0; likely(i < n); i++) {
-			if ((ft[i].dst & dst) == 0)
-				continue;	/* not here */
-			if (!locked) {
-				kring = &na->rx_rings[0];
-				ring = kring->ring;
-				lim = kring->nkr_num_slots - 1;
-				na->nm_lock(dst_ifp, NETMAP_RX_LOCK, 0);
-				locked = 1;
-			}
-			if (unlikely(kring->nr_hwavail >= lim)) {
-				if (netmap_verbose)
-					D("rx ring full on %s", ifp->if_xname);
-				break;
-			}
-			j = kring->nr_hwcur + kring->nr_hwavail;
-			if (j > lim)
-				j -= kring->nkr_num_slots;
-			slot = &ring->slot[j];
-			ND("send %d %d bytes at %s:%d", i, ft[i].len, dst_ifp->if_xname, j);
-			pkt_copy(ft[i].buf, NMB(slot), ft[i].len);
-			slot->len = ft[i].len;
-			kring->nr_hwavail++;
-			sent++;
-		}
-		if (locked) {
-			ND("sent %d on %s", sent, dst_ifp->if_xname);
-			if (sent)
-				selwakeuppri(&kring->si, PI_NET);
-			na->nm_lock(dst_ifp, NETMAP_RX_UNLOCK, 0);
-		}
-	}
-	return 0;
-}
-
-/*
- * main dispatch routine
- */
-static int
-bdg_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
-{
-	struct netmap_adapter *na = NA(ifp);
-	struct netmap_kring *kring = &na->tx_rings[ring_nr];
-	struct netmap_ring *ring = kring->ring;
-	int i, j, k, lim = kring->nkr_num_slots - 1;
-	struct nm_bdg_fwd *ft = (struct nm_bdg_fwd *)(ifp + 1);
-	int ft_i;	/* position in the forwarding table */
-
-	k = ring->cur;
-	if (k > lim)
-		return netmap_ring_reinit(kring);
-	if (do_lock)
-		na->nm_lock(ifp, NETMAP_TX_LOCK, ring_nr);
-
-	if (netmap_bridge <= 0) { /* testing only */
-		j = k; // used all
-		goto done;
-	}
-	if (netmap_bridge > NM_BDG_BATCH)
-		netmap_bridge = NM_BDG_BATCH;
-
-	ft_i = 0;	/* start from 0 */
-	for (j = kring->nr_hwcur; likely(j != k); j = unlikely(j == lim) ? 0 : j+1) {
-		struct netmap_slot *slot = &ring->slot[j];
-		int len = ft[ft_i].len = slot->len;
-		char *buf = ft[ft_i].buf = NMB(slot);
-
-		prefetch(buf);
-		if (unlikely(len < 14))
-			continue;
-		if (unlikely(++ft_i == netmap_bridge))
-			ft_i = nm_bdg_flush(ft, ft_i, ifp);
-	}
-	if (ft_i)
-		ft_i = nm_bdg_flush(ft, ft_i, ifp);
-	/* count how many packets we sent */
-	i = k - j;
-	if (i < 0)
-		i += kring->nkr_num_slots;
-	kring->nr_hwavail = kring->nkr_num_slots - 1 - i;
-	if (j != k)
-		D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
-
-done:
-	kring->nr_hwcur = j;
-	ring->avail = kring->nr_hwavail;
-	if (do_lock)
-		na->nm_lock(ifp, NETMAP_TX_UNLOCK, ring_nr);
-
-	if (netmap_verbose)
-		D("%s ring %d lock %d", ifp->if_xname, ring_nr, do_lock);
-	return 0;
-}
-
-static int
-bdg_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
-{
-	struct netmap_adapter *na = NA(ifp);
-	struct netmap_kring *kring = &na->rx_rings[ring_nr];
-	struct netmap_ring *ring = kring->ring;
-	u_int j, n, lim = kring->nkr_num_slots - 1;
-	u_int k = ring->cur, resvd = ring->reserved;
-
-	ND("%s ring %d lock %d avail %d",
-		ifp->if_xname, ring_nr, do_lock, kring->nr_hwavail);
-
-	if (k > lim)
-		return netmap_ring_reinit(kring);
-	if (do_lock)
-		na->nm_lock(ifp, NETMAP_RX_LOCK, ring_nr);
-
-	/* skip past packets that userspace has released */
-	j = kring->nr_hwcur;    /* netmap ring index */
-	if (resvd > 0) {
-		if (resvd + ring->avail >= lim + 1) {
-			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
-			ring->reserved = resvd = 0; // XXX panic...
-		}
-		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
-	}
-
-	if (j != k) { /* userspace has released some packets. */
-		n = k - j;
-		if (n < 0)
-			n += kring->nkr_num_slots;
-		ND("userspace releases %d packets", n);
-                for (n = 0; likely(j != k); n++) {
-                        struct netmap_slot *slot = &ring->slot[j];
-                        void *addr = NMB(slot);
-
-                        if (addr == netmap_buffer_base) { /* bad buf */
-                                if (do_lock)
-                                        na->nm_lock(ifp, NETMAP_RX_UNLOCK, ring_nr);
-                                return netmap_ring_reinit(kring);
-                        }
-			/* decrease refcount for buffer */
-
-			slot->flags &= ~NS_BUF_CHANGED;
-                        j = unlikely(j == lim) ? 0 : j + 1;
-                }
-                kring->nr_hwavail -= n;
-                kring->nr_hwcur = k;
-        }
-        /* tell userspace that there are new packets */
-        ring->avail = kring->nr_hwavail - resvd;
-
-	if (do_lock)
-		na->nm_lock(ifp, NETMAP_RX_UNLOCK, ring_nr);
-	return 0;
-}
-
-static void
-bdg_netmap_attach(struct ifnet *ifp)
-{
-	struct netmap_adapter na;
-
-	ND("attaching virtual bridge");
-	bzero(&na, sizeof(na));
-
-	na.ifp = ifp;
-	na.separate_locks = 1;
-	na.num_tx_desc = NM_BRIDGE_RINGSIZE;
-	na.num_rx_desc = NM_BRIDGE_RINGSIZE;
-	na.nm_txsync = bdg_netmap_txsync;
-	na.nm_rxsync = bdg_netmap_rxsync;
-	na.nm_register = bdg_netmap_reg;
-	netmap_attach(&na, 1);
-}
-
-#endif /* NM_BRIDGE */
-
-static struct cdev *netmap_dev; /* /dev/netmap character device. */
-
-
-/*
- * Module loader.
- *
- * Create the /dev/netmap device and initialize all global
- * variables.
- *
- * Return 0 on success, errno on failure.
- */
-static int
+int
 netmap_init(void)
 {
 	int error;
 
-	error = netmap_memory_init();
-	if (error != 0) {
-		printf("netmap: unable to initialize the memory allocator.\n");
-		return (error);
-	}
-	printf("netmap: loaded module\n");
-	netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
+	NMG_LOCK_INIT();
+
+	error = netmap_mem_init();
+	if (error != 0)
+		goto fail;
+	/*
+	 * MAKEDEV_ETERNAL_KLD avoids an expensive check on syscalls
+	 * when the module is compiled in.
+	 * XXX could use make_dev_credv() to get error number
+	 */
+	netmap_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD,
+		&netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600,
 			      "netmap");
+	if (!netmap_dev)
+		goto fail;
 
-#ifdef NM_BRIDGE
-	{
-	int i;
-	for (i = 0; i < NM_BRIDGES; i++)
-		mtx_init(&nm_bridges[i].bdg_lock, "bdg lock", "bdg_lock", MTX_DEF);
-	}
+	netmap_init_bridges();
+#ifdef __FreeBSD__
+	nm_vi_init_index();
 #endif
-	return (error);
+	printf("netmap: loaded module\n");
+	return (0);
+fail:
+	netmap_fini();
+	return (EINVAL); /* may be incorrect */
 }
-
-
-/*
- * Module unloader.
- *
- * Free all the memory, and destroy the ``/dev/netmap`` device.
- */
-static void
-netmap_fini(void)
-{
-	destroy_dev(netmap_dev);
-	netmap_memory_fini();
-	printf("netmap: unloaded module.\n");
-}
-
-
-#ifdef __FreeBSD__
-/*
- * Kernel entry point.
- *
- * Initialize/finalize the module and return.
- *
- * Return 0 on success, errno on failure.
- */
-static int
-netmap_loader(__unused struct module *module, int event, __unused void *arg)
-{
-	int error = 0;
-
-	switch (event) {
-	case MOD_LOAD:
-		error = netmap_init();
-		break;
-
-	case MOD_UNLOAD:
-		netmap_fini();
-		break;
-
-	default:
-		error = EOPNOTSUPP;
-		break;
-	}
-
-	return (error);
-}
-
-
-DEV_MODULE(netmap, netmap_loader, NULL);
-#endif /* __FreeBSD__ */

Added: trunk/sys/dev/netmap/netmap_freebsd.c
===================================================================
--- trunk/sys/dev/netmap/netmap_freebsd.c	                        (rev 0)
+++ trunk/sys/dev/netmap/netmap_freebsd.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,833 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* $FreeBSD: stable/10/sys/dev/netmap/netmap_freebsd.c 281955 2015-04-24 23:26:44Z hiren $ */
+
+#include <sys/types.h>
+#include <sys/module.h>
+#include <sys/errno.h>
+#include <sys/param.h>  /* defines used in kernel.h */
+#include <sys/poll.h>  /* POLLIN, POLLOUT */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/conf.h>	/* DEV_MODULE */
+#include <sys/endian.h>
+
+#include <sys/rwlock.h>
+
+#include <vm/vm.h>      /* vtophys */
+#include <vm/pmap.h>    /* vtophys */
+#include <vm/vm_param.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/uma.h>
+
+
+#include <sys/malloc.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_types.h> /* IFT_ETHER */
+#include <net/ethernet.h> /* ether_ifdetach */
+#include <net/if_dl.h> /* LLADDR */
+#include <machine/bus.h>        /* bus_dmamap_* */
+#include <netinet/in.h>		/* in6_cksum_pseudo() */
+#include <machine/in_cksum.h>  /* in_pseudo(), in_cksum_hdr() */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+
+/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
+
+rawsum_t
+nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
+{
+	/* TODO XXX please use the FreeBSD implementation for this. */
+	uint16_t *words = (uint16_t *)data;
+	int nw = len / 2;
+	int i;
+
+	for (i = 0; i < nw; i++)
+		cur_sum += be16toh(words[i]);
+
+	if (len & 1)
+		cur_sum += (data[len-1] << 8);
+
+	return cur_sum;
+}
+
+/* Fold a raw checksum: 'cur_sum' is in host byte order, while the
+ * return value is in network byte order.
+ */
+uint16_t
+nm_csum_fold(rawsum_t cur_sum)
+{
+	/* TODO XXX please use the FreeBSD implementation for this. */
+	while (cur_sum >> 16)
+		cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16);
+
+	return htobe16((~cur_sum) & 0xFFFF);
+}
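
As a usage note, the two helpers compose exactly as nm_csum_ipv4() below does: accumulate with nm_csum_raw() starting from 0, then fold. A minimal sketch, assuming a plain 20-byte IPv4 header whose checksum field has already been zeroed:

/* Sketch: checksum a 20-byte IPv4 header with the helpers above. */
static uint16_t
example_ipv4_csum(uint8_t *hdr)
{
	/* the result is already in network byte order */
	return nm_csum_fold(nm_csum_raw(hdr, 20, 0));
}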
+
+uint16_t nm_csum_ipv4(struct nm_iphdr *iph)
+{
+#if 0
+	return in_cksum_hdr((void *)iph);
+#else
+	return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
+#endif
+}
+
+void
+nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
+					size_t datalen, uint16_t *check)
+{
+#ifdef INET
+	uint16_t pseudolen = datalen + iph->protocol;
+
+	/* Compute and insert the pseudo-header checksum. */
+	*check = in_pseudo(iph->saddr, iph->daddr,
+				 htobe16(pseudolen));
+	/* Compute the checksum on TCP/UDP header + payload
+	 * (includes the pseudo-header).
+	 */
+	*check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
+#else
+	static int notsupported = 0;
+	if (!notsupported) {
+		notsupported = 1;
+		D("inet4 segmentation not supported");
+	}
+#endif
+}
+
+void
+nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
+					size_t datalen, uint16_t *check)
+{
+#ifdef INET6
+	*check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0);
+	*check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
+#else
+	static int notsupported = 0;
+	if (!notsupported) {
+		notsupported = 1;
+		D("inet6 segmentation not supported");
+	}
+#endif
+}
+
+
+/*
+ * Intercept the rx routine in the standard device driver.
+ * Second argument is non-zero to intercept, 0 to restore
+ */
+int
+netmap_catch_rx(struct netmap_adapter *na, int intercept)
+{
+	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+	struct ifnet *ifp = na->ifp;
+
+	if (intercept) {
+		if (gna->save_if_input) {
+			D("cannot intercept again");
+			return EINVAL; /* already set */
+		}
+		gna->save_if_input = ifp->if_input;
+		ifp->if_input = generic_rx_handler;
+	} else {
+		if (!gna->save_if_input){
+			D("cannot restore");
+			return EINVAL;  /* not saved */
+		}
+		ifp->if_input = gna->save_if_input;
+		gna->save_if_input = NULL;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Intercept the packet steering routine in the tx path,
+ * so that we can decide which queue is used for an mbuf.
+ * Second argument is non-zero to intercept, 0 to restore.
+ * On freebsd we just intercept if_transmit.
+ */
+void
+netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
+{
+	struct netmap_adapter *na = &gna->up.up;
+	struct ifnet *ifp = na->ifp;
+
+	if (enable) {
+		na->if_transmit = ifp->if_transmit;
+		ifp->if_transmit = netmap_transmit;
+	} else {
+		ifp->if_transmit = na->if_transmit;
+	}
+}
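
The two hooks above are flipped together when emulated netmap mode is switched on or off; a condensed sketch of that sequence (error handling trimmed, see generic_netmap_register() later in this commit for the real code):

/* Condensed sketch of how the generic adapter uses both hooks. */
static int
example_toggle_emulation(struct netmap_generic_adapter *gna, int onoff)
{
	struct netmap_adapter *na = &gna->up.up;
	int error = 0;

	if (onoff) {
		error = netmap_catch_rx(na, 1);	/* if_input -> generic_rx_handler */
		if (error)
			return error;
		netmap_catch_tx(gna, 1);	/* if_transmit -> netmap_transmit */
		na->na_flags |= NAF_NETMAP_ON;
	} else {
		na->na_flags &= ~NAF_NETMAP_ON;
		netmap_catch_tx(gna, 0);	/* restore if_transmit */
		netmap_catch_rx(na, 0);		/* restore if_input */
	}
	return error;
}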
+
+
+/*
+ * Transmit routine used by generic_netmap_txsync(). Returns 0 on success
+ * and non-zero on error (which may be packet drops or other errors).
+ * addr and len identify the netmap buffer, m is the (preallocated)
+ * mbuf to use for transmissions.
+ *
+ * We should add a reference to the mbuf so the m_freem() at the end
+ * of the transmission does not consume resources.
+ *
+ * On FreeBSD, and on multiqueue cards, we can force the queue using
+ *      if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ *              i = m->m_pkthdr.flowid % adapter->num_queues;
+ *      else
+ *              i = curcpu % adapter->num_queues;
+ *
+ */
+int
+generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
+	void *addr, u_int len, u_int ring_nr)
+{
+	int ret;
+
+	/*
+	 * The mbuf should be a cluster from our special pool,
+	 * so we do not need to do an m_copyback but just copy
+	 * (and eventually, just reference the netmap buffer)
+	 */
+
+	if (GET_MBUF_REFCNT(m) != 1) {
+		D("invalid refcnt %d for %p",
+			GET_MBUF_REFCNT(m), m);
+		panic("in generic_xmit_frame");
+	}
+	// XXX the ext_size check is unnecessary if we link the netmap buf
+	if (m->m_ext.ext_size < len) {
+		RD(5, "size %d < len %d", m->m_ext.ext_size, len);
+		len = m->m_ext.ext_size;
+	}
+	if (0) { /* XXX seems to have negligible benefits */
+		m->m_ext.ext_buf = m->m_data = addr;
+	} else {
+		bcopy(addr, m->m_data, len);
+	}
+	m->m_len = m->m_pkthdr.len = len;
+	// inc refcount. All ours, we could skip the atomic
+	atomic_fetchadd_int(PNT_MBUF_REFCNT(m), 1);
+	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
+	m->m_pkthdr.flowid = ring_nr;
+	m->m_pkthdr.rcvif = ifp; /* used for tx notification */
+	ret = NA(ifp)->if_transmit(ifp, m);
+	return ret;
+}
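
generic_xmit_frame() is driven by the emulated txsync routine in netmap_generic.c, also part of this commit; a simplified sketch of how one slot is pushed out using the per-ring preallocated mbuf (the helper name is illustrative only):

/* Simplified sketch of one slot being sent; kring/nm_i come from txsync. */
static int
example_send_one_slot(struct netmap_kring *kring, u_int nm_i, u_int ring_nr)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_slot *slot = &kring->ring->slot[nm_i];
	void *addr = NMB(na, slot);		/* netmap buffer address */
	struct mbuf *m = kring->tx_pool[nm_i];	/* preallocated mbuf */

	/* copies the buffer and hands the mbuf to the driver via if_transmit */
	return generic_xmit_frame(na->ifp, m, addr, slot->len, ring_nr);
}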
+
+
+#if __FreeBSD_version >= 1100005
+struct netmap_adapter *
+netmap_getna(if_t ifp)
+{
+	return (NA((struct ifnet *)ifp));
+}
+#endif /* __FreeBSD_version >= 1100005 */
+
+/*
+ * The following two functions are empty until we have a generic
+ * way to extract the info from the ifp
+ */
+int
+generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
+{
+	D("called, in tx %d rx %d", *tx, *rx);
+	return 0;
+}
+
+
+void
+generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
+{
+	D("called, in txq %d rxq %d", *txq, *rxq);
+	*txq = netmap_generic_rings;
+	*rxq = netmap_generic_rings;
+}
+
+
+void
+netmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na)
+{
+	ND("called");
+	mit->mit_pending = 0;
+	mit->mit_ring_idx = idx;
+	mit->mit_na = na;
+}
+
+
+void
+netmap_mitigation_start(struct nm_generic_mit *mit)
+{
+	ND("called");
+}
+
+
+void
+netmap_mitigation_restart(struct nm_generic_mit *mit)
+{
+	ND("called");
+}
+
+
+int
+netmap_mitigation_active(struct nm_generic_mit *mit)
+{
+	ND("called");
+	return 0;
+}
+
+
+void
+netmap_mitigation_cleanup(struct nm_generic_mit *mit)
+{
+	ND("called");
+}
+
+static int
+nm_vi_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
+{
+	return EINVAL;
+}
+
+static void
+nm_vi_start(struct ifnet *ifp)
+{
+	panic("nm_vi_start() must not be called");
+}
+
+/*
+ * Index manager of persistent virtual interfaces.
+ * It is used to decide the lowest byte of the MAC address.
+ * We use the same algorithm with management of bridge port index.
+ */
+#define NM_VI_MAX	255
+static struct {
+	uint8_t index[NM_VI_MAX]; /* XXX just for a reasonable number */
+	uint8_t active;
+	struct mtx lock;
+} nm_vi_indices;
+
+void
+nm_vi_init_index(void)
+{
+	int i;
+	for (i = 0; i < NM_VI_MAX; i++)
+		nm_vi_indices.index[i] = i;
+	nm_vi_indices.active = 0;
+	mtx_init(&nm_vi_indices.lock, "nm_vi_indices_lock", NULL, MTX_DEF);
+}
+
+/* return -1 if no index available */
+static int
+nm_vi_get_index(void)
+{
+	int ret;
+
+	mtx_lock(&nm_vi_indices.lock);
+	ret = nm_vi_indices.active == NM_VI_MAX ? -1 :
+		nm_vi_indices.index[nm_vi_indices.active++];
+	mtx_unlock(&nm_vi_indices.lock);
+	return ret;
+}
+
+static void
+nm_vi_free_index(uint8_t val)
+{
+	int i, lim;
+
+	mtx_lock(&nm_vi_indices.lock);
+	lim = nm_vi_indices.active;
+	for (i = 0; i < lim; i++) {
+		if (nm_vi_indices.index[i] == val) {
+			/* swap index[lim-1] and index[i] */
+			int tmp = nm_vi_indices.index[lim-1];
+			nm_vi_indices.index[lim-1] = val;
+			nm_vi_indices.index[i] = tmp;
+			nm_vi_indices.active--;
+			break;
+		}
+	}
+	if (lim == nm_vi_indices.active)
+		D("funny, index %u was not found", val);
+	mtx_unlock(&nm_vi_indices.lock);
+}
+#undef NM_VI_MAX
+
+/*
+ * Implementation of a netmap-capable virtual interface that
+ * is registered with the system.
+ * It is based on if_tap.c and ip_fw_log.c in FreeBSD 9.
+ *
+ * Note: Linux sets refcount to 0 on allocation of net_device,
+ * then increments it on registration to the system.
+ * FreeBSD sets refcount to 1 on if_alloc(), and does not
+ * increment this refcount on if_attach().
+ */
+int
+nm_vi_persist(const char *name, struct ifnet **ret)
+{
+	struct ifnet *ifp;
+	u_short macaddr_hi;
+	uint32_t macaddr_mid;
+	u_char eaddr[6];
+	int unit = nm_vi_get_index(); /* just to decide MAC address */
+
+	if (unit < 0)
+		return EBUSY;
+	/*
+	 * We use the same MAC address generation method as tap,
+	 * except that the leading two octets are 00:be instead of 00:bd
+	 */
+	macaddr_hi = htons(0x00be); /* XXX tap + 1 */
+	macaddr_mid = (uint32_t) ticks;
+	bcopy(&macaddr_hi, eaddr, sizeof(short));
+	bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t));
+	eaddr[5] = (uint8_t)unit;
+
+	ifp = if_alloc(IFT_ETHER);
+	if (ifp == NULL) {
+		D("if_alloc failed");
+		return ENOMEM;
+	}
+	if_initname(ifp, name, IF_DUNIT_NONE);
+	ifp->if_mtu = 65536;
+	ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_init = (void *)nm_vi_dummy;
+	ifp->if_ioctl = nm_vi_dummy;
+	ifp->if_start = nm_vi_start;
+	ifp->if_mtu = ETHERMTU;
+	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
+	ifp->if_capabilities |= IFCAP_LINKSTATE;
+	ifp->if_capenable |= IFCAP_LINKSTATE;
+
+	ether_ifattach(ifp, eaddr);
+	*ret = ifp;
+	return 0;
+}
+/* unregister from the system and drop the final refcount */
+void
+nm_vi_detach(struct ifnet *ifp)
+{
+	nm_vi_free_index(((char *)IF_LLADDR(ifp))[5]);
+	ether_ifdetach(ifp);
+	if_free(ifp);
+}
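
These two functions back the persistent VALE ports created from userspace (that code is not part of this hunk); a hedged sketch of the create/use/destroy pattern:

/* Sketch only: create and later destroy a persistent virtual interface. */
static int
example_create_vi(const char *name)
{
	struct ifnet *ifp;
	int error;

	error = nm_vi_persist(name, &ifp);	/* if_alloc + ether_ifattach */
	if (error)
		return error;			/* EBUSY when no MAC index is left */

	/* ... attach a netmap adapter to ifp and use it ... */

	nm_vi_detach(ifp);			/* ether_ifdetach + if_free */
	return 0;
}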
+
+/*
+ * In order to track whether pages are still mapped, we hook into
+ * the standard cdev_pager and intercept the constructor and
+ * destructor.
+ */
+
+struct netmap_vm_handle_t {
+	struct cdev 		*dev;
+	struct netmap_priv_d	*priv;
+};
+
+
+static int
+netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+    vm_ooffset_t foff, struct ucred *cred, u_short *color)
+{
+	struct netmap_vm_handle_t *vmh = handle;
+
+	if (netmap_verbose)
+		D("handle %p size %jd prot %d foff %jd",
+			handle, (intmax_t)size, prot, (intmax_t)foff);
+	if (color)
+		*color = 0;
+	dev_ref(vmh->dev);
+	return 0;
+}
+
+
+static void
+netmap_dev_pager_dtor(void *handle)
+{
+	struct netmap_vm_handle_t *vmh = handle;
+	struct cdev *dev = vmh->dev;
+	struct netmap_priv_d *priv = vmh->priv;
+
+	if (netmap_verbose)
+		D("handle %p", handle);
+	netmap_dtor(priv);
+	free(vmh, M_DEVBUF);
+	dev_rel(dev);
+}
+
+
+static int
+netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
+	int prot, vm_page_t *mres)
+{
+	struct netmap_vm_handle_t *vmh = object->handle;
+	struct netmap_priv_d *priv = vmh->priv;
+	vm_paddr_t paddr;
+	vm_page_t page;
+	vm_memattr_t memattr;
+	vm_pindex_t pidx;
+
+	ND("object %p offset %jd prot %d mres %p",
+			object, (intmax_t)offset, prot, mres);
+	memattr = object->memattr;
+	pidx = OFF_TO_IDX(offset);
+	paddr = netmap_mem_ofstophys(priv->np_mref, offset);
+	if (paddr == 0)
+		return VM_PAGER_FAIL;
+
+	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
+		/*
+		 * If the passed in result page is a fake page, update it with
+		 * the new physical address.
+		 */
+		page = *mres;
+		vm_page_updatefake(page, paddr, memattr);
+	} else {
+		/*
+		 * Replace the passed-in reqpage with our own fake page
+		 * and free up all of the original pages.
+		 */
+#ifndef VM_OBJECT_WUNLOCK	/* FreeBSD < 10.x */
+#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
+#define VM_OBJECT_WLOCK	VM_OBJECT_LOCK
+#endif /* VM_OBJECT_WUNLOCK */
+
+		VM_OBJECT_WUNLOCK(object);
+		page = vm_page_getfake(paddr, memattr);
+		VM_OBJECT_WLOCK(object);
+		vm_page_lock(*mres);
+		vm_page_free(*mres);
+		vm_page_unlock(*mres);
+		*mres = page;
+		vm_page_insert(page, object, pidx);
+	}
+	page->valid = VM_PAGE_BITS_ALL;
+	return (VM_PAGER_OK);
+}
+
+
+static struct cdev_pager_ops netmap_cdev_pager_ops = {
+	.cdev_pg_ctor = netmap_dev_pager_ctor,
+	.cdev_pg_dtor = netmap_dev_pager_dtor,
+	.cdev_pg_fault = netmap_dev_pager_fault,
+};
+
+
+static int
+netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
+	vm_size_t objsize,  vm_object_t *objp, int prot)
+{
+	int error;
+	struct netmap_vm_handle_t *vmh;
+	struct netmap_priv_d *priv;
+	vm_object_t obj;
+
+	if (netmap_verbose)
+		D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
+		    (intmax_t )*foff, (intmax_t )objsize, objp, prot);
+
+	vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
+			      M_NOWAIT | M_ZERO);
+	if (vmh == NULL)
+		return ENOMEM;
+	vmh->dev = cdev;
+
+	NMG_LOCK();
+	error = devfs_get_cdevpriv((void**)&priv);
+	if (error)
+		goto err_unlock;
+	vmh->priv = priv;
+	priv->np_refcount++;
+	NMG_UNLOCK();
+
+	error = netmap_get_memory(priv);
+	if (error)
+		goto err_deref;
+
+	obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
+		&netmap_cdev_pager_ops, objsize, prot,
+		*foff, NULL);
+	if (obj == NULL) {
+		D("cdev_pager_allocate failed");
+		error = EINVAL;
+		goto err_deref;
+	}
+
+	*objp = obj;
+	return 0;
+
+err_deref:
+	NMG_LOCK();
+	priv->np_refcount--;
+err_unlock:
+	NMG_UNLOCK();
+// err:
+	free(vmh, M_DEVBUF);
+	return error;
+}
+
+
+// XXX can we remove this ?
+static int
+netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+{
+	if (netmap_verbose)
+		D("dev %p fflag 0x%x devtype %d td %p",
+			dev, fflag, devtype, td);
+	return 0;
+}
+
+
+static int
+netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+	struct netmap_priv_d *priv;
+	int error;
+
+	(void)dev;
+	(void)oflags;
+	(void)devtype;
+	(void)td;
+
+	// XXX wait or nowait ?
+	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
+			      M_NOWAIT | M_ZERO);
+	if (priv == NULL)
+		return ENOMEM;
+
+	error = devfs_set_cdevpriv(priv, netmap_dtor);
+	if (error)
+	        return error;
+
+	priv->np_refcount = 1;
+
+	return 0;
+}
+
+/******************** kqueue support ****************/
+
+/*
+ * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED.
+ * We use a non-zero argument to distinguish the call from the one
+ * in kevent_scan() which instead also needs to run netmap_poll().
+ * The knote uses a global mutex for the time being. We might
+ * try to reuse the one in the si, but it is not allocated
+ * permanently so it might be a bit tricky.
+ *
+ * The *kqfilter function registers one or another f_event
+ * depending on read or write mode.
+ * In the call to f_event() td_fpop is NULL so any child function
+ * calling devfs_get_cdevpriv() would fail - and we need it in
+ * netmap_poll(). As a workaround we store priv into kn->kn_hook
+ * and pass it as first argument to netmap_poll(), which then
+ * uses the failure to tell that we are called from f_event()
+ * and do not need the selrecord().
+ */
+
+
+void
+freebsd_selwakeup(struct nm_selinfo *si, int pri)
+{
+	if (netmap_verbose)
+		D("on knote %p", &si->si.si_note);
+	selwakeuppri(&si->si, pri);
+	/* use a non-zero hint to tell the notification from the
+	 * call done in kqueue_scan() which uses 0
+	 */
+	KNOTE_UNLOCKED(&si->si.si_note, 0x100 /* notification */);
+}
+
+static void
+netmap_knrdetach(struct knote *kn)
+{
+	struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
+	struct selinfo *si = &priv->np_rxsi->si;
+
+	D("remove selinfo %p", si);
+	knlist_remove(&si->si_note, kn, 0);
+}
+
+static void
+netmap_knwdetach(struct knote *kn)
+{
+	struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
+	struct selinfo *si = &priv->np_txsi->si;
+
+	D("remove selinfo %p", si);
+	knlist_remove(&si->si_note, kn, 0);
+}
+
+/*
+ * Callback from notifications (generated externally) and from our
+ * calls to kevent(). For the former we just return 1 (ready)
+ * since we do not know better.
+ * For the latter we call netmap_poll() and return 0/1 accordingly.
+ */
+static int
+netmap_knrw(struct knote *kn, long hint, int events)
+{
+	struct netmap_priv_d *priv;
+	int revents;
+
+	if (hint != 0) {
+		ND(5, "call from notify");
+		return 1; /* assume we are ready */
+	}
+	priv = kn->kn_hook;
+	/* the notification may come from an external thread,
+	 * in which case we do not want to run the netmap_poll
+	 * This should be filtered above, but check just in case.
+	 */
+	if (curthread != priv->np_td) { /* should not happen */
+		RD(5, "curthread changed %p %p", curthread, priv->np_td);
+		return 1;
+	} else {
+		revents = netmap_poll((void *)priv, events, curthread);
+		return (events & revents) ? 1 : 0;
+	}
+}
+
+static int
+netmap_knread(struct knote *kn, long hint)
+{
+	return netmap_knrw(kn, hint, POLLIN);
+}
+
+static int
+netmap_knwrite(struct knote *kn, long hint)
+{
+	return netmap_knrw(kn, hint, POLLOUT);
+}
+
+static struct filterops netmap_rfiltops = {
+	.f_isfd = 1,
+	.f_detach = netmap_knrdetach,
+	.f_event = netmap_knread,
+};
+
+static struct filterops netmap_wfiltops = {
+	.f_isfd = 1,
+	.f_detach = netmap_knwdetach,
+	.f_event = netmap_knwrite,
+};
+
+
+/*
+ * This is called when a thread invokes kevent() to record
+ * a change in the configuration of the kqueue().
+ * The 'priv' should be the same as in the netmap device.
+ */
+static int
+netmap_kqfilter(struct cdev *dev, struct knote *kn)
+{
+	struct netmap_priv_d *priv;
+	int error;
+	struct netmap_adapter *na;
+	struct nm_selinfo *si;
+	int ev = kn->kn_filter;
+
+	if (ev != EVFILT_READ && ev != EVFILT_WRITE) {
+		D("bad filter request %d", ev);
+		return 1;
+	}
+	error = devfs_get_cdevpriv((void**)&priv);
+	if (error) {
+		D("device not yet setup");
+		return 1;
+	}
+	na = priv->np_na;
+	if (na == NULL) {
+		D("no netmap adapter for this file descriptor");
+		return 1;
+	}
+	/* the si is indicated in the priv */
+	si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi;
+	// XXX lock(priv) ?
+	kn->kn_fop = (ev == EVFILT_WRITE) ?
+		&netmap_wfiltops : &netmap_rfiltops;
+	kn->kn_hook = priv;
+	knlist_add(&si->si.si_note, kn, 1);
+	// XXX unlock(priv)
+	ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s",
+		na, na->ifp->if_xname, curthread, priv, kn,
+		priv->np_nifp,
+		kn->kn_fp == curthread->td_fpop ? "match" : "MISMATCH");
+	return 0;
+}
+
+struct cdevsw netmap_cdevsw = {
+	.d_version = D_VERSION,
+	.d_name = "netmap",
+	.d_open = netmap_open,
+	.d_mmap_single = netmap_mmap_single,
+	.d_ioctl = netmap_ioctl,
+	.d_poll = netmap_poll,
+	.d_kqfilter = netmap_kqfilter,
+	.d_close = netmap_close,
+};
+/*--- end of kqueue support ----*/
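
From userspace the practical effect is that a netmap file descriptor can be waited on with kqueue just like with poll(); a minimal sketch, assuming the descriptor has already been bound to an interface with NIOCREGIF:

/* Userspace sketch: block until the netmap fd has RX traffic. */
#include <sys/types.h>
#include <sys/event.h>
#include <err.h>

static void
example_wait_rx(int netmap_fd)
{
	struct kevent ev, ret;
	int kq = kqueue();

	if (kq < 0)
		err(1, "kqueue");
	EV_SET(&ev, netmap_fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) < 0)
		err(1, "kevent register");
	/* wakes up when netmap_knread() reports the fd ready */
	if (kevent(kq, NULL, 0, &ret, 1, NULL) < 0)
		err(1, "kevent wait");
}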
+
+/*
+ * Kernel entry point.
+ *
+ * Initialize/finalize the module and return.
+ *
+ * Return 0 on success, errno on failure.
+ */
+static int
+netmap_loader(__unused struct module *module, int event, __unused void *arg)
+{
+	int error = 0;
+
+	switch (event) {
+	case MOD_LOAD:
+		error = netmap_init();
+		break;
+
+	case MOD_UNLOAD:
+		netmap_fini();
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	return (error);
+}
+
+
+DEV_MODULE(netmap, netmap_loader, NULL);


Property changes on: trunk/sys/dev/netmap/netmap_freebsd.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/netmap/netmap_generic.c
===================================================================
--- trunk/sys/dev/netmap/netmap_generic.c	                        (rev 0)
+++ trunk/sys/dev/netmap/netmap_generic.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,865 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This module implements netmap support on top of standard,
+ * unmodified device drivers.
+ *
+ * A NIOCREGIF request is handled here if the device does not
+ * have native support. TX and RX rings are emulated as follows:
+ *
+ * NIOCREGIF
+ *	We preallocate a block of TX mbufs (roughly as many as
+ *	tx descriptors; the number is not critical) to speed up
+ *	operation during transmissions. The refcount on most of
+ *	these buffers is artificially bumped up so we can recycle
+ *	them more easily. Also, the destructor is intercepted
+ *	so we use it as an interrupt notification to wake up
+ *	processes blocked on a poll().
+ *
+ *	For each receive ring we allocate one "struct mbq"
+ *	(an mbuf tailq plus a spinlock). We intercept packets
+ *	(through if_input)
+ *	on the receive path and put them in the mbq from which
+ *	netmap receive routines can grab them.
+ *
+ * TX:
+ *	in the generic_txsync() routine, netmap buffers are copied
+ *	(or linked, in a future) to the preallocated mbufs
+ *	and pushed to the transmit queue. Some of these mbufs
+ *	(those with NS_REPORT, or otherwise every half ring)
+ *	have the refcount=1, others have refcount=2.
+ *	When the destructor is invoked, we take that as
+ *	a notification that all mbufs up to that one in
+ *	the specific ring have been completed, and generate
+ *	the equivalent of a transmit interrupt.
+ *
+ * RX:
+ *	intercepted mbufs (see above) wait in the per-ring mbq until
+ *	the rxsync routine dequeues them and copies their payload
+ *	into the netmap slots.
+ */
+
+#ifdef __FreeBSD__
+
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD: stable/10/sys/dev/netmap/netmap_generic.c 297478 2016-04-01 01:39:44Z np $");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>   /* PROT_EXEC */
+#include <sys/rwlock.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h>        /* bus_dmamap_* in netmap_kern.h */
+
+// XXX temporary - D() defined here
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#define rtnl_lock()	ND("rtnl_lock called")
+#define rtnl_unlock()	ND("rtnl_unlock called")
+#define MBUF_TXQ(m)	((m)->m_pkthdr.flowid)
+#define MBUF_RXQ(m)	((m)->m_pkthdr.flowid)
+#define smp_mb()
+
+/*
+ * FreeBSD mbuf allocator/deallocator in emulation mode:
+ *
+ * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE
+ * so that the destructor, if invoked, will not free the packet.
+ *    In principle we should set the destructor only on demand,
+ * but since there might be a race we better do it on allocation.
+ * As a consequence, we also need to set the destructor or we
+ * would leak buffers.
+ */
+
+/*
+ * mbuf wrappers
+ */
+
+/* mbuf destructor, also need to change the type to EXT_EXTREF,
+ * add an M_NOFREE flag, and then clear the flag and
+ * chain into uma_zfree(zone_pack, mf)
+ * (or reinstall the buffer ?)
+ */
+#define SET_MBUF_DESTRUCTOR(m, fn)	do {		\
+	(m)->m_ext.ext_free = (void *)fn;	\
+	(m)->m_ext.ext_type = EXT_EXTREF;	\
+} while (0)
+
+static void
+netmap_default_mbuf_destructor(struct mbuf *m)
+{
+	/* restore original mbuf */
+	m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1;
+	m->m_ext.ext_arg1 = NULL;
+	m->m_ext.ext_type = EXT_PACKET;
+	m->m_ext.ext_free = NULL;
+	if (GET_MBUF_REFCNT(m) == 0)
+		SET_MBUF_REFCNT(m, 1);
+	uma_zfree(zone_pack, m);
+}
+
+static inline struct mbuf *
+netmap_get_mbuf(int len)
+{
+	struct mbuf *m;
+	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+	if (m) {
+		m->m_flags |= M_NOFREE;	/* XXXNP: Almost certainly incorrect. */
+		m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save
+		m->m_ext.ext_free = (void *)netmap_default_mbuf_destructor;
+		m->m_ext.ext_type = EXT_EXTREF;
+		ND(5, "create m %p refcnt %d", m, GET_MBUF_REFCNT(m));
+	}
+	return m;
+}
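
Taken together with the destructor above, each tx_pool entry moves through three states: consumed (NULL, to be replenished), idle (refcount 1, reusable) and in flight (refcount 2, owned by the driver). A sketch of the per-entry check, mirroring what generic_netmap_tx_clean() does further below:

/* Sketch of the state check performed on one tx_pool entry. */
static void
example_tx_pool_check(struct netmap_adapter *na, struct mbuf **tx_pool, u_int i)
{
	struct mbuf *m = tx_pool[i];

	if (m == NULL) {
		/* entry consumed by the stack: replenish it */
		tx_pool[i] = netmap_get_mbuf(NETMAP_BUF_SIZE(na));
	} else if (GET_MBUF_REFCNT(m) == 1) {
		/* transmission completed, the mbuf can be reused as is */
	} else {
		/* refcount 2: the driver still owns the buffer, leave it */
	}
}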
+
+
+
+#else /* linux */
+
+#include "bsd_glue.h"
+
+#include <linux/rtnetlink.h>    /* rtnl_[un]lock() */
+#include <linux/ethtool.h>      /* struct ethtool_ops, get_ringparam */
+#include <linux/hrtimer.h>
+
+//#define REG_RESET
+
+#endif /* linux */
+
+
+/* Common headers. */
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+
+
+/* ======================== usage stats =========================== */
+
+#ifdef RATE_GENERIC
+#define IFRATE(x) x
+struct rate_stats {
+	unsigned long txpkt;
+	unsigned long txsync;
+	unsigned long txirq;
+	unsigned long rxpkt;
+	unsigned long rxirq;
+	unsigned long rxsync;
+};
+
+struct rate_context {
+	unsigned refcount;
+	struct timer_list timer;
+	struct rate_stats new;
+	struct rate_stats old;
+};
+
+#define RATE_PRINTK(_NAME_) \
+	printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
+#define RATE_PERIOD  2
+static void rate_callback(unsigned long arg)
+{
+	struct rate_context * ctx = (struct rate_context *)arg;
+	struct rate_stats cur = ctx->new;
+	int r;
+
+	RATE_PRINTK(txpkt);
+	RATE_PRINTK(txsync);
+	RATE_PRINTK(txirq);
+	RATE_PRINTK(rxpkt);
+	RATE_PRINTK(rxsync);
+	RATE_PRINTK(rxirq);
+	printk("\n");
+
+	ctx->old = cur;
+	r = mod_timer(&ctx->timer, jiffies +
+			msecs_to_jiffies(RATE_PERIOD * 1000));
+	if (unlikely(r))
+		D("[v1000] Error: mod_timer()");
+}
+
+static struct rate_context rate_ctx;
+
+void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi)
+{
+    if (txp) rate_ctx.new.txpkt++;
+    if (txs) rate_ctx.new.txsync++;
+    if (txi) rate_ctx.new.txirq++;
+    if (rxp) rate_ctx.new.rxpkt++;
+    if (rxs) rate_ctx.new.rxsync++;
+    if (rxi) rate_ctx.new.rxirq++;
+}
+
+#else /* !RATE */
+#define IFRATE(x)
+#endif /* !RATE */
+
+
+/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
+
+/*
+ * Wrapper used by the generic adapter layer to notify
+ * the poller threads. Unlike netmap_rx_irq(), we check
+ * only NAF_NETMAP_ON instead of NAF_NATIVE_ON to enable the irq.
+ */
+static void
+netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
+{
+	struct netmap_adapter *na = NA(ifp);
+	if (unlikely(!nm_netmap_on(na)))
+		return;
+
+	netmap_common_irq(ifp, q, work_done);
+}
+
+
+/* Enable/disable netmap mode for a generic network interface. */
+static int
+generic_netmap_register(struct netmap_adapter *na, int enable)
+{
+	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+	struct mbuf *m;
+	int error;
+	int i, r;
+
+	if (!na)
+		return EINVAL;
+
+#ifdef REG_RESET
+	error = ifp->netdev_ops->ndo_stop(ifp);
+	if (error) {
+		return error;
+	}
+#endif /* REG_RESET */
+
+	if (enable) { /* Enable netmap mode. */
+		/* Init the mitigation support on all the rx queues. */
+		gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
+					M_DEVBUF, M_NOWAIT | M_ZERO);
+		if (!gna->mit) {
+			D("mitigation allocation failed");
+			error = ENOMEM;
+			goto out;
+		}
+		for (r=0; r<na->num_rx_rings; r++)
+			netmap_mitigation_init(&gna->mit[r], r, na);
+
+		/* Initialize the rx queue, as generic_rx_handler() can
+		 * be called as soon as netmap_catch_rx() returns.
+		 */
+		for (r=0; r<na->num_rx_rings; r++) {
+			mbq_safe_init(&na->rx_rings[r].rx_queue);
+		}
+
+		/*
+		 * Preallocate packet buffers for the tx rings.
+		 */
+		for (r=0; r<na->num_tx_rings; r++)
+			na->tx_rings[r].tx_pool = NULL;
+		for (r=0; r<na->num_tx_rings; r++) {
+			na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
+					M_DEVBUF, M_NOWAIT | M_ZERO);
+			if (!na->tx_rings[r].tx_pool) {
+				D("tx_pool allocation failed");
+				error = ENOMEM;
+				goto free_tx_pools;
+			}
+			for (i=0; i<na->num_tx_desc; i++)
+				na->tx_rings[r].tx_pool[i] = NULL;
+			for (i=0; i<na->num_tx_desc; i++) {
+				m = netmap_get_mbuf(NETMAP_BUF_SIZE(na));
+				if (!m) {
+					D("tx_pool[%d] allocation failed", i);
+					error = ENOMEM;
+					goto free_tx_pools;
+				}
+				na->tx_rings[r].tx_pool[i] = m;
+			}
+		}
+		rtnl_lock();
+		/* Prepare to intercept incoming traffic. */
+		error = netmap_catch_rx(na, 1);
+		if (error) {
+			D("netdev_rx_handler_register() failed (%d)", error);
+			goto register_handler;
+		}
+		na->na_flags |= NAF_NETMAP_ON;
+
+		/* Make netmap control the packet steering. */
+		netmap_catch_tx(gna, 1);
+
+		rtnl_unlock();
+
+#ifdef RATE_GENERIC
+		if (rate_ctx.refcount == 0) {
+			D("setup_timer()");
+			memset(&rate_ctx, 0, sizeof(rate_ctx));
+			setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
+			if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
+				D("Error: mod_timer()");
+			}
+		}
+		rate_ctx.refcount++;
+#endif /* RATE */
+
+	} else if (na->tx_rings[0].tx_pool) {
+		/* Disable netmap mode. We enter here only if the previous
+		   generic_netmap_register(na, 1) was successful.
+		   If it was not, na->tx_rings[0].tx_pool was set to NULL by the
+		   error handling code below. */
+		rtnl_lock();
+
+		na->na_flags &= ~NAF_NETMAP_ON;
+
+		/* Release packet steering control. */
+		netmap_catch_tx(gna, 0);
+
+		/* Do not intercept packets on the rx path. */
+		netmap_catch_rx(na, 0);
+
+		rtnl_unlock();
+
+		/* Free the mbufs going to the netmap rings */
+		for (r=0; r<na->num_rx_rings; r++) {
+			mbq_safe_purge(&na->rx_rings[r].rx_queue);
+			mbq_safe_destroy(&na->rx_rings[r].rx_queue);
+		}
+
+		for (r=0; r<na->num_rx_rings; r++)
+			netmap_mitigation_cleanup(&gna->mit[r]);
+		free(gna->mit, M_DEVBUF);
+
+		for (r=0; r<na->num_tx_rings; r++) {
+			for (i=0; i<na->num_tx_desc; i++) {
+				m_freem(na->tx_rings[r].tx_pool[i]);
+			}
+			free(na->tx_rings[r].tx_pool, M_DEVBUF);
+		}
+
+#ifdef RATE_GENERIC
+		if (--rate_ctx.refcount == 0) {
+			D("del_timer()");
+			del_timer(&rate_ctx.timer);
+		}
+#endif
+	}
+
+#ifdef REG_RESET
+	error = ifp->netdev_ops->ndo_open(ifp);
+	if (error) {
+		goto free_tx_pools;
+	}
+#endif
+
+	return 0;
+
+register_handler:
+	rtnl_unlock();
+free_tx_pools:
+	for (r=0; r<na->num_tx_rings; r++) {
+		if (na->tx_rings[r].tx_pool == NULL)
+			continue;
+		for (i=0; i<na->num_tx_desc; i++)
+			if (na->tx_rings[r].tx_pool[i])
+				m_freem(na->tx_rings[r].tx_pool[i]);
+		free(na->tx_rings[r].tx_pool, M_DEVBUF);
+		na->tx_rings[r].tx_pool = NULL;
+	}
+	for (r=0; r<na->num_rx_rings; r++) {
+		netmap_mitigation_cleanup(&gna->mit[r]);
+		mbq_safe_destroy(&na->rx_rings[r].rx_queue);
+	}
+	free(gna->mit, M_DEVBUF);
+out:
+
+	return error;
+}
+
+/*
+ * Callback invoked when the device driver frees an mbuf used
+ * by netmap to transmit a packet. This usually happens when
+ * the NIC notifies the driver that transmission is completed.
+ */
+static void
+generic_mbuf_destructor(struct mbuf *m)
+{
+	netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
+#ifdef __FreeBSD__
+	if (netmap_verbose)
+		RD(5, "Tx irq (%p) queue %d index %d" , m, MBUF_TXQ(m), (int)(uintptr_t)m->m_ext.ext_arg1);
+	netmap_default_mbuf_destructor(m);
+#endif /* __FreeBSD__ */
+	IFRATE(rate_ctx.new.txirq++);
+}
+
+extern int netmap_adaptive_io;
+
+/* Record completed transmissions and update hwtail.
+ *
+ * The oldest tx buffer not yet completed is at nr_hwtail + 1,
+ * nr_hwcur is the first unsent buffer.
+ */
+static u_int
+generic_netmap_tx_clean(struct netmap_kring *kring)
+{
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int nm_i = nm_next(kring->nr_hwtail, lim);
+	u_int hwcur = kring->nr_hwcur;
+	u_int n = 0;
+	struct mbuf **tx_pool = kring->tx_pool;
+
+	while (nm_i != hwcur) { /* buffers not completed */
+		struct mbuf *m = tx_pool[nm_i];
+
+		if (unlikely(m == NULL)) {
+			/* this is done, try to replenish the entry */
+			tx_pool[nm_i] = m = netmap_get_mbuf(NETMAP_BUF_SIZE(kring->na));
+			if (unlikely(m == NULL)) {
+				D("mbuf allocation failed, XXX error");
+				// XXX how do we proceed ? break ?
+				return -ENOMEM;
+			}
+		} else if (GET_MBUF_REFCNT(m) != 1) {
+			break; /* This mbuf is still busy: its refcnt is 2. */
+		}
+		n++;
+		nm_i = nm_next(nm_i, lim);
+#if 0 /* rate adaptation */
+		if (netmap_adaptive_io > 1) {
+			if (n >= netmap_adaptive_io)
+				break;
+		} else if (netmap_adaptive_io) {
+			/* if hwcur - nm_i < lim/8 do an early break
+			 * so we prevent the sender from stalling. See CVT.
+			 */
+			if (hwcur >= nm_i) {
+				if (hwcur - nm_i < lim/2)
+					break;
+			} else {
+				if (hwcur + lim + 1 - nm_i < lim/2)
+					break;
+			}
+		}
+#endif
+	}
+	kring->nr_hwtail = nm_prev(nm_i, lim);
+	ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);
+
+	return n;
+}
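+
+/*
+ * Editorial sketch (not from the original source): with nkr_num_slots = 8
+ * (lim = 7), nr_hwtail = 2 and nr_hwcur = 6, the scan above starts at
+ * nm_next(2, 7) = 3 and visits slots 3, 4 and 5. It stops early at the
+ * first mbuf whose refcount is still 2 (still held by the driver/NIC);
+ * otherwise it leaves nr_hwtail = nm_prev(6, 7) = 5.
+ */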
+
+
+/*
+ * We have pending packets in the driver between nr_hwtail + 1 and hwcur.
+ * Compute a position in the middle, to be used to generate
+ * a notification.
+ */
+static inline u_int
+generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
+{
+	u_int n = kring->nkr_num_slots;
+	u_int ntc = nm_next(kring->nr_hwtail, n-1);
+	u_int e;
+
+	if (hwcur >= ntc) {
+		e = (hwcur + ntc) / 2;
+	} else { /* wrap around */
+		e = (hwcur + n + ntc) / 2;
+		if (e >= n) {
+			e -= n;
+		}
+	}
+
+	if (unlikely(e >= n)) {
+		D("This cannot happen");
+		e = 0;
+	}
+
+	return e;
+}
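+
+/*
+ * Editorial example (illustrative only): with nkr_num_slots = 8,
+ * nr_hwtail = 5 (so ntc = 6) and hwcur = 2, the wrap-around branch
+ * gives e = (2 + 8 + 6) / 2 = 8, reduced to 0: the event lands roughly
+ * halfway through the pending slots 6, 7, 0, 1.
+ */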
+
+/*
+ * We have pending packets in the driver between nr_hwtail+1 and hwcur.
+ * Schedule a notification approximately in the middle of the two.
+ * There is a race but this is only called within txsync which does
+ * a double check.
+ */
+static void
+generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
+{
+	struct mbuf *m;
+	u_int e;
+
+	if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) {
+		return; /* all buffers are free */
+	}
+	e = generic_tx_event_middle(kring, hwcur);
+
+	m = kring->tx_pool[e];
+	ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? GET_MBUF_REFCNT(m) : -2 );
+	if (m == NULL) {
+		/* This can happen if there is already an event on the netmap
+		   slot 'e': There is nothing to do. */
+		return;
+	}
+	kring->tx_pool[e] = NULL;
+	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
+
+	// XXX wmb() ?
+	/* Decrement the refcount and free it if we have the last one. */
+	m_freem(m);
+	smp_mb();
+}
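+
+/*
+ * Editorial note, a sketch of the mechanism above rather than a statement
+ * of the implementation: a tx_pool mbuf in flight is referenced twice,
+ * once by the pool and once by the driver (see the refcnt == 2 check in
+ * generic_netmap_tx_clean()). Dropping the pool reference here means the
+ * driver's final free runs generic_mbuf_destructor(), which calls
+ * netmap_generic_irq() and wakes up the txsync path.
+ */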
+
+
+/*
+ * generic_netmap_txsync() transforms netmap buffers into mbufs
+ * and passes them to the standard device driver
+ * (ndo_start_xmit() or ifp->if_transmit()).
+ * On linux this is not done directly, but through dev_queue_xmit(),
+ * since it implements the TX flow control (and takes some locks).
+ */
+static int
+generic_netmap_txsync(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i;	/* index into the netmap ring */ // j
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
+	u_int ring_nr = kring->ring_id;
+
+	IFRATE(rate_ctx.new.txsync++);
+
+	// TODO: handle the case of mbuf allocation failure
+
+	rmb();
+
+	/*
+	 * First part: process new packets to send.
+	 */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {	/* we have new packets to send */
+		while (nm_i != head) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			u_int len = slot->len;
+			void *addr = NMB(na, slot);
+
+			/* device-specific */
+			struct mbuf *m;
+			int tx_ret;
+
+			NM_CHECK_ADDR_LEN(na, addr, len);
+
+			/* Take an mbuf from the tx pool and copy in the user packet. */
+			m = kring->tx_pool[nm_i];
+			if (unlikely(!m)) {
+				RD(5, "This should never happen");
+				kring->tx_pool[nm_i] = m = netmap_get_mbuf(NETMAP_BUF_SIZE(na));
+				if (unlikely(m == NULL)) {
+					D("mbuf allocation failed");
+					break;
+				}
+			}
+			/* XXX we should ask notifications when NS_REPORT is set,
+			 * or roughly every half frame. We can optimize this
+			 * by lazily requesting notifications only when a
+			 * transmission fails. Probably the best way is to
+			 * break on failures and set notifications when
+			 * ring->cur == ring->tail || nm_i != cur
+			 */
+			tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
+			if (unlikely(tx_ret)) {
+				ND(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
+						tx_ret, nm_i, head, kring->nr_hwtail);
+				/*
+				 * No room for this mbuf in the device driver.
+				 * Request a notification FOR A PREVIOUS MBUF,
+				 * then call generic_netmap_tx_clean(kring) to do the
+				 * double check and see if we can free more buffers.
+				 * If there is space continue, else break;
+				 * NOTE: the double check is necessary if the problem
+				 * occurs in the txsync call after selrecord().
+				 * Also, we need some way to tell the caller that not
+				 * all buffers were queued onto the device (this was
+				 * not a problem with native netmap driver where space
+				 * is preallocated). The bridge has a similar problem
+				 * and we solve it there by dropping the excess packets.
+				 */
+				generic_set_tx_event(kring, nm_i);
+				if (generic_netmap_tx_clean(kring)) { /* space now available */
+					continue;
+				} else {
+					break;
+				}
+			}
+			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+			nm_i = nm_next(nm_i, lim);
+			IFRATE(rate_ctx.new.txpkt ++);
+		}
+
+		/* Update hwcur to the next slot to transmit. */
+		kring->nr_hwcur = nm_i; /* not head, we could break early */
+	}
+
+	/*
+	 * Second, reclaim completed buffers
+	 */
+	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
+		/* No more available slots? Set a notification event
+		 * on a netmap slot that will be cleaned in the future.
+		 * No doublecheck is performed, since txsync() will be
+		 * called twice by netmap_poll().
+		 */
+		generic_set_tx_event(kring, nm_i);
+	}
+	ND("tx #%d, hwtail = %d", n, kring->nr_hwtail);
+
+	generic_netmap_tx_clean(kring);
+
+	nm_txsync_finalize(kring);
+
+	return 0;
+}
+
+
+/*
+ * This handler is registered (through netmap_catch_rx())
+ * within the attached network interface
+ * in the RX subsystem, so that every mbuf passed up by
+ * the driver can be stolen before it reaches the network stack.
+ * Stolen packets are put in a queue where the
+ * generic_netmap_rxsync() callback can extract them.
+ */
+void
+generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
+{
+	struct netmap_adapter *na = NA(ifp);
+	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+	u_int work_done;
+	u_int rr = MBUF_RXQ(m); // receive ring number
+
+	if (rr >= na->num_rx_rings) {
+		rr = rr % na->num_rx_rings; // XXX expensive...
+	}
+
+	/* limit the size of the queue */
+	if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
+		m_freem(m);
+	} else {
+		mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
+	}
+
+	if (netmap_generic_mit < 32768) {
+		/* no rx mitigation, pass notification up */
+		netmap_generic_irq(na->ifp, rr, &work_done);
+		IFRATE(rate_ctx.new.rxirq++);
+	} else {
+		/* same as send combining, filter notification if there is a
+		 * pending timer, otherwise pass it up and start a timer.
+		 */
+		if (likely(netmap_mitigation_active(&gna->mit[rr]))) {
+			/* Record that there is some pending work. */
+			gna->mit[rr].mit_pending = 1;
+		} else {
+			netmap_generic_irq(na->ifp, rr, &work_done);
+			IFRATE(rate_ctx.new.rxirq++);
+			netmap_mitigation_start(&gna->mit[rr]);
+		}
+	}
+}
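+
+/*
+ * Editorial note (assuming netmap_generic_mit is expressed in
+ * nanoseconds): values below 32768 disable mitigation, so every stolen
+ * mbuf raises a notification; larger values let the per-ring timer
+ * coalesce them, with mit_pending recording work that arrived while
+ * the timer was armed.
+ */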
+
+/*
+ * generic_netmap_rxsync() extracts mbufs from the queue filled by
+ * generic_rx_handler() and puts their content in the netmap
+ * receive ring.
+ * Access must be protected because the rx handler is asynchronous.
+ */
+static int
+generic_netmap_rxsync(struct netmap_kring *kring, int flags)
+{
+	struct netmap_ring *ring = kring->ring;
+	struct netmap_adapter *na = kring->na;
+	u_int nm_i;	/* index into the netmap ring */ //j,
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = nm_rxsync_prologue(kring);
+	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
+
+	if (head > lim)
+		return netmap_ring_reinit(kring);
+
+	/*
+	 * First part: import newly received packets.
+	 */
+	if (netmap_no_pendintr || force_update) {
+		/* extract buffers from the rx queue, stop at most one
+		 * slot before nr_hwcur (stop_i)
+		 */
+		uint16_t slot_flags = kring->nkr_slot_flags;
+		u_int stop_i = nm_prev(kring->nr_hwcur, lim);
+
+		nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
+		for (n = 0; nm_i != stop_i; n++) {
+			int len;
+			void *addr = NMB(na, &ring->slot[nm_i]);
+			struct mbuf *m;
+
+			/* we only check the address here on generic rx rings */
+			if (addr == NETMAP_BUF_BASE(na)) { /* Bad buffer */
+				return netmap_ring_reinit(kring);
+			}
+			/*
+			 * Call the locked version of the function.
+			 * XXX Ideally we could grab a batch of mbufs at once
+			 * and save some locking overhead.
+			 */
+			m = mbq_safe_dequeue(&kring->rx_queue);
+			if (!m)	/* no more data */
+				break;
+			len = MBUF_LEN(m);
+			m_copydata(m, 0, len, addr);
+			ring->slot[nm_i].len = len;
+			ring->slot[nm_i].flags = slot_flags;
+			m_freem(m);
+			nm_i = nm_next(nm_i, lim);
+		}
+		if (n) {
+			kring->nr_hwtail = nm_i;
+			IFRATE(rate_ctx.new.rxpkt += n);
+		}
+		kring->nr_kflags &= ~NKR_PENDINTR;
+	}
+
+	// XXX should we invert the order ?
+	/*
+	 * Second part: skip past packets that userspace has released.
+	 */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		/* Userspace has released some packets. */
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+
+			slot->flags &= ~NS_BUF_CHANGED;
+			nm_i = nm_next(nm_i, lim);
+		}
+		kring->nr_hwcur = head;
+	}
+	/* tell userspace that there might be new packets. */
+	nm_rxsync_finalize(kring);
+	IFRATE(rate_ctx.new.rxsync++);
+
+	return 0;
+}
+
+static void
+generic_netmap_dtor(struct netmap_adapter *na)
+{
+	struct ifnet *ifp = na->ifp;
+	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
+	struct netmap_adapter *prev_na = gna->prev;
+
+	if (prev_na != NULL) {
+		D("Released generic NA %p", gna);
+		if_rele(na->ifp);
+		netmap_adapter_put(prev_na);
+	}
+	if (ifp != NULL) {
+		WNA(ifp) = prev_na;
+		D("Restored native NA %p", prev_na);
+		na->ifp = NULL;
+	}
+}
+
+/*
+ * generic_netmap_attach() makes it possible to use netmap on
+ * a device without native netmap support.
+ * This is less performant than native support but potentially
+ * faster than raw sockets or similar schemes.
+ *
+ * In this "emulated" mode, netmap rings do not necessarily
+ * have the same size as those in the NIC. We use a default
+ * value and possibly override it if the OS has ways to fetch the
+ * actual configuration.
+ */
+int
+generic_netmap_attach(struct ifnet *ifp)
+{
+	struct netmap_adapter *na;
+	struct netmap_generic_adapter *gna;
+	int retval;
+	u_int num_tx_desc, num_rx_desc;
+
+	num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
+
+	generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */
+	ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
+	if (num_tx_desc == 0 || num_rx_desc == 0) {
+		D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
+		return EINVAL;
+	}
+
+	gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (gna == NULL) {
+		D("no memory on attach, give up");
+		return ENOMEM;
+	}
+	na = (struct netmap_adapter *)gna;
+	na->ifp = ifp;
+	na->num_tx_desc = num_tx_desc;
+	na->num_rx_desc = num_rx_desc;
+	na->nm_register = &generic_netmap_register;
+	na->nm_txsync = &generic_netmap_txsync;
+	na->nm_rxsync = &generic_netmap_rxsync;
+	na->nm_dtor = &generic_netmap_dtor;
+	/* when using generic, NAF_NETMAP_ON is set so we force
+	 * NAF_SKIP_INTR to use the regular interrupt handler
+	 */
+	na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;
+
+	ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
+			ifp->num_tx_queues, ifp->real_num_tx_queues,
+			ifp->tx_queue_len);
+	ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
+			ifp->num_rx_queues, ifp->real_num_rx_queues);
+
+	generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
+
+	retval = netmap_attach_common(na);
+	if (retval) {
+		free(gna, M_DEVBUF);
+	}
+
+	return retval;
+}
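+
+/*
+ * Editorial usage sketch (hypothetical call site, not part of this file):
+ * netmap_get_hw_na() is expected to fall back to the emulated adapter
+ * when an ifp has no valid native one, roughly:
+ *
+ *	if (!native_na_valid(ifp))	// hypothetical magic-value check
+ *		error = generic_netmap_attach(ifp);
+ *	*na = NA(ifp);			// the newly attached generic adapter
+ */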


Property changes on: trunk/sys/dev/netmap/netmap_generic.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/dev/netmap/netmap_kern.h
===================================================================
--- trunk/sys/dev/netmap/netmap_kern.h	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/netmap/netmap_kern.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,5 +1,7 @@
+/* $MidnightBSD$ */
 /*
- * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -24,8 +26,7 @@
  */
 
 /*
- * $MidnightBSD$
- * $Id: netmap_kern.h,v 1.2 2013-01-08 03:53:24 laffer1 Exp $
+ * $FreeBSD: stable/10/sys/dev/netmap/netmap_kern.h 278779 2015-02-14 19:41:26Z luigi $
  *
  * The header contains the definitions of constants and function
  * prototypes used only in kernelspace.
@@ -34,39 +35,108 @@
 #ifndef _NET_NETMAP_KERN_H_
 #define _NET_NETMAP_KERN_H_
 
+#define WITH_VALE	// comment out to disable VALE support
+#define WITH_PIPES
+#define WITH_MONITOR
+#define WITH_GENERIC
+
 #if defined(__FreeBSD__)
 
-#define likely(x)	__builtin_expect(!!(x), 1)
-#define unlikely(x)	__builtin_expect(!!(x), 0)
+#define likely(x)	__builtin_expect((long)!!(x), 1L)
+#define unlikely(x)	__builtin_expect((long)!!(x), 0L)
 
 #define	NM_LOCK_T	struct mtx
-#define	NM_SELINFO_T	struct selinfo
+
+/* netmap global lock */
+#define	NMG_LOCK_T	struct sx
+#define NMG_LOCK_INIT()	sx_init(&netmap_global_lock, \
+				"netmap global lock")
+#define NMG_LOCK_DESTROY()	sx_destroy(&netmap_global_lock)
+#define NMG_LOCK()	sx_xlock(&netmap_global_lock)
+#define NMG_UNLOCK()	sx_xunlock(&netmap_global_lock)
+#define NMG_LOCK_ASSERT()	sx_assert(&netmap_global_lock, SA_XLOCKED)
+
+#define	NM_SELINFO_T	struct nm_selinfo
 #define	MBUF_LEN(m)	((m)->m_pkthdr.len)
-#define	NM_SEND_UP(ifp, m)	((ifp)->if_input)(ifp, m)
+#define	MBUF_IFP(m)	((m)->m_pkthdr.rcvif)
+#define	NM_SEND_UP(ifp, m)	((NA(ifp))->if_input)(ifp, m)
 
+#define NM_ATOMIC_T	volatile int	// XXX ?
+/* atomic operations */
+#include <machine/atomic.h>
+#define NM_ATOMIC_TEST_AND_SET(p)       (!atomic_cmpset_acq_int((p), 0, 1))
+#define NM_ATOMIC_CLEAR(p)              atomic_store_rel_int((p), 0)
+
+#if __FreeBSD_version >= 1100030
+#define	WNA(_ifp)	(_ifp)->if_netmap
+#else /* older FreeBSD */
+#define	WNA(_ifp)	(_ifp)->if_pspare[0]
+#endif /* older FreeBSD */
+
+#if __FreeBSD_version >= 1100005
+struct netmap_adapter *netmap_getna(if_t ifp);
+#endif
+
+#if __FreeBSD_version >= 1100027
+#define GET_MBUF_REFCNT(m)      ((m)->m_ext.ext_cnt ? *((m)->m_ext.ext_cnt) : -1)
+#define SET_MBUF_REFCNT(m, x)   *((m)->m_ext.ext_cnt) = x
+#define PNT_MBUF_REFCNT(m)      ((m)->m_ext.ext_cnt)
+#else
+#define GET_MBUF_REFCNT(m)      ((m)->m_ext.ref_cnt ? *((m)->m_ext.ref_cnt) : -1)
+#define SET_MBUF_REFCNT(m, x)   *((m)->m_ext.ref_cnt) = x
+#define PNT_MBUF_REFCNT(m)      ((m)->m_ext.ref_cnt)
+#endif
+
+MALLOC_DECLARE(M_NETMAP);
+
+struct nm_selinfo {
+	struct selinfo si;
+	struct mtx m;
+};
+
+void freebsd_selwakeup(struct nm_selinfo *si, int pri);
+
+// XXX linux struct, not used in FreeBSD
+struct net_device_ops {
+};
+struct ethtool_ops {
+};
+struct hrtimer {
+};
+
 #elif defined (linux)
 
 #define	NM_LOCK_T	safe_spinlock_t	// see bsd_glue.h
 #define	NM_SELINFO_T	wait_queue_head_t
 #define	MBUF_LEN(m)	((m)->len)
-#define	NM_SEND_UP(ifp, m)	netif_rx(m)
+#define	MBUF_IFP(m)	((m)->dev)
+#define	NM_SEND_UP(ifp, m)  \
+                        do { \
+                            m->priority = NM_MAGIC_PRIORITY_RX; \
+                            netif_rx(m); \
+                        } while (0)
 
+#define NM_ATOMIC_T	volatile long unsigned int
+
+#define NM_MTX_T		struct mutex
+#define NM_MTX_INIT(m, s)	do { (void)s; mutex_init(&(m)); } while (0)
+#define NM_MTX_DESTROY(m)	do { (void)m; } while (0)
+#define NM_MTX_LOCK(m)		mutex_lock(&(m))
+#define NM_MTX_UNLOCK(m)	mutex_unlock(&(m))
+#define NM_MTX_LOCK_ASSERT(m)	mutex_is_locked(&(m))
+
+#define	NMG_LOCK_T		NM_MTX_T
+#define	NMG_LOCK_INIT()		NM_MTX_INIT(netmap_global_lock, \
+					"netmap_global_lock")
+#define	NMG_LOCK_DESTROY()	NM_MTX_DESTROY(netmap_global_lock)
+#define	NMG_LOCK()		NM_MTX_LOCK(netmap_global_lock)
+#define	NMG_UNLOCK()		NM_MTX_UNLOCK(netmap_global_lock)
+#define	NMG_LOCK_ASSERT()	NM_MTX_LOCK_ASSERT(netmap_global_lock)
+
 #ifndef DEV_NETMAP
 #define DEV_NETMAP
-#endif
+#endif /* DEV_NETMAP */
 
-/*
- * IFCAP_NETMAP goes into net_device's priv_flags (if_capenable).
- * This was 16 bits up to linux 2.6.36, so we need a 16 bit value on older
- * platforms and tolerate the clash with IFF_DYNAMIC and IFF_BRIDGE_PORT.
- * For the 32-bit value, 0x100000 has no clashes until at least 3.5.1
- */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
-#define IFCAP_NETMAP	0x8000
-#else
-#define IFCAP_NETMAP	0x100000
-#endif
-
 #elif defined (__APPLE__)
 
 #warning apple support is incomplete.
@@ -88,9 +158,9 @@
 	do {							\
 		struct timeval __xxts;				\
 		microtime(&__xxts);				\
-		printf("%03d.%06d %s [%d] " format "\n",	\
+		printf("%03d.%06d [%4d] %-25s " format "\n",	\
 		(int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec,	\
-		__FUNCTION__, __LINE__, ##__VA_ARGS__);		\
+		__LINE__, __FUNCTION__, ##__VA_ARGS__);		\
 	} while (0)
 
 /* rate limited, lps indicates how many per second */
@@ -106,21 +176,38 @@
 	} while (0)
 
 struct netmap_adapter;
+struct nm_bdg_fwd;
+struct nm_bridge;
+struct netmap_priv_d;
 
+const char *nm_dump_buf(char *p, int len, int lim, char *dst);
+
+#include "netmap_mbq.h"
+
+extern NMG_LOCK_T	netmap_global_lock;
+
 /*
  * private, kernel view of a ring. Keeps track of the status of
  * a ring across system calls.
  *
  *	nr_hwcur	index of the next buffer to refill.
- *			It corresponds to ring->cur - ring->reserved
+ *			It corresponds to ring->head
+ *			at the time the system call returns.
  *
- *	nr_hwavail	the number of slots "owned" by userspace.
- *			nr_hwavail =:= ring->avail + ring->reserved
+ *	nr_hwtail	index of the first buffer owned by the kernel.
+ *			On RX, hwcur->hwtail are receive buffers
+ *			not yet released. hwcur is advanced following
+ *			ring->head, hwtail is advanced on incoming packets,
+ *			and a wakeup is generated when hwtail passes ring->cur
+ *			    On TX, hwcur->rcur have been filled by the sender
+ *			but not sent yet to the NIC; rcur->hwtail are available
+ *			for new transmissions, and hwtail->hwcur-1 are pending
+ *			transmissions not yet acknowledged.
  *
  * The indexes in the NIC and netmap rings are offset by nkr_hwofs slots.
  * This is so that, on a reset, buffers owned by userspace are not
  * modified by the kernel. In particular:
- * RX rings: the next empty buffer (hwcur + hwavail + hwofs) coincides with
+ * RX rings: the next empty buffer (hwtail + hwofs) coincides with
  * 	the next empty buffer as known by the hardware (next_to_check or so).
  * TX rings: hwcur + hwofs coincides with next_to_send
  *
@@ -127,55 +214,259 @@
  * For received packets, slot->flags is set to nkr_slot_flags
  * so we can provide a proper initial value (e.g. set NS_FORWARD
  * when operating in 'transparent' mode).
+ *
+ * The following fields are used to implement lock-free copy of packets
+ * from input to output ports in VALE switch:
+ *	nkr_hwlease	buffer after the last one being copied.
+ *			A writer in nm_bdg_flush reserves N buffers
+ *			from nr_hwlease, advances it, then does the
+ *			copy outside the lock.
+ *			In RX rings (used for VALE ports),
+ *			nkr_hwtail <= nkr_hwlease < nkr_hwcur+N-1
+ *			In TX rings (used for NIC or host stack ports)
+ *			nkr_hwcur <= nkr_hwlease < nkr_hwtail
+ *	nkr_leases	array of nkr_num_slots where writers can report
+ *			completion of their block. NR_NOSLOT (~0) indicates
+ *			that the writer has not finished yet
+ *	nkr_lease_idx	index of next free slot in nr_leases, to be assigned
+ *
+ * The kring is manipulated by txsync/rxsync and generic netmap function.
+ *
+ * Concurrent rxsync or txsync on the same ring are prevented through
+ * by nm_kr_(try)lock() which in turn uses nr_busy. This is all we need
+ * for NIC rings, and for TX rings attached to the host stack.
+ *
+ * RX rings attached to the host stack use an mbq (rx_queue) on both
+ * rxsync_from_host() and netmap_transmit(). The mbq is protected
+ * by its internal lock.
+ *
+ * RX rings attached to the VALE switch are accessed by both senders
+ * and receiver. They are protected through the q_lock on the RX ring.
  */
 struct netmap_kring {
-	struct netmap_ring *ring;
-	u_int nr_hwcur;
-	int nr_hwavail;
-	u_int nr_kflags;	/* private driver flags */
-#define NKR_PENDINTR	0x1	// Pending interrupt.
-	u_int nkr_num_slots;
+	struct netmap_ring	*ring;
 
+	uint32_t	nr_hwcur;
+	uint32_t	nr_hwtail;
+
+	/*
+	 * Copies of values in user rings, so we do not need to look
+	 * at the ring (which could be modified). These are set in the
+	 * *sync_prologue()/finalize() routines.
+	 */
+	uint32_t	rhead;
+	uint32_t	rcur;
+	uint32_t	rtail;
+
+	uint32_t	nr_kflags;	/* private driver flags */
+#define NKR_PENDINTR	0x1		// Pending interrupt.
+	uint32_t	nkr_num_slots;
+
+	/*
+	 * On a NIC reset, the NIC ring indexes may be reset but the
+	 * indexes in the netmap rings remain the same. nkr_hwofs
+	 * keeps track of the offset between the two.
+	 */
+	int32_t		nkr_hwofs;
+
 	uint16_t	nkr_slot_flags;	/* initial value for flags */
-	int	nkr_hwofs;	/* offset between NIC and netmap ring */
+
+	/* last_reclaim is an opaque marker to help reduce the frequency
+	 * of operations such as reclaiming tx buffers. A possible use
+	 * is to set it to ticks and do the reclaim only once per tick.
+	 */
+	uint64_t	last_reclaim;
+
+
+	NM_SELINFO_T	si;		/* poll/select wait queue */
+	NM_LOCK_T	q_lock;		/* protects kring and ring. */
+	NM_ATOMIC_T	nr_busy;	/* prevent concurrent syscalls */
+
 	struct netmap_adapter *na;
-	NM_SELINFO_T si;	/* poll/select wait queue */
-	NM_LOCK_T q_lock;	/* used if no device lock available */
+
+	/* The following fields are for VALE switch support */
+	struct nm_bdg_fwd *nkr_ft;
+	uint32_t	*nkr_leases;
+#define NR_NOSLOT	((uint32_t)~0)	/* used in nkr_*lease* */
+	uint32_t	nkr_hwlease;
+	uint32_t	nkr_lease_idx;
+
+	/* while nkr_stopped is set, no new [tr]xsync operations can
+	 * be started on this kring.
+	 * This is used by netmap_disable_all_rings()
+	 * to find a synchronization point where critical data
+	 * structures pointed to by the kring can be added or removed
+	 */
+	volatile int nkr_stopped;
+
+	/* Support for adapters without native netmap support.
+	 * On tx rings we preallocate an array of tx buffers
+	 * (same size as the netmap ring), on rx rings we
+	 * store incoming mbufs in a queue that is drained by
+	 * a rxsync.
+	 */
+	struct mbuf **tx_pool;
+	// u_int nr_ntc;		/* Emulation of a next-to-clean RX ring pointer. */
+	struct mbq rx_queue;            /* intercepted rx mbufs. */
+
+	uint32_t	ring_id;	/* debugging */
+	char name[64];			/* diagnostic */
+
+	/* [tx]sync callback for this kring.
+	 * The default nm_kring_create callback (netmap_krings_create)
+	 * sets the nm_sync callback of each hardware tx(rx) kring to
+	 * the corresponding nm_txsync(nm_rxsync) taken from the
+	 * netmap_adapter; moreover, it sets the sync callback
+	 * of the host tx(rx) ring to netmap_txsync_to_host
+	 * (netmap_rxsync_from_host).
+	 *
+	 * Overrides: the above configuration is not changed by
+	 * any of the nm_krings_create callbacks.
+	 */
+	int (*nm_sync)(struct netmap_kring *kring, int flags);
+
+#ifdef WITH_PIPES
+	struct netmap_kring *pipe;	/* if this is a pipe ring,
+					 * pointer to the other end
+					 */
+	struct netmap_ring *save_ring;	/* pointer to hidden rings
+       					 * (see netmap_pipe.c for details)
+					 */
+#endif /* WITH_PIPES */
+
+#ifdef WITH_MONITOR
+	/* pointer to the adapter that is monitoring this kring (if any)
+	 */
+	struct netmap_monitor_adapter *monitor;
+	/*
+	 * Monitors work by intercepting the txsync and/or rxsync of the
+	 * monitored krings. This is implemented by replacing
+	 * the nm_sync pointer above and saving the previous
+	 * one in save_sync below.
+	 */
+	int (*save_sync)(struct netmap_kring *kring, int flags);
+#endif
 } __attribute__((__aligned__(64)));
 
+
+/* return the next index, with wraparound */
+static inline uint32_t
+nm_next(uint32_t i, uint32_t lim)
+{
+	return unlikely (i == lim) ? 0 : i + 1;
+}
+
+
+/* return the previous index, with wraparound */
+static inline uint32_t
+nm_prev(uint32_t i, uint32_t lim)
+{
+	return unlikely (i == 0) ? lim : i - 1;
+}
+
+
 /*
- * This struct extends the 'struct adapter' (or
- * equivalent) device descriptor. It contains all fields needed to
- * support netmap operation.
+ *
+ * Here is the layout for the Rx and Tx rings.
+
+       RxRING                            TxRING
+
+      +-----------------+            +-----------------+
+      |                 |            |                 |
+      |XXX free slot XXX|            |XXX free slot XXX|
+      +-----------------+            +-----------------+
+head->| owned by user   |<-hwcur     | not sent to nic |<-hwcur
+      |                 |            | yet             |
+      +-----------------+            |                 |
+ cur->| available to    |            |                 |
+      | user, not read  |            +-----------------+
+      | yet             |       cur->| (being          |
+      |                 |            |  prepared)      |
+      |                 |            |                 |
+      +-----------------+            +     ------      +
+tail->|                 |<-hwtail    |                 |<-hwlease
+      | (being          | ...        |                 | ...
+      |  prepared)      | ...        |                 | ...
+      +-----------------+ ...        |                 | ...
+      |                 |<-hwlease   +-----------------+
+      |                 |      tail->|                 |<-hwtail
+      |                 |            |                 |
+      |                 |            |                 |
+      |                 |            |                 |
+      +-----------------+            +-----------------+
+
+ * The cur/tail (user view) and hwcur/hwtail (kernel view)
+ * are used in the normal operation of the card.
+ *
+ * When a ring is the output of a switch port (Rx ring for
+ * a VALE port, Tx ring for the host stack or NIC), slots
+ * are reserved in blocks through 'hwlease' which points
+ * to the next unused slot.
+ * On an Rx ring, hwlease is always after hwtail,
+ * and completions cause hwtail to advance.
+ * On a Tx ring, hwlease is always between cur and hwtail,
+ * and completions cause cur to advance.
+ *
+ * nm_kr_space() returns the maximum number of slots that
+ * can be assigned.
+ * nm_kr_lease() reserves the required number of buffers,
+ *    advances nkr_hwlease and also returns an entry in
+ *    a circular array where completions should be reported.
  */
+
+
+
+enum txrx { NR_RX = 0, NR_TX = 1 };
+
+struct netmap_vp_adapter; // forward
+
+/*
+ * The "struct netmap_adapter" extends the "struct adapter"
+ * (or equivalent) device descriptor.
+ * It contains all base fields needed to support netmap operation.
+ * There are in fact different types of netmap adapters
+ * (native, generic, VALE switch...) so a netmap_adapter is
+ * just the first field in the derived type.
+ */
 struct netmap_adapter {
 	/*
 	 * On linux we do not have a good way to tell if an interface
-	 * is netmap-capable. So we use the following trick:
+	 * is netmap-capable. So we always use the following trick:
 	 * NA(ifp) points here, and the first entry (which hopefully
 	 * always exists and is at least 32 bits) contains a magic
 	 * value which we can use to detect that the interface is good.
 	 */
 	uint32_t magic;
-	uint32_t na_flags;	/* future place for IFCAP_NETMAP */
+	uint32_t na_flags;	/* enabled, and other flags */
 #define NAF_SKIP_INTR	1	/* use the regular interrupt handler.
 				 * useful during initialization
 				 */
-	int refcount; /* number of user-space descriptors using this
+#define NAF_SW_ONLY	2	/* forward packets only to sw adapter */
+#define NAF_BDG_MAYSLEEP 4	/* the bridge is allowed to sleep when
+				 * forwarding packets coming from this
+				 * interface
+				 */
+#define NAF_MEM_OWNER	8	/* the adapter is responsible for the
+				 * deallocation of the memory allocator
+				 */
+#define NAF_NATIVE_ON   16      /* the adapter is native and the attached
+				 * interface is in netmap mode.
+				 * Virtual ports (vale, pipe, monitor...)
+				 * should never use this flag.
+				 */
+#define	NAF_NETMAP_ON	32	/* netmap is active (either native or
+				 * emulated). Where possible (e.g. FreeBSD)
+				 * IFCAP_NETMAP also mirrors this flag.
+				 */
+#define NAF_HOST_RINGS  64	/* the adapter supports the host rings */
+#define NAF_FORCE_NATIVE 128	/* the adapter is always NATIVE */
+#define	NAF_BUSY	(1U<<31) /* the adapter is used internally and
+				  * cannot be registered from userspace
+				  */
+	int active_fds; /* number of user-space descriptors using this
 			 interface, which is equal to the number of
 			 struct netmap_if objs in the mapped region. */
-	/*
-	 * The selwakeup in the interrupt thread can use per-ring
-	 * and/or global wait queues. We track how many clients
-	 * of each type we have so we can optimize the drivers,
-	 * and especially avoid huge contention on the locks.
-	 */
-	int na_single;	/* threads attached to a single hw queue */
-	int na_multi;	/* threads attached to multiple hw queues */
 
-	int separate_locks; /* set if the interface suports different
-			       locks for rx, tx and core. */
-
 	u_int num_rx_rings; /* number of adapter receive rings */
 	u_int num_tx_rings; /* number of adapter transmit rings */
 
@@ -189,82 +480,377 @@
 	struct netmap_kring *tx_rings; /* array of TX rings. */
 	struct netmap_kring *rx_rings; /* array of RX rings. */
 
+	void *tailroom;		       /* space below the rings array */
+				       /* (used for leases) */
+
+
 	NM_SELINFO_T tx_si, rx_si;	/* global wait queues */
 
+	/* count users of the global wait queues */
+	int tx_si_users, rx_si_users;
+
+	void *pdev; /* used to store pci device */
+
 	/* copy of if_qflush and if_transmit pointers, to intercept
 	 * packets from the network stack when netmap is active.
 	 */
 	int     (*if_transmit)(struct ifnet *, struct mbuf *);
 
+	/* copy of if_input for netmap_send_up() */
+	void     (*if_input)(struct ifnet *, struct mbuf *);
+
 	/* references to the ifnet and device routines, used by
 	 * the generic netmap functions.
 	 */
 	struct ifnet *ifp; /* adapter is ifp->if_softc */
 
-	NM_LOCK_T core_lock;	/* used if no device lock available */
+	/*---- callbacks for this netmap adapter -----*/
+	/*
+	 * nm_dtor() is the cleanup routine called when destroying
+	 *	the adapter.
+	 *	Called with NMG_LOCK held.
+	 *
+	 * nm_register() is called on NIOCREGIF and close() to enter
+	 *	or exit netmap mode on the NIC
+	 *	Called with NMG_LOCK held.
+	 *
+	 * nm_txsync() pushes packets to the underlying hw/switch
+	 *
+	 * nm_rxsync() collects packets from the underlying hw/switch
+	 *
+	 * nm_config() returns configuration information from the OS
+	 *	Called with NMG_LOCK held.
+	 *
+	 * nm_krings_create() create and init the tx_rings and
+	 * 	rx_rings arrays of kring structures. In particular,
+	 * 	set the nm_sync callbacks for each ring.
+	 * 	There is no need to also allocate the corresponding
+	 * 	netmap_rings, since netmap_mem_rings_create() will always
+	 * 	be called to provide the missing ones.
+	 *	Called with NMG_LOCK held.
+	 *
+	 * nm_krings_delete() cleanup and delete the tx_rings and rx_rings
+	 * 	arrays
+	 *	Called with NMG_LOCK held.
+	 *
+	 * nm_notify() is used to act after data have become available
+	 * 	(or the stopped state of the ring has changed)
+	 *	For hw devices this is typically a selwakeup(),
+	 *	but for NIC/host ports attached to a switch (or vice-versa)
+	 *	we also need to invoke the 'txsync' code downstream.
+	 */
+	void (*nm_dtor)(struct netmap_adapter *);
 
-	int (*nm_register)(struct ifnet *, int onoff);
-	void (*nm_lock)(struct ifnet *, int what, u_int ringid);
-	int (*nm_txsync)(struct ifnet *, u_int ring, int lock);
-	int (*nm_rxsync)(struct ifnet *, u_int ring, int lock);
+	int (*nm_register)(struct netmap_adapter *, int onoff);
+
+	int (*nm_txsync)(struct netmap_kring *kring, int flags);
+	int (*nm_rxsync)(struct netmap_kring *kring, int flags);
+#define NAF_FORCE_READ    1
+#define NAF_FORCE_RECLAIM 2
 	/* return configuration information */
-	int (*nm_config)(struct ifnet *, u_int *txr, u_int *txd,
-					u_int *rxr, u_int *rxd);
+	int (*nm_config)(struct netmap_adapter *,
+		u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
+	int (*nm_krings_create)(struct netmap_adapter *);
+	void (*nm_krings_delete)(struct netmap_adapter *);
+	int (*nm_notify)(struct netmap_adapter *,
+		u_int ring, enum txrx, int flags);
+#define NAF_DISABLE_NOTIFY 8	/* notify that the stopped state of the
+				 * ring has changed (kring->nkr_stopped)
+				 */
 
+#ifdef WITH_VALE
 	/*
+	 * nm_bdg_attach() initializes the na_vp field to point
+	 *      to an adapter that can be attached to a VALE switch. If the
+	 *      current adapter is already a VALE port, na_vp is simply a cast;
+	 *      otherwise, na_vp points to a netmap_bwrap_adapter.
+	 *      If applicable, this callback also initializes na_hostvp,
+	 *      that can be used to connect the adapter host rings to the
+	 *      switch.
+	 *      Called with NMG_LOCK held.
+	 *
+	 * nm_bdg_ctl() is called on the actual attach/detach to/from
+	 *      to/from the switch, to perform adapter-specific
+	 *      initializations
+	 *      Called with NMG_LOCK held.
+	 */
+	int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *);
+	int (*nm_bdg_ctl)(struct netmap_adapter *, struct nmreq *, int);
+
+	/* adapter used to attach this adapter to a VALE switch (if any) */
+	struct netmap_vp_adapter *na_vp;
+	/* adapter used to attach the host rings of this adapter
+	 * to a VALE switch (if any) */
+	struct netmap_vp_adapter *na_hostvp;
+#endif
+
+	/* standard refcount to control the lifetime of the adapter
+	 * (it should be equal to the lifetime of the corresponding ifp)
+	 */
+	int na_refcount;
+
+	/* memory allocator (opaque)
+	 * We also cache a pointer to the lut_entry for translating
+	 * buffer addresses, and the total number of buffers.
+	 */
+ 	struct netmap_mem_d *nm_mem;
+	struct lut_entry *na_lut;
+	uint32_t na_lut_objtotal;	/* max buffer index */
+	uint32_t na_lut_objsize;	/* buffer size */
+
+	/* additional information attached to this adapter
+	 * by other netmap subsystems. Currently used by
+	 * bwrap and LINUX/v1000.
+	 */
+	void *na_private;
+
+#ifdef WITH_PIPES
+	/* array of pipes that have this adapter as a parent */
+	struct netmap_pipe_adapter **na_pipes;
+	int na_next_pipe;	/* next free slot in the array */
+	int na_max_pipes;	/* size of the array */
+#endif /* WITH_PIPES */
+
+	char name[64];
+};
+
+
+/*
+ * If the NIC is owned by the kernel
+ * (i.e., bridge), neither another bridge nor user can use it;
+ * if the NIC is owned by a user, only users can share it.
+ * Evaluation must be done under NMG_LOCK().
+ */
+#define NETMAP_OWNED_BY_KERN(na)	((na)->na_flags & NAF_BUSY)
+#define NETMAP_OWNED_BY_ANY(na) \
+	(NETMAP_OWNED_BY_KERN(na) || ((na)->active_fds > 0))
+
+
+/*
+ * derived netmap adapters for various types of ports
+ */
+struct netmap_vp_adapter {	/* VALE software port */
+	struct netmap_adapter up;
+
+	/*
 	 * Bridge support:
 	 *
 	 * bdg_port is the port number used in the bridge;
-	 * na_bdg_refcount is a refcount used for bridge ports,
-	 *	when it goes to 0 we can detach+free this port
-	 *	(a bridge port is always attached if it exists;
-	 *	it is not always registered)
+	 * na_bdg points to the bridge this NA is attached to.
 	 */
 	int bdg_port;
-	int na_bdg_refcount;
+	struct nm_bridge *na_bdg;
+	int retry;
 
+	/* Offset of ethernet header for each packet. */
+	u_int virt_hdr_len;
+	/* Maximum Frame Size, used in bdg_mismatch_datapath() */
+	u_int mfs;
+};
+
+
+struct netmap_hw_adapter {	/* physical device */
+	struct netmap_adapter up;
+
+	struct net_device_ops nm_ndo;	// XXX linux only
+	struct ethtool_ops    nm_eto;	// XXX linux only
+	const struct ethtool_ops*   save_ethtool;
+
+	int (*nm_hw_register)(struct netmap_adapter *, int onoff);
+};
+
+#ifdef WITH_GENERIC
+/* Mitigation support. */
+struct nm_generic_mit {
+	struct hrtimer mit_timer;
+	int mit_pending;
+	int mit_ring_idx;  /* index of the ring being mitigated */
+	struct netmap_adapter *mit_na;  /* backpointer */
+};
+
+struct netmap_generic_adapter {	/* emulated device */
+	struct netmap_hw_adapter up;
+
+	/* Pointer to a previously used netmap adapter. */
+	struct netmap_adapter *prev;
+
+	/* generic netmap adapters support:
+	 * a net_device_ops struct overrides ndo_select_queue(),
+	 * save_if_input saves the if_input hook (FreeBSD),
+	 * mit implements rx interrupt mitigation,
+	 */
+	struct net_device_ops generic_ndo;
+	void (*save_if_input)(struct ifnet *, struct mbuf *);
+
+	struct nm_generic_mit *mit;
 #ifdef linux
-	struct net_device_ops nm_ndo;
-#endif /* linux */
+        netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *);
+#endif
 };
+#endif  /* WITH_GENERIC */
 
+static __inline int
+netmap_real_tx_rings(struct netmap_adapter *na)
+{
+	return na->num_tx_rings + !!(na->na_flags & NAF_HOST_RINGS);
+}
+
+static __inline int
+netmap_real_rx_rings(struct netmap_adapter *na)
+{
+	return na->num_rx_rings + !!(na->na_flags & NAF_HOST_RINGS);
+}
+
+#ifdef WITH_VALE
+
 /*
- * The combination of "enable" (ifp->if_capenable & IFCAP_NETMAP)
- * and refcount gives the status of the interface, namely:
+ * Bridge wrapper for non VALE ports attached to a VALE switch.
  *
- *	enable	refcount	Status
+ * The real device must already have its own netmap adapter (hwna).
+ * The bridge wrapper and the hwna adapter share the same set of
+ * netmap rings and buffers, but they have two separate sets of
+ * krings descriptors, with tx/rx meanings swapped:
  *
- *	FALSE	0		normal operation
- *	FALSE	!= 0		-- (impossible)
- *	TRUE	1		netmap mode
- *	TRUE	0		being deleted.
+ *                                  netmap
+ *           bwrap     krings       rings      krings      hwna
+ *         +------+   +------+     +-----+    +------+   +------+
+ *         |tx_rings->|      |\   /|     |----|      |<-tx_rings|
+ *         |      |   +------+ \ / +-----+    +------+   |      |
+ *         |      |             X                        |      |
+ *         |      |            / \                       |      |
+ *         |      |   +------+/   \+-----+    +------+   |      |
+ *         |rx_rings->|      |     |     |----|      |<-rx_rings|
+ *         |      |   +------+     +-----+    +------+   |      |
+ *         +------+                                      +------+
+ *
+ * - packets coming from the bridge go to the bwrap rx rings,
+ *   which are also the hwna tx rings.  The bwrap notify callback
+ *   will then complete the hwna tx (see netmap_bwrap_notify).
+ *
+ * - packets coming from the outside go to the hwna rx rings,
+ *   which are also the bwrap tx rings.  The (overwritten) hwna
+ *   notify method will then complete the bridge tx
+ *   (see netmap_bwrap_intr_notify).
+ *
+ *   The bridge wrapper may optionally connect the hwna 'host' rings
+ *   to the bridge. This is done by using a second port in the
+ *   bridge and connecting it to the 'host' netmap_vp_adapter
+ *   contained in the netmap_bwrap_adapter. The bwrap host adapter
+ *   cross-links the hwna host rings in the same way as shown above.
+ *
+ * - packets coming from the bridge and directed to the host stack
+ *   are handled by the bwrap host notify callback
+ *   (see netmap_bwrap_host_notify)
+ *
+ * - packets coming from the host stack are still handled by the
+ *   overwritten hwna notify callback (netmap_bwrap_intr_notify),
+ *   but are diverted to the host adapter depending on the ring number.
+ *
  */
+struct netmap_bwrap_adapter {
+	struct netmap_vp_adapter up;
+	struct netmap_vp_adapter host;  /* for host rings */
+	struct netmap_adapter *hwna;	/* the underlying device */
 
-#define NETMAP_DELETING(_na)  (  ((_na)->refcount == 0) &&	\
-	( (_na)->ifp->if_capenable & IFCAP_NETMAP) )
+	/* backup of the hwna notify callback */
+	int (*save_notify)(struct netmap_adapter *,
+			u_int ring, enum txrx, int flags);
+	/* backup of the hwna memory allocator */
+	struct netmap_mem_d *save_nmd;
 
+	/*
+	 * When we attach a physical interface to the bridge, we
+	 * allow the controlling process to terminate, so we need
+	 * a place to store the netmap_priv_d data structure.
+	 * This is only done when physical interfaces
+	 * are attached to a bridge.
+	 */
+	struct netmap_priv_d *na_kpriv;
+};
+int netmap_bwrap_attach(const char *name, struct netmap_adapter *);
+
+
+#endif /* WITH_VALE */
+
+#ifdef WITH_PIPES
+
+#define NM_MAXPIPES 	64	/* max number of pipes per adapter */
+
+struct netmap_pipe_adapter {
+	struct netmap_adapter up;
+
+	u_int id; 	/* pipe identifier */
+	int role;	/* either NR_REG_PIPE_MASTER or NR_REG_PIPE_SLAVE */
+
+	struct netmap_adapter *parent; /* adapter that owns the memory */
+	struct netmap_pipe_adapter *peer; /* the other end of the pipe */
+	int peer_ref;		/* 1 iff we are holding a ref to the peer */
+
+	u_int parent_slot; /* index in the parent pipe array */
+};
+
+#endif /* WITH_PIPES */
+
+
+/* return slots reserved to rx clients; used in drivers */
+static inline uint32_t
+nm_kr_rxspace(struct netmap_kring *k)
+{
+	int space = k->nr_hwtail - k->nr_hwcur;
+	if (space < 0)
+		space += k->nkr_num_slots;
+	ND("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail);
+
+	return space;
+}
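+
+/* Editorial example: with nkr_num_slots = 8, nr_hwcur = 5 and
+ * nr_hwtail = 2, nm_kr_rxspace() returns 2 - 5 + 8 = 5 (slots 5,6,7,0,1). */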
+
+
+/* True if no space in the tx ring. only valid after txsync_prologue */
+static inline int
+nm_kr_txempty(struct netmap_kring *kring)
+{
+	return kring->rcur == kring->nr_hwtail;
+}
+
+
 /*
- * parameters for (*nm_lock)(adapter, what, index)
+ * protect against multiple threads using the same ring.
+ * also check that the ring has not been stopped.
+ * We only care for 0 or !=0 as a return code.
  */
-enum {
-	NETMAP_NO_LOCK = 0,
-	NETMAP_CORE_LOCK, NETMAP_CORE_UNLOCK,
-	NETMAP_TX_LOCK, NETMAP_TX_UNLOCK,
-	NETMAP_RX_LOCK, NETMAP_RX_UNLOCK,
-#ifdef __FreeBSD__
-#define	NETMAP_REG_LOCK		NETMAP_CORE_LOCK
-#define	NETMAP_REG_UNLOCK	NETMAP_CORE_UNLOCK
-#else
-	NETMAP_REG_LOCK, NETMAP_REG_UNLOCK
-#endif
-};
+#define NM_KR_BUSY	1
+#define NM_KR_STOPPED	2
 
-/* How to handle locking support in netmap_rx_irq/netmap_tx_irq */
-#define	NETMAP_LOCKED_ENTER	0x10000000	/* already locked on enter */
-#define	NETMAP_LOCKED_EXIT	0x20000000	/* keep locked on exit */
 
+static __inline void nm_kr_put(struct netmap_kring *kr)
+{
+	NM_ATOMIC_CLEAR(&kr->nr_busy);
+}
+
+
+static __inline int nm_kr_tryget(struct netmap_kring *kr)
+{
+	/* check a first time without taking the lock
+	 * to avoid starvation for nm_kr_get()
+	 */
+	if (unlikely(kr->nkr_stopped)) {
+		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
+		return NM_KR_STOPPED;
+	}
+	if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
+		return NM_KR_BUSY;
+	/* check a second time with lock held */
+	if (unlikely(kr->nkr_stopped)) {
+		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
+		nm_kr_put(kr);
+		return NM_KR_STOPPED;
+	}
+	return 0;
+}
+
+
 /*
- * The following are support routines used by individual drivers to
+ * The following functions are used by individual drivers to
  * support netmap operation.
  *
  * netmap_attach() initializes a struct netmap_adapter, allocating the
@@ -272,29 +858,339 @@
  *
  * netmap_detach() frees the memory allocated by netmap_attach().
  *
- * netmap_start() replaces the if_transmit routine of the interface,
+ * netmap_transmit() replaces the if_transmit routine of the interface,
  *	and is used to intercept packets coming from the stack.
  *
  * netmap_load_map/netmap_reload_map are helper routines to set/reset
  *	the dmamap for a packet buffer
  *
- * netmap_reset() is a helper routine to be called in the driver
- *	when reinitializing a ring.
+ * netmap_reset() is a helper routine to be called in the hw driver
+ *	when reinitializing a ring. It should not be called by
+ *	virtual ports (vale, pipes, monitor)
  */
-int netmap_attach(struct netmap_adapter *, int);
+int netmap_attach(struct netmap_adapter *);
 void netmap_detach(struct ifnet *);
-int netmap_start(struct ifnet *, struct mbuf *);
-enum txrx { NR_RX = 0, NR_TX = 1 };
+int netmap_transmit(struct ifnet *, struct mbuf *);
 struct netmap_slot *netmap_reset(struct netmap_adapter *na,
-	enum txrx tx, int n, u_int new_cur);
+	enum txrx tx, u_int n, u_int new_cur);
 int netmap_ring_reinit(struct netmap_kring *);
 
-extern u_int netmap_buf_size;
-#define NETMAP_BUF_SIZE	netmap_buf_size	// XXX remove
-extern int netmap_mitigate;
+/* default functions to handle rx/tx interrupts */
+int netmap_rx_irq(struct ifnet *, u_int, u_int *);
+#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
+void netmap_common_irq(struct ifnet *, u_int, u_int *work_done);
+
+
+#ifdef WITH_VALE
+/* functions used by external modules to interface with VALE */
+#define netmap_vp_to_ifp(_vp)	((_vp)->up.ifp)
+#define netmap_ifp_to_vp(_ifp)	(NA(_ifp)->na_vp)
+#define netmap_ifp_to_host_vp(_ifp) (NA(_ifp)->na_hostvp)
+#define netmap_bdg_idx(_vp)	((_vp)->bdg_port)
+const char *netmap_bdg_name(struct netmap_vp_adapter *);
+#else /* !WITH_VALE */
+#define netmap_vp_to_ifp(_vp)	NULL
+#define netmap_ifp_to_vp(_ifp)	NULL
+#define netmap_ifp_to_host_vp(_ifp) NULL
+#define netmap_bdg_idx(_vp)	-1
+#define netmap_bdg_name(_vp)	NULL
+#endif /* WITH_VALE */
+
+static inline int
+nm_native_on(struct netmap_adapter *na)
+{
+	return na && na->na_flags & NAF_NATIVE_ON;
+}
+
+static inline int
+nm_netmap_on(struct netmap_adapter *na)
+{
+	return na && na->na_flags & NAF_NETMAP_ON;
+}
+
+/* set/clear native flags and if_transmit/netdev_ops */
+static inline void
+nm_set_native_flags(struct netmap_adapter *na)
+{
+	struct ifnet *ifp = na->ifp;
+
+	na->na_flags |= (NAF_NATIVE_ON | NAF_NETMAP_ON);
+#ifdef IFCAP_NETMAP /* or FreeBSD ? */
+	ifp->if_capenable |= IFCAP_NETMAP;
+#endif
+#ifdef __FreeBSD__
+	na->if_transmit = ifp->if_transmit;
+	ifp->if_transmit = netmap_transmit;
+#else
+	na->if_transmit = (void *)ifp->netdev_ops;
+	ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
+	((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops;
+	ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto;
+#endif
+}
+
+
+static inline void
+nm_clear_native_flags(struct netmap_adapter *na)
+{
+	struct ifnet *ifp = na->ifp;
+
+#ifdef __FreeBSD__
+	ifp->if_transmit = na->if_transmit;
+#else
+	ifp->netdev_ops = (void *)na->if_transmit;
+	ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
+#endif
+	na->na_flags &= ~(NAF_NATIVE_ON | NAF_NETMAP_ON);
+#ifdef IFCAP_NETMAP /* or FreeBSD ? */
+	ifp->if_capenable &= ~IFCAP_NETMAP;
+#endif
+}
+
+
+/*
+ * validates parameters in the ring/kring, returns a value for head
+ * If any error, returns ring_size to force a reinit.
+ */
+uint32_t nm_txsync_prologue(struct netmap_kring *);
+
+
+/*
+ * validates parameters in the ring/kring, returns a value for head,
+ * and the 'reserved' value in the argument.
+ * If any error, returns ring_size to force a reinit.
+ */
+uint32_t nm_rxsync_prologue(struct netmap_kring *);
+
+
+/*
+ * update kring and ring at the end of txsync.
+ */
+static inline void
+nm_txsync_finalize(struct netmap_kring *kring)
+{
+	/* update ring tail to what the kernel knows */
+	kring->ring->tail = kring->rtail = kring->nr_hwtail;
+
+	/* note, head/rhead/hwcur might be behind cur/rcur
+	 * if no carrier
+	 */
+	ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
+		kring->name, kring->nr_hwcur, kring->nr_hwtail,
+		kring->rhead, kring->rcur, kring->rtail);
+}
+
+
+/*
+ * update kring and ring at the end of rxsync
+ */
+static inline void
+nm_rxsync_finalize(struct netmap_kring *kring)
+{
+	/* tell userspace that there might be new packets */
+	//struct netmap_ring *ring = kring->ring;
+	ND("head %d cur %d tail %d -> %d", ring->head, ring->cur, ring->tail,
+		kring->nr_hwtail);
+	kring->ring->tail = kring->rtail = kring->nr_hwtail;
+	/* make a copy of the state for next round */
+	kring->rhead = kring->ring->head;
+	kring->rcur = kring->ring->cur;
+}
+
+
+/* check/fix address and len in tx rings */
+#if 1 /* debug version */
+#define	NM_CHECK_ADDR_LEN(_na, _a, _l)	do {				\
+	if (_a == NETMAP_BUF_BASE(_na) || _l > NETMAP_BUF_SIZE(_na)) {	\
+		RD(5, "bad addr/len ring %d slot %d idx %d len %d",	\
+			kring->ring_id, nm_i, slot->buf_idx, len);	\
+		if (_l > NETMAP_BUF_SIZE(_na))				\
+			_l = NETMAP_BUF_SIZE(_na);			\
+	} } while (0)
+#else /* no debug version */
+#define	NM_CHECK_ADDR_LEN(_na, _a, _l)	do {				\
+		if (_l > NETMAP_BUF_SIZE(_na))				\
+			_l = NETMAP_BUF_SIZE(_na);			\
+	} while (0)
+#endif
+
+
+/*---------------------------------------------------------------*/
+/*
+ * Support routines used by netmap subsystems
+ * (native drivers, VALE, generic, pipes, monitors, ...)
+ */
+
+
+/* common routine for all functions that create a netmap adapter. It performs
+ * two main tasks:
+ * - if the na points to an ifp, mark the ifp as netmap capable
+ *   using na as its native adapter;
+ * - provide defaults for the setup callbacks and the memory allocator
+ */
+int netmap_attach_common(struct netmap_adapter *);
+/* common actions to be performed on netmap adapter destruction */
+void netmap_detach_common(struct netmap_adapter *);
+/* fill priv->np_[tr]xq{first,last} using the ringid and flags information
+ * coming from a struct nmreq
+ */
+int netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags);
+/* update the ring parameters (number and size of tx and rx rings).
+ * It calls the nm_config callback, if available.
+ */
+int netmap_update_config(struct netmap_adapter *na);
+/* create and initialize the common fields of the krings array.
+ * using the information that must be already available in the na.
+ * tailroom can be used to request the allocation of additional
+ * tailroom bytes after the krings array. This is used by
+ * netmap_vp_adapter's (i.e., VALE ports) to make room for
+ * leasing-related data structures
+ */
+int netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
+/* deletes the kring array of the adapter. The array must have
+ * been created using netmap_krings_create
+ */
+void netmap_krings_delete(struct netmap_adapter *na);
+
+/* set the stopped/enabled status of ring
+ * When stopping, they also wait for all current activity on the ring to
+ * terminate. The status change is then notified using the na nm_notify
+ * callback.
+ */
+void netmap_set_txring(struct netmap_adapter *, u_int ring_id, int stopped);
+void netmap_set_rxring(struct netmap_adapter *, u_int ring_id, int stopped);
+/* set the stopped/enabled status of all rings of the adapter. */
+void netmap_set_all_rings(struct netmap_adapter *, int stopped);
+/* convenience wrappers for netmap_set_all_rings, used in drivers */
+void netmap_disable_all_rings(struct ifnet *);
+void netmap_enable_all_rings(struct ifnet *);
+
+int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
+
+struct netmap_if *
+netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
+	uint16_t ringid, uint32_t flags, int *err);
+
+
+
+u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
+int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
+int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);
+
+
+#ifdef WITH_VALE
+/*
+ * The following bridge-related functions are used by other
+ * kernel modules.
+ *
+ * VALE only supports unicast or broadcast. The lookup
+ * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
+ * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
+ * XXX in practice "unknown" might be handled the same as broadcast.
+ */
+typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
+		const struct netmap_vp_adapter *);
+typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
+typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
+struct netmap_bdg_ops {
+	bdg_lookup_fn_t lookup;
+	bdg_config_fn_t config;
+	bdg_dtor_fn_t	dtor;
+};
+
+u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
+		const struct netmap_vp_adapter *);
+
+#define	NM_BDG_MAXPORTS		254	/* up to 254 */
+#define	NM_BDG_BROADCAST	NM_BDG_MAXPORTS
+#define	NM_BDG_NOPORT		(NM_BDG_MAXPORTS+1)
+
+#define	NM_NAME			"vale"	/* prefix for bridge port name */
+
+/* these are redefined in case of no VALE support */
+int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
+void netmap_init_bridges(void);
+int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops);
+int netmap_bdg_config(struct nmreq *nmr);
+
+#else /* !WITH_VALE */
+#define	netmap_get_bdg_na(_1, _2, _3)	0
+#define netmap_init_bridges(_1)
+#define	netmap_bdg_ctl(_1, _2)	EINVAL
+#endif /* !WITH_VALE */
+
+#ifdef WITH_PIPES
+/* max number of pipes per device */
+#define NM_MAXPIPES	64	/* XXX how many? */
+/* in case of no error, returns the actual number of pipes in nmr->nr_arg1 */
+int netmap_pipe_alloc(struct netmap_adapter *, struct nmreq *nmr);
+void netmap_pipe_dealloc(struct netmap_adapter *);
+int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
+#else /* !WITH_PIPES */
+#define NM_MAXPIPES	0
+#define netmap_pipe_alloc(_1, _2) 	EOPNOTSUPP
+#define netmap_pipe_dealloc(_1)
+#define netmap_get_pipe_na(_1, _2, _3)	0
+#endif
+
+#ifdef WITH_MONITOR
+int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
+#else
+#define netmap_get_monitor_na(_1, _2, _3) 0
+#endif
+
+/* Various prototypes */
+int netmap_poll(struct cdev *dev, int events, struct thread *td);
+int netmap_init(void);
+void netmap_fini(void);
+int netmap_get_memory(struct netmap_priv_d* p);
+void netmap_dtor(void *data);
+int netmap_dtor_locked(struct netmap_priv_d *priv);
+
+int netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td);
+
+/* netmap_adapter creation/destruction */
+
+// #define NM_DEBUG_PUTGET 1
+
+#ifdef NM_DEBUG_PUTGET
+
+#define NM_DBG(f) __##f
+
+void __netmap_adapter_get(struct netmap_adapter *na);
+
+#define netmap_adapter_get(na) 				\
+	do {						\
+		struct netmap_adapter *__na = na;	\
+		D("getting %p:%s (%d)", __na, (__na)->name, (__na)->na_refcount);	\
+		__netmap_adapter_get(__na);		\
+	} while (0)
+
+int __netmap_adapter_put(struct netmap_adapter *na);
+
+#define netmap_adapter_put(na)				\
+	({						\
+		struct netmap_adapter *__na = na;	\
+		D("putting %p:%s (%d)", __na, (__na)->name, (__na)->na_refcount);	\
+		__netmap_adapter_put(__na);		\
+	})
+
+#else /* !NM_DEBUG_PUTGET */
+
+#define NM_DBG(f) f
+void netmap_adapter_get(struct netmap_adapter *na);
+int netmap_adapter_put(struct netmap_adapter *na);
+
+#endif /* !NM_DEBUG_PUTGET */
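The get/put pair above is typically used as follows (a sketch assuming the usual last-reference semantics of netmap_adapter_put(); with NM_DEBUG_PUTGET defined each call also logs the current na_refcount):

    netmap_adapter_get(na);         /* keep the adapter alive while in use */
    /* ... hand 'na' to another subsystem ... */
    if (netmap_adapter_put(na)) {
            /* last reference dropped, the adapter has been destroyed */
    }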
+
+
+/*
+ * module variables
+ */
+#define NETMAP_BUF_BASE(na)	((na)->na_lut[0].vaddr)
+#define NETMAP_BUF_SIZE(na)	((na)->na_lut_objsize)
+extern int netmap_mitigate;	// XXX not really used
 extern int netmap_no_pendintr;
-extern u_int netmap_total_buffers;
-extern char *netmap_buffer_base;
 extern int netmap_verbose;	// XXX debugging
 enum {                                  /* verbose flags */
 	NM_VERB_ON = 1,                 /* generic verbose */
@@ -307,13 +1203,15 @@
 	NM_VERB_NIC_TXSYNC = 0x2000,
 };
 
+extern int netmap_txsync_retry;
+extern int netmap_generic_mit;
+extern int netmap_generic_ringsize;
+extern int netmap_generic_rings;
+
 /*
  * NA returns a pointer to the struct netmap adapter from the ifp,
  * WNA is used to write it.
  */
-#ifndef WNA
-#define	WNA(_ifp)	(_ifp)->if_pspare[0]
-#endif
 #define	NA(_ifp)	((struct netmap_adapter *)WNA(_ifp))
 
 /*
@@ -348,7 +1246,12 @@
 #endif	/* linux */
 
 #ifdef __FreeBSD__
-/* Callback invoked by the dma machinery after a successfull dmamap_load */
+
+/* Assigns the device IOMMU domain to an allocator.
+ * Returns -ENOMEM in case the domain is different */
+#define nm_iommu_group_id(dev) (0)
+
+/* Callback invoked by the dma machinery after a successful dmamap_load */
 static void netmap_dmamap_cb(__unused void *arg,
     __unused bus_dma_segment_t * segs, __unused int nseg, __unused int error)
 {
@@ -358,25 +1261,77 @@
  * XXX can we do it without a callback ?
  */
 static inline void
-netmap_load_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
+netmap_load_map(struct netmap_adapter *na,
+	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
 {
 	if (map)
-		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE,
+		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
 		    netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
 }
 
+static inline void
+netmap_unload_map(struct netmap_adapter *na,
+        bus_dma_tag_t tag, bus_dmamap_t map)
+{
+	if (map)
+		bus_dmamap_unload(tag, map);
+}
+
 /* update the map when a buffer changes. */
 static inline void
-netmap_reload_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
+netmap_reload_map(struct netmap_adapter *na,
+	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
 {
 	if (map) {
 		bus_dmamap_unload(tag, map);
-		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE,
+		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
 		    netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
 	}
 }
+
 #else /* linux */
 
+int nm_iommu_group_id(bus_dma_tag_t dev);
+extern size_t     netmap_mem_get_bufsize(struct netmap_mem_d *);
+#include <linux/dma-mapping.h>
+
+static inline void
+netmap_load_map(struct netmap_adapter *na,
+	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
+{
+	if (map) {
+		*map = dma_map_single(na->pdev, buf, netmap_mem_get_bufsize(na->nm_mem),
+				DMA_BIDIRECTIONAL);
+	}
+}
+
+static inline void
+netmap_unload_map(struct netmap_adapter *na,
+	bus_dma_tag_t tag, bus_dmamap_t map)
+{
+	u_int sz = netmap_mem_get_bufsize(na->nm_mem);
+
+	if (*map) {
+		dma_unmap_single(na->pdev, *map, sz,
+				DMA_BIDIRECTIONAL);
+	}
+}
+
+static inline void
+netmap_reload_map(struct netmap_adapter *na,
+	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
+{
+	u_int sz = netmap_mem_get_bufsize(na->nm_mem);
+
+	if (*map) {
+		dma_unmap_single(na->pdev, *map, sz,
+				DMA_BIDIRECTIONAL);
+	}
+
+	*map = dma_map_single(na->pdev, buf, sz,
+				DMA_BIDIRECTIONAL);
+}
+
 /*
  * XXX How do we redefine these functions:
  *
@@ -387,8 +1342,7 @@
  * unfortunately the direction is not, so we need to change
  * something to have a cross API
  */
-#define netmap_load_map(_t, _m, _b)
-#define netmap_reload_map(_t, _m, _b)
+
 #if 0
 	struct e1000_buffer *buffer_info =  &tx_ring->buffer_info[l];
 	/* set time_stamp *before* dma to help avoid a possible race */
@@ -418,6 +1372,7 @@
 
 #endif /* linux */
 
+
 /*
  * functions to map NIC to KRING indexes (n2k) and vice versa (k2n)
  */
@@ -456,9 +1411,6 @@
 };
 
 struct netmap_obj_pool;
-extern struct lut_entry *netmap_buffer_lut;
-#define NMB_VA(i)	(netmap_buffer_lut[i].vaddr)
-#define NMB_PA(i)	(netmap_buffer_lut[i].paddr)
 
 /*
  * NMB return the virtual address of a buffer (buffer 0 on bad index)
@@ -465,24 +1417,230 @@
  * PNMB also fills the physical address
  */
 static inline void *
-NMB(struct netmap_slot *slot)
+NMB(struct netmap_adapter *na, struct netmap_slot *slot)
 {
+	struct lut_entry *lut = na->na_lut;
 	uint32_t i = slot->buf_idx;
-	return (unlikely(i >= netmap_total_buffers)) ?  NMB_VA(0) : NMB_VA(i);
+	return (unlikely(i >= na->na_lut_objtotal)) ?
+		lut[0].vaddr : lut[i].vaddr;
 }
 
 static inline void *
-PNMB(struct netmap_slot *slot, uint64_t *pp)
+PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
 {
 	uint32_t i = slot->buf_idx;
-	void *ret = (i >= netmap_total_buffers) ? NMB_VA(0) : NMB_VA(i);
+	struct lut_entry *lut = na->na_lut;
+	void *ret = (i >= na->na_lut_objtotal) ? lut[0].vaddr : lut[i].vaddr;
 
-	*pp = (i >= netmap_total_buffers) ? NMB_PA(0) : NMB_PA(i);
+	*pp = (i >= na->na_lut_objtotal) ? lut[0].paddr : lut[i].paddr;
 	return ret;
 }
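A hedged sketch of how a FreeBSD driver's txsync path combines the per-adapter buffer lookup above with the new DMA helpers; txr, txtag and txbuf are driver-local names used only for illustration:

    uint64_t paddr;
    void *addr = PNMB(na, slot, &paddr);    /* buffer address; physical address in paddr */

    if (slot->flags & NS_BUF_CHANGED) {
            /* userspace swapped the buffer: re-map it before issuing DMA */
            netmap_reload_map(na, txr->txtag, txbuf->map, addr);
            slot->flags &= ~NS_BUF_CHANGED;
    }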
 
-/* default functions to handle rx/tx interrupts */
-int netmap_rx_irq(struct ifnet *, int, int *);
-#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
+/* Generic version of NMB, which uses device-specific memory. */
 
+
+
+void netmap_txsync_to_host(struct netmap_adapter *na);
+
+
+/*
+ * Structure associated to each thread which registered an interface.
+ *
+ * The first 4 fields of this structure are written by NIOCREGIF and
+ * read by poll() and NIOC?XSYNC.
+ *
+ * There is low contention among writers (a correct user program
+ * should have none) and among writers and readers, so we use a
+ * single global lock to protect the structure initialization;
+ * since initialization involves the allocation of memory,
+ * we reuse the memory allocator lock.
+ *
+ * Read access to the structure is lock free. Readers must check that
+ * np_nifp is not NULL before using the other fields.
+ * If np_nifp is NULL initialization has not been performed,
+ * so they should return an error to userspace.
+ *
+ * The ref_done field is used to regulate access to the refcount in the
+ * memory allocator. The refcount must be incremented at most once for
+ * each open("/dev/netmap"). The increment is performed by the first
+ * function that calls netmap_get_memory() (currently called by
+ * mmap(), NIOCGINFO and NIOCREGIF).
+ * If the refcount is incremented, it is then decremented when the
+ * private structure is destroyed.
+ */
+struct netmap_priv_d {
+	struct netmap_if * volatile np_nifp;	/* netmap if descriptor. */
+
+	struct netmap_adapter	*np_na;
+	uint32_t	np_flags;	/* from the ioctl */
+	u_int		np_txqfirst, np_txqlast; /* range of tx rings to scan */
+	u_int		np_rxqfirst, np_rxqlast; /* range of rx rings to scan */
+	uint16_t	np_txpoll;	/* XXX and also np_rxpoll ? */
+
+	struct netmap_mem_d     *np_mref;	/* use with NMG_LOCK held */
+	/* np_refcount is only used on FreeBSD */
+	int		np_refcount;	/* use with NMG_LOCK held */
+
+	/* pointers to the selinfo to be used for selrecord.
+	 * Either the local or the global one depending on the
+	 * number of rings.
+	 */
+	NM_SELINFO_T *np_rxsi, *np_txsi;
+	struct thread	*np_td;		/* kqueue, just debugging */
+};
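The lock-free read rule described in the comment amounts to a check like the following sketch (the ENXIO error value is an assumption used only for illustration):

    struct netmap_if *nifp = priv->np_nifp;

    if (nifp == NULL)       /* NIOCREGIF has not completed yet */
            return ENXIO;
    /* np_nifp is written last, so the other np_* fields are now valid */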
+
+#ifdef WITH_MONITOR
+
+struct netmap_monitor_adapter {
+	struct netmap_adapter up;
+
+	struct netmap_priv_d priv;
+	uint32_t flags;
+};
+
+#endif /* WITH_MONITOR */
+
+
+#ifdef WITH_GENERIC
+/*
+ * generic netmap emulation for devices that do not have
+ * native netmap support.
+ */
+int generic_netmap_attach(struct ifnet *ifp);
+
+int netmap_catch_rx(struct netmap_adapter *na, int intercept);
+void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);
+void netmap_catch_tx(struct netmap_generic_adapter *na, int enable);
+int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
+int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
+void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
+
+//#define RATE_GENERIC  /* Enables communication statistics for generic. */
+#ifdef RATE_GENERIC
+void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi);
+#else
+#define generic_rate(txp, txs, txi, rxp, rxs, rxi)
+#endif
+
+/*
+ * netmap_mitigation API. This is used by the generic adapter
+ * to reduce the number of interrupt requests/selwakeup
+ * to clients on incoming packets.
+ */
+void netmap_mitigation_init(struct nm_generic_mit *mit, int idx,
+                                struct netmap_adapter *na);
+void netmap_mitigation_start(struct nm_generic_mit *mit);
+void netmap_mitigation_restart(struct nm_generic_mit *mit);
+int netmap_mitigation_active(struct nm_generic_mit *mit);
+void netmap_mitigation_cleanup(struct nm_generic_mit *mit);
+#endif /* WITH_GENERIC */
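An illustrative sketch of how a generic-adapter rx handler is expected to combine these calls; the gna->mit[] field name and the use of netmap_rx_irq() here are assumptions made for the example:

    int work_done;

    if (!netmap_mitigation_active(&gna->mit[ring_nr])) {
            netmap_rx_irq(ifp, ring_nr, &work_done);        /* wake clients now */
            netmap_mitigation_start(&gna->mit[ring_nr]);
    } else {
            netmap_mitigation_restart(&gna->mit[ring_nr]);  /* batch later wakeups */
    }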
+
+
+
+/* Shared declarations for the VALE switch. */
+
+/*
+ * Each transmit queue accumulates a batch of packets into
+ * a structure before forwarding. Packets to the same
+ * destination are put in a list using ft_next as a link field.
+ * ft_frags and ft_next are valid only on the first fragment.
+ */
+struct nm_bdg_fwd {	/* forwarding entry for a bridge */
+	void *ft_buf;		/* netmap or indirect buffer */
+	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
+	uint8_t _ft_port;	/* dst port (unused) */
+	uint16_t ft_flags;	/* flags, e.g. indirect */
+	uint16_t ft_len;	/* src fragment len */
+	uint16_t ft_next;	/* next packet to same destination */
+};
+
+/* struct 'virtio_net_hdr' from linux. */
+struct nm_vnet_hdr {
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM     1	/* Use csum_start, csum_offset */
+#define VIRTIO_NET_HDR_F_DATA_VALID    2	/* Csum is valid */
+    uint8_t flags;
+#define VIRTIO_NET_HDR_GSO_NONE         0       /* Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4        1       /* GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP          3       /* GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6        4       /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN          0x80    /* TCP has ECN set */
+    uint8_t gso_type;
+    uint16_t hdr_len;
+    uint16_t gso_size;
+    uint16_t csum_start;
+    uint16_t csum_offset;
+};
+
+#define WORST_CASE_GSO_HEADER	(14+40+60)  /* IPv6 + TCP */
+
+/* Private definitions for IPv4, IPv6, UDP and TCP headers. */
+
+struct nm_iphdr {
+	uint8_t		version_ihl;
+	uint8_t		tos;
+	uint16_t	tot_len;
+	uint16_t	id;
+	uint16_t	frag_off;
+	uint8_t		ttl;
+	uint8_t		protocol;
+	uint16_t	check;
+	uint32_t	saddr;
+	uint32_t	daddr;
+	/* The options start here. */
+};
+
+struct nm_tcphdr {
+	uint16_t	source;
+	uint16_t	dest;
+	uint32_t	seq;
+	uint32_t	ack_seq;
+	uint8_t		doff;  /* Data offset + Reserved */
+	uint8_t		flags;
+	uint16_t	window;
+	uint16_t	check;
+	uint16_t	urg_ptr;
+};
+
+struct nm_udphdr {
+	uint16_t	source;
+	uint16_t	dest;
+	uint16_t	len;
+	uint16_t	check;
+};
+
+struct nm_ipv6hdr {
+	uint8_t		priority_version;
+	uint8_t		flow_lbl[3];
+
+	uint16_t	payload_len;
+	uint8_t		nexthdr;
+	uint8_t		hop_limit;
+
+	uint8_t		saddr[16];
+	uint8_t		daddr[16];
+};
+
+/* Type used to store a checksum (in host byte order) that hasn't been
+ * folded yet.
+ */
+#define rawsum_t uint32_t
+
+rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum);
+uint16_t nm_csum_ipv4(struct nm_iphdr *iph);
+void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
+		      size_t datalen, uint16_t *check);
+void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
+		      size_t datalen, uint16_t *check);
+uint16_t nm_csum_fold(rawsum_t cur_sum);
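For reference, the helpers compose roughly as follows when fixing up the checksums of a plain IPv4/TCP frame (offsets assume no VLAN tag and no IP options; this is an illustration, not code from this commit):

    uint8_t *frame;     /* assumed to point at the start of the Ethernet frame */
    struct nm_iphdr *iph = (struct nm_iphdr *)(frame + 14);          /* after the MAC header */
    struct nm_tcphdr *tcph = (struct nm_tcphdr *)((char *)iph + 20); /* 20-byte IPv4 header */
    size_t tcplen = ntohs(iph->tot_len) - 20;                        /* TCP header + payload */

    iph->check = 0;
    iph->check = nm_csum_ipv4(iph);                         /* IPv4 header checksum */
    nm_csum_tcpudp_ipv4(iph, tcph, tcplen, &tcph->check);   /* TCP checksum incl. pseudo-header */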
+
+void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
+			   struct netmap_vp_adapter *dst_na,
+			   struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
+			   u_int *j, u_int lim, u_int *howmany);
+
+/* persistent virtual port routines */
+int nm_vi_persist(const char *, struct ifnet **);
+void nm_vi_detach(struct ifnet *);
+void nm_vi_init_index(void);
+
 #endif /* _NET_NETMAP_KERN_H_ */

Added: trunk/sys/dev/netmap/netmap_mbq.c
===================================================================
--- trunk/sys/dev/netmap/netmap_mbq.c	                        (rev 0)
+++ trunk/sys/dev/netmap/netmap_mbq.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,164 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: stable/10/sys/dev/netmap/netmap_mbq.c 267282 2014-06-09 15:24:45Z luigi $
+ */
+
+
+#ifdef linux
+#include "bsd_glue.h"
+#else   /* __FreeBSD__ */
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#endif  /* __FreeBSD__ */
+
+#include "netmap_mbq.h"
+
+
+static inline void __mbq_init(struct mbq *q)
+{
+    q->head = q->tail = NULL;
+    q->count = 0;
+}
+
+
+void mbq_safe_init(struct mbq *q)
+{
+    mtx_init(&q->lock, "mbq", NULL, MTX_SPIN);
+    __mbq_init(q);
+}
+
+
+void mbq_init(struct mbq *q)
+{
+    __mbq_init(q);
+}
+
+
+static inline void __mbq_enqueue(struct mbq *q, struct mbuf *m)
+{
+    m->m_nextpkt = NULL;
+    if (q->tail) {
+        q->tail->m_nextpkt = m;
+        q->tail = m;
+    } else {
+        q->head = q->tail = m;
+    }
+    q->count++;
+}
+
+
+void mbq_safe_enqueue(struct mbq *q, struct mbuf *m)
+{
+    mbq_lock(q);
+    __mbq_enqueue(q, m);
+    mbq_unlock(q);
+}
+
+
+void mbq_enqueue(struct mbq *q, struct mbuf *m)
+{
+    __mbq_enqueue(q, m);
+}
+
+
+static inline struct mbuf *__mbq_dequeue(struct mbq *q)
+{
+    struct mbuf *ret = NULL;
+
+    if (q->head) {
+        ret = q->head;
+        q->head = ret->m_nextpkt;
+        if (q->head == NULL) {
+            q->tail = NULL;
+        }
+        q->count--;
+        ret->m_nextpkt = NULL;
+    }
+
+    return ret;
+}
+
+
+struct mbuf *mbq_safe_dequeue(struct mbq *q)
+{
+    struct mbuf *ret;
+
+    mbq_lock(q);
+    ret =  __mbq_dequeue(q);
+    mbq_unlock(q);
+
+    return ret;
+}
+
+
+struct mbuf *mbq_dequeue(struct mbq *q)
+{
+    return __mbq_dequeue(q);
+}
+
+
+/* XXX seems pointless to have a generic purge */
+static void __mbq_purge(struct mbq *q, int safe)
+{
+    struct mbuf *m;
+
+    for (;;) {
+        m = safe ? mbq_safe_dequeue(q) : mbq_dequeue(q);
+        if (m) {
+            m_freem(m);
+        } else {
+            break;
+        }
+    }
+}
+
+
+void mbq_purge(struct mbq *q)
+{
+    __mbq_purge(q, 0);
+}
+
+
+void mbq_safe_purge(struct mbq *q)
+{
+    __mbq_purge(q, 1);
+}
+
+
+void mbq_safe_destroy(struct mbq *q)
+{
+    mtx_destroy(&q->lock);
+}
+
+
+void mbq_destroy(struct mbq *q)
+{
+}


Property changes on: trunk/sys/dev/netmap/netmap_mbq.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/netmap/netmap_mbq.h
===================================================================
--- trunk/sys/dev/netmap/netmap_mbq.h	                        (rev 0)
+++ trunk/sys/dev/netmap/netmap_mbq.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,90 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: stable/10/sys/dev/netmap/netmap_mbq.h 270252 2014-08-20 23:34:36Z luigi $
+ */
+
+
+#ifndef __NETMAP_MBQ_H__
+#define __NETMAP_MBQ_H__
+
+/*
+ * These functions implement an mbuf tailq with an optional lock.
+ * The base functions act ONLY ON THE QUEUE, whereas the "safe"
+ * variants (mbq_safe_*) also handle the lock.
+ */
+
+/* XXX probably rely on a previous definition of SPINLOCK_T */
+#ifdef linux
+#define SPINLOCK_T  safe_spinlock_t
+#else
+#define SPINLOCK_T  struct mtx
+#endif
+
+/* A FIFO queue of mbufs with an optional lock. */
+struct mbq {
+    struct mbuf *head;
+    struct mbuf *tail;
+    int count;
+    SPINLOCK_T lock;
+};
+
+/* XXX "destroy" does not match "init" as a name.
+ * We should also clarify whether init can be used while
+ * holding a lock, and whether mbq_safe_destroy() is a NOP.
+ */
+void mbq_init(struct mbq *q);
+void mbq_destroy(struct mbq *q);
+void mbq_enqueue(struct mbq *q, struct mbuf *m);
+struct mbuf *mbq_dequeue(struct mbq *q);
+void mbq_purge(struct mbq *q);
+
+static inline void
+mbq_lock(struct mbq *q)
+{
+	mtx_lock_spin(&q->lock);
+}
+
+static inline void
+mbq_unlock(struct mbq *q)
+{
+	mtx_unlock_spin(&q->lock);
+}
+
+
+void mbq_safe_init(struct mbq *q);
+void mbq_safe_destroy(struct mbq *q);
+void mbq_safe_enqueue(struct mbq *q, struct mbuf *m);
+struct mbuf *mbq_safe_dequeue(struct mbq *q);
+void mbq_safe_purge(struct mbq *q);
+
+static inline unsigned int mbq_len(struct mbq *q)
+{
+    return q->count;
+}
+
+#endif /* __NETMAP_MBQ_H__ */
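Typical use of the queue in its locked form is a sketch along these lines (the producer/consumer split is an assumption for illustration):

    struct mbq q;
    struct mbuf *m;

    mbq_safe_init(&q);                      /* also initializes the spinlock */
    mbq_safe_enqueue(&q, m);                /* producer side; 'm' obtained elsewhere */
    while ((m = mbq_safe_dequeue(&q)) != NULL)
            m_freem(m);                     /* consumer side: drain and free */
    mbq_safe_purge(&q);                     /* drop anything still queued */
    mbq_safe_destroy(&q);                   /* releases the spinlock */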


Property changes on: trunk/sys/dev/netmap/netmap_mbq.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/dev/netmap/netmap_mem2.c
===================================================================
--- trunk/sys/dev/netmap/netmap_mem2.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/netmap/netmap_mem2.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 /*
- * Copyright (C) 2012-2013 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
+ * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -8,7 +9,7 @@
  *      notice, this list of conditions and the following disclaimer.
  *   2. Redistributions in binary form must reproduce the above copyright
  *      notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
+ *      documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -23,94 +24,42 @@
  * SUCH DAMAGE.
  */
 
-/*
- * $MidnightBSD$
- * $Id: netmap_mem2.c,v 1.2 2013-01-08 03:53:24 laffer1 Exp $
- *
- * New memory allocator for netmap
- */
+#ifdef linux
+#include "bsd_glue.h"
+#endif /* linux */
 
-/*
- * The new version allocates three regions:
- *	nm_if_pool      for the struct netmap_if
- *	nm_ring_pool    for the struct netmap_ring
- *	nm_buf_pool    for the packet buffers.
- *
- * All regions need to be page-sized as we export them to
- * userspace through mmap. Only the latter need to be dma-able,
- * but for convenience use the same type of allocator for all.
- *
- * Once mapped, the three regions are exported to userspace
- * as a contiguous block, starting from nm_if_pool. Each
- * cluster (and pool) is an integral number of pages.
- *   [ . . . ][ . . . . . .][ . . . . . . . . . .]
- *    nm_if     nm_ring            nm_buf
- *
- * The userspace areas contain offsets of the objects in userspace.
- * When (at init time) we write these offsets, we find out the index
- * of the object, and from there locate the offset from the beginning
- * of the region.
- *
- * The invididual allocators manage a pool of memory for objects of
- * the same size.
- * The pool is split into smaller clusters, whose size is a
- * multiple of the page size. The cluster size is chosen
- * to minimize the waste for a given max cluster size
- * (we do it by brute force, as we have relatively few objects
- * per cluster).
- *
- * Objects are aligned to the cache line (64 bytes) rounding up object
- * sizes when needed. A bitmap contains the state of each object.
- * Allocation scans the bitmap; this is done only on attach, so we are not
- * too worried about performance
- *
- * For each allocator we can define (thorugh sysctl) the size and
- * number of each object. Memory is allocated at the first use of a
- * netmap file descriptor, and can be freed when all such descriptors
- * have been released (including unmapping the memory).
- * If memory is scarce, the system tries to get as much as possible
- * and the sysctl values reflect the actual allocation.
- * Together with desired values, the sysctl export also absolute
- * min and maximum values that cannot be overridden.
- *
- * struct netmap_if:
- *	variable size, max 16 bytes per ring pair plus some fixed amount.
- *	1024 bytes should be large enough in practice.
- *
- *	In the worst case we have one netmap_if per ring in the system.
- *
- * struct netmap_ring
- *	variable size, 8 byte per slot plus some fixed amount.
- *	Rings can be large (e.g. 4k slots, or >32Kbytes).
- *	We default to 36 KB (9 pages), and a few hundred rings.
- *
- * struct netmap_buffer
- *	The more the better, both because fast interfaces tend to have
- *	many slots, and because we may want to use buffers to store
- *	packets in userspace avoiding copies.
- *	Must contain a full frame (eg 1518, or more for vlans, jumbo
- *	frames etc.) plus be nicely aligned, plus some NICs restrict
- *	the size to multiple of 1K or so. Default to 2K
- */
+#ifdef __APPLE__
+#include "osx_glue.h"
+#endif /* __APPLE__ */
 
+#ifdef __FreeBSD__
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD: stable/10/sys/dev/netmap/netmap_mem2.c 283343 2015-05-24 01:48:33Z pkelsey $");
+
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <vm/vm.h>	/* vtophys */
+#include <vm/pmap.h>	/* vtophys */
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/vnet.h>
+#include <machine/bus.h>	/* bus_dmamap_* */
+
+#endif /* __FreeBSD__ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include "netmap_mem2.h"
+
 #define NETMAP_BUF_MAX_NUM	20*4096*2	/* large machine */
 
-#ifdef linux
-// XXX a mtx would suffice here 20130415 lr
-// #define NMA_LOCK_T		safe_spinlock_t
-#define NMA_LOCK_T		struct semaphore
-#define NMA_LOCK_INIT()		sema_init(&nm_mem.nm_mtx, 1)
-#define NMA_LOCK_DESTROY()
-#define NMA_LOCK()		down(&nm_mem.nm_mtx)
-#define NMA_UNLOCK()		up(&nm_mem.nm_mtx)
-#else /* !linux */
-#define NMA_LOCK_T		struct mtx
-#define NMA_LOCK_INIT()		mtx_init(&nm_mem.nm_mtx, "netmap memory allocator lock", NULL, MTX_DEF)
-#define NMA_LOCK_DESTROY()	mtx_destroy(&nm_mem.nm_mtx)
-#define NMA_LOCK()		mtx_lock(&nm_mem.nm_mtx)
-#define NMA_UNLOCK()		mtx_unlock(&nm_mem.nm_mtx)
-#endif /* linux */
+#define NETMAP_POOL_MAX_NAMSZ	32
 
+
 enum {
 	NETMAP_IF_POOL   = 0,
 	NETMAP_RING_POOL,
@@ -123,30 +72,23 @@
 	u_int size;
 	u_int num;
 };
+struct netmap_obj_pool {
+	char name[NETMAP_POOL_MAX_NAMSZ];	/* name of the allocator */
 
+	/* ---------------------------------------------------*/
+	/* these are only meaningful if the pool is finalized */
+	/* (see 'finalized' field in netmap_mem_d)            */
+	u_int objtotal;         /* actual total number of objects. */
+	u_int memtotal;		/* actual total memory space */
+	u_int numclusters;	/* actual number of clusters */
 
-struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
-	[NETMAP_IF_POOL] = {
-		.size = 1024,
-		.num  = 100,
-	},
-	[NETMAP_RING_POOL] = {
-		.size = 9*PAGE_SIZE,
-		.num  = 200,
-	},
-	[NETMAP_BUF_POOL] = {
-		.size = 2048,
-		.num  = NETMAP_BUF_MAX_NUM,
-	},
-};
+	u_int objfree;          /* number of free objects. */
 
+	struct lut_entry *lut;  /* virt,phys addresses, objtotal entries */
+	uint32_t *bitmap;       /* one bit per buffer, 1 means free */
+	uint32_t bitmap_slots;	/* number of uint32 entries in bitmap */
+	/* ---------------------------------------------------*/
 
-struct netmap_obj_pool {
-	char name[16];		/* name of the allocator */
-	u_int objtotal;         /* actual total number of objects. */
-	u_int objfree;          /* number of free objects. */
-	u_int clustentries;	/* actual objects per cluster */
-
 	/* limits */
 	u_int objminsize;	/* minimum object size */
 	u_int objmaxsize;	/* maximum object size */
@@ -153,35 +95,125 @@
 	u_int nummin;		/* minimum number of objects */
 	u_int nummax;		/* maximum number of objects */
 
-	/* the total memory space is _numclusters*_clustsize */
-	u_int _numclusters;	/* how many clusters */
-	u_int _clustsize;        /* cluster size */
-	u_int _objsize;		/* actual object size */
+	/* these are changed only by config */
+	u_int _objtotal;	/* total number of objects */
+	u_int _objsize;		/* object size */
+	u_int _clustsize;       /* cluster size */
+	u_int _clustentries;    /* objects per cluster */
+	u_int _numclusters;	/* number of clusters */
 
-	u_int _memtotal;	/* _numclusters*_clustsize */
-	struct lut_entry *lut;  /* virt,phys addresses, objtotal entries */
-	uint32_t *bitmap;       /* one bit per buffer, 1 means free */
-	uint32_t bitmap_slots;	/* number of uint32 entries in bitmap */
+	/* requested values */
+	u_int r_objtotal;
+	u_int r_objsize;
 };
 
+#ifdef linux
+// XXX a mtx would suffice here 20130415 lr
+#define NMA_LOCK_T		struct semaphore
+#else /* !linux */
+#define NMA_LOCK_T		struct mtx
+#endif /* linux */
 
+typedef int (*netmap_mem_config_t)(struct netmap_mem_d*);
+typedef int (*netmap_mem_finalize_t)(struct netmap_mem_d*);
+typedef void (*netmap_mem_deref_t)(struct netmap_mem_d*);
+
+typedef uint16_t nm_memid_t;
+
 struct netmap_mem_d {
 	NMA_LOCK_T nm_mtx;  /* protect the allocator */
 	u_int nm_totalsize; /* shorthand */
 
-	int finalized;		/* !=0 iff preallocation done */
+	u_int flags;
+#define NETMAP_MEM_FINALIZED	0x1	/* preallocation done */
 	int lasterr;		/* last error for curr config */
 	int refcount;		/* existing priv structures */
 	/* the three allocators */
 	struct netmap_obj_pool pools[NETMAP_POOLS_NR];
+
+	netmap_mem_config_t   config;	/* called with NMA_LOCK held */
+	netmap_mem_finalize_t finalize;	/* called with NMA_LOCK held */
+	netmap_mem_deref_t    deref;	/* called with NMA_LOCK held */
+
+	nm_memid_t nm_id;	/* allocator identifier */
+	int nm_grp;	/* iommu groupd id */
+
+	/* list of all existing allocators, sorted by nm_id */
+	struct netmap_mem_d *prev, *next;
 };
 
+/* accessor functions */
+struct lut_entry*
+netmap_mem_get_lut(struct netmap_mem_d *nmd)
+{
+	return nmd->pools[NETMAP_BUF_POOL].lut;
+}
+
+u_int
+netmap_mem_get_buftotal(struct netmap_mem_d *nmd)
+{
+	return nmd->pools[NETMAP_BUF_POOL].objtotal;
+}
+
+size_t
+netmap_mem_get_bufsize(struct netmap_mem_d *nmd)
+{
+	return nmd->pools[NETMAP_BUF_POOL]._objsize;
+}
+
+#ifdef linux
+#define NMA_LOCK_INIT(n)	sema_init(&(n)->nm_mtx, 1)
+#define NMA_LOCK_DESTROY(n)
+#define NMA_LOCK(n)		down(&(n)->nm_mtx)
+#define NMA_UNLOCK(n)		up(&(n)->nm_mtx)
+#else /* !linux */
+#define NMA_LOCK_INIT(n)	mtx_init(&(n)->nm_mtx, "netmap memory allocator lock", NULL, MTX_DEF)
+#define NMA_LOCK_DESTROY(n)	mtx_destroy(&(n)->nm_mtx)
+#define NMA_LOCK(n)		mtx_lock(&(n)->nm_mtx)
+#define NMA_UNLOCK(n)		mtx_unlock(&(n)->nm_mtx)
+#endif /* linux */
+
+
+struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
+	[NETMAP_IF_POOL] = {
+		.size = 1024,
+		.num  = 100,
+	},
+	[NETMAP_RING_POOL] = {
+		.size = 9*PAGE_SIZE,
+		.num  = 200,
+	},
+	[NETMAP_BUF_POOL] = {
+		.size = 2048,
+		.num  = NETMAP_BUF_MAX_NUM,
+	},
+};
+
+struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = {
+	[NETMAP_IF_POOL] = {
+		.size = 1024,
+		.num  = 1,
+	},
+	[NETMAP_RING_POOL] = {
+		.size = 5*PAGE_SIZE,
+		.num  = 4,
+	},
+	[NETMAP_BUF_POOL] = {
+		.size = 2048,
+		.num  = 4098,
+	},
+};
+
+
 /*
  * nm_mem is the memory allocator used for all physical interfaces
  * running in netmap mode.
  * Virtual (VALE) ports will have each its own allocator.
  */
-static struct netmap_mem_d nm_mem = {	/* Our memory allocator. */
+static int netmap_mem_global_config(struct netmap_mem_d *nmd);
+static int netmap_mem_global_finalize(struct netmap_mem_d *nmd);
+static void netmap_mem_global_deref(struct netmap_mem_d *nmd);
+struct netmap_mem_d nm_mem = {	/* Our memory allocator. */
 	.pools = {
 		[NETMAP_IF_POOL] = {
 			.name 	= "netmap_if",
@@ -205,62 +237,214 @@
 			.nummax	    = 1000000, /* one million! */
 		},
 	},
+	.config   = netmap_mem_global_config,
+	.finalize = netmap_mem_global_finalize,
+	.deref    = netmap_mem_global_deref,
+
+	.nm_id = 1,
+	.nm_grp = -1,
+
+	.prev = &nm_mem,
+	.next = &nm_mem,
 };
 
-// XXX logically belongs to nm_mem
-struct lut_entry *netmap_buffer_lut;	/* exported */
 
+struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
+
+/* blueprint for the private memory allocators */
+static int netmap_mem_private_config(struct netmap_mem_d *nmd);
+static int netmap_mem_private_finalize(struct netmap_mem_d *nmd);
+static void netmap_mem_private_deref(struct netmap_mem_d *nmd);
+const struct netmap_mem_d nm_blueprint = {
+	.pools = {
+		[NETMAP_IF_POOL] = {
+			.name 	= "%s_if",
+			.objminsize = sizeof(struct netmap_if),
+			.objmaxsize = 4096,
+			.nummin     = 1,
+			.nummax	    = 100,
+		},
+		[NETMAP_RING_POOL] = {
+			.name 	= "%s_ring",
+			.objminsize = sizeof(struct netmap_ring),
+			.objmaxsize = 32*PAGE_SIZE,
+			.nummin     = 2,
+			.nummax	    = 1024,
+		},
+		[NETMAP_BUF_POOL] = {
+			.name	= "%s_buf",
+			.objminsize = 64,
+			.objmaxsize = 65536,
+			.nummin     = 4,
+			.nummax	    = 1000000, /* one million! */
+		},
+	},
+	.config   = netmap_mem_private_config,
+	.finalize = netmap_mem_private_finalize,
+	.deref    = netmap_mem_private_deref,
+
+	.flags = NETMAP_MEM_PRIVATE,
+};
+
 /* memory allocator related sysctls */
 
 #define STRINGIFY(x) #x
 
+
 #define DECLARE_SYSCTLS(id, name) \
 	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \
 	    CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \
-        SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
-            CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \
-        SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
-            CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
-        SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
-            CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s")
+	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
+	    CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \
+	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
+	    CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
+	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
+	    CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \
+	SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_size, \
+	    CTLFLAG_RW, &netmap_min_priv_params[id].size, 0, \
+	    "Default size of private netmap " STRINGIFY(name) "s"); \
+	SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_num, \
+	    CTLFLAG_RW, &netmap_min_priv_params[id].num, 0, \
+	    "Default number of private netmap " STRINGIFY(name) "s")
 
+SYSCTL_DECL(_dev_netmap);
 DECLARE_SYSCTLS(NETMAP_IF_POOL, if);
 DECLARE_SYSCTLS(NETMAP_RING_POOL, ring);
 DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
 
+static int
+nm_mem_assign_id(struct netmap_mem_d *nmd)
+{
+	nm_memid_t id;
+	struct netmap_mem_d *scan = netmap_last_mem_d;
+	int error = ENOMEM;
+
+	NMA_LOCK(&nm_mem);
+
+	do {
+		/* we rely on unsigned wrap around */
+		id = scan->nm_id + 1;
+		if (id == 0) /* reserve 0 as error value */
+			id = 1;
+		scan = scan->next;
+		if (id != scan->nm_id) {
+			nmd->nm_id = id;
+			nmd->prev = scan->prev;
+			nmd->next = scan;
+			scan->prev->next = nmd;
+			scan->prev = nmd;
+			netmap_last_mem_d = nmd;
+			error = 0;
+			break;
+		}
+	} while (scan != netmap_last_mem_d);
+
+	NMA_UNLOCK(&nm_mem);
+	return error;
+}
+
+static void
+nm_mem_release_id(struct netmap_mem_d *nmd)
+{
+	NMA_LOCK(&nm_mem);
+
+	nmd->prev->next = nmd->next;
+	nmd->next->prev = nmd->prev;
+
+	if (netmap_last_mem_d == nmd)
+		netmap_last_mem_d = nmd->prev;
+
+	nmd->prev = nmd->next = NULL;
+
+	NMA_UNLOCK(&nm_mem);
+}
+
+static int
+nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev)
+{
+	int err = 0, id;
+	id = nm_iommu_group_id(dev);
+	if (netmap_verbose)
+		D("iommu_group %d", id);
+
+	NMA_LOCK(nmd);
+
+	if (nmd->nm_grp < 0)
+		nmd->nm_grp = id;
+
+	if (nmd->nm_grp != id)
+		nmd->lasterr = err = ENOMEM;
+
+	NMA_UNLOCK(nmd);
+	return err;
+}
+
 /*
- * Convert a userspace offset to a physical address.
- * XXX only called in the FreeBSD's netmap_mmap()
- * because in linux we map everything at once.
- *
  * First, find the allocator that contains the requested offset,
  * then locate the cluster through a lookup table.
  */
-static inline vm_paddr_t
-netmap_ofstophys(vm_offset_t offset)
+vm_paddr_t
+netmap_mem_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
 {
 	int i;
-	vm_offset_t o = offset;
-	struct netmap_obj_pool *p = nm_mem.pools;
+	vm_ooffset_t o = offset;
+	vm_paddr_t pa;
+	struct netmap_obj_pool *p;
 
-	for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i]._memtotal, i++) {
-		if (offset >= p[i]._memtotal)
+	NMA_LOCK(nmd);
+	p = nmd->pools;
+
+	for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) {
+		if (offset >= p[i].memtotal)
 			continue;
 		// now lookup the cluster's address
-		return p[i].lut[offset / p[i]._objsize].paddr +
+		pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr) +
 			offset % p[i]._objsize;
+		NMA_UNLOCK(nmd);
+		return pa;
 	}
 	/* this is only in case of errors */
 	D("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o,
-		p[NETMAP_IF_POOL]._memtotal,
-		p[NETMAP_IF_POOL]._memtotal
-			+ p[NETMAP_RING_POOL]._memtotal,
-		p[NETMAP_IF_POOL]._memtotal
-			+ p[NETMAP_RING_POOL]._memtotal
-			+ p[NETMAP_BUF_POOL]._memtotal);
+		p[NETMAP_IF_POOL].memtotal,
+		p[NETMAP_IF_POOL].memtotal
+			+ p[NETMAP_RING_POOL].memtotal,
+		p[NETMAP_IF_POOL].memtotal
+			+ p[NETMAP_RING_POOL].memtotal
+			+ p[NETMAP_BUF_POOL].memtotal);
+	NMA_UNLOCK(nmd);
 	return 0;	// XXX bad address
 }
 
+int
+netmap_mem_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags,
+	nm_memid_t *id)
+{
+	int error = 0;
+	NMA_LOCK(nmd);
+	error = nmd->config(nmd);
+	if (error)
+		goto out;
+	if (size) {
+		if (nmd->flags & NETMAP_MEM_FINALIZED) {
+			*size = nmd->nm_totalsize;
+		} else {
+			int i;
+			*size = 0;
+			for (i = 0; i < NETMAP_POOLS_NR; i++) {
+				struct netmap_obj_pool *p = nmd->pools + i;
+				*size += (p->_numclusters * p->_clustsize);
+			}
+		}
+	}
+	if (memflags)
+		*memflags = nmd->flags;
+	if (id)
+		*id = nmd->nm_id;
+out:
+	NMA_UNLOCK(nmd);
+	return error;
+}
+
 /*
  * we store objects by kernel address, need to find the offset
  * within the pool to export the value to userspace.
@@ -270,7 +454,7 @@
 static ssize_t
 netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr)
 {
-	int i, k = p->clustentries, n = p->objtotal;
+	int i, k = p->_clustentries, n = p->objtotal;
 	ssize_t ofs = 0;
 
 	for (i = 0; i < n; i += k, ofs += p->_clustsize) {
@@ -291,25 +475,35 @@
 }
 
 /* Helper functions which convert virtual addresses to offsets */
-#define netmap_if_offset(v)					\
-	netmap_obj_offset(&nm_mem.pools[NETMAP_IF_POOL], (v))
+#define netmap_if_offset(n, v)					\
+	netmap_obj_offset(&(n)->pools[NETMAP_IF_POOL], (v))
 
-#define netmap_ring_offset(v)					\
-    (nm_mem.pools[NETMAP_IF_POOL]._memtotal + 			\
-	netmap_obj_offset(&nm_mem.pools[NETMAP_RING_POOL], (v)))
+#define netmap_ring_offset(n, v)				\
+    ((n)->pools[NETMAP_IF_POOL].memtotal + 			\
+	netmap_obj_offset(&(n)->pools[NETMAP_RING_POOL], (v)))
 
-#define netmap_buf_offset(v)					\
-    (nm_mem.pools[NETMAP_IF_POOL]._memtotal +			\
-	nm_mem.pools[NETMAP_RING_POOL]._memtotal +		\
-	netmap_obj_offset(&nm_mem.pools[NETMAP_BUF_POOL], (v)))
+#define netmap_buf_offset(n, v)					\
+    ((n)->pools[NETMAP_IF_POOL].memtotal +			\
+	(n)->pools[NETMAP_RING_POOL].memtotal +		\
+	netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)))
 
 
+ssize_t
+netmap_mem_if_offset(struct netmap_mem_d *nmd, const void *addr)
+{
+	ssize_t v;
+	NMA_LOCK(nmd);
+	v = netmap_if_offset(nmd, addr);
+	NMA_UNLOCK(nmd);
+	return v;
+}
+
 /*
  * report the index, and use start position as a hint,
  * otherwise buffer allocation becomes terribly expensive.
  */
 static void *
-netmap_obj_malloc(struct netmap_obj_pool *p, int len, uint32_t *start, uint32_t *index)
+netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_t *index)
 {
 	uint32_t i = 0;			/* index in the bitmap */
 	uint32_t mask, j;		/* slot counter */
@@ -322,7 +516,7 @@
 	}
 
 	if (p->objfree == 0) {
-		D("%s allocator: run out of memory", p->name);
+		D("no more %s objects", p->name);
 		return NULL;
 	}
 	if (start)
@@ -355,28 +549,41 @@
 
 
 /*
- * free by index, not by address. This is slow, but is only used
- * for a small number of objects (rings, nifp)
+ * free by index, not by address.
+ * XXX should we also cleanup the content ?
  */
-static void
+static int
 netmap_obj_free(struct netmap_obj_pool *p, uint32_t j)
 {
+	uint32_t *ptr, mask;
+
 	if (j >= p->objtotal) {
 		D("invalid index %u, max %u", j, p->objtotal);
-		return;
+		return 1;
 	}
-	p->bitmap[j / 32] |= (1 << (j % 32));
-	p->objfree++;
-	return;
+	ptr = &p->bitmap[j / 32];
+	mask = (1 << (j % 32));
+	if (*ptr & mask) {
+		D("ouch, double free on buffer %d", j);
+		return 1;
+	} else {
+		*ptr |= mask;
+		p->objfree++;
+		return 0;
+	}
 }
 
+/*
+ * free by address. This is slow but is only used for a few
+ * objects (rings, nifp)
+ */
 static void
 netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
 {
-	int i, j, n = p->_memtotal / p->_clustsize;
+	u_int i, j, n = p->numclusters;
 
-	for (i = 0, j = 0; i < n; i++, j += p->clustentries) {
-		void *base = p->lut[i * p->clustentries].vaddr;
+	for (i = 0, j = 0; i < n; i++, j += p->_clustentries) {
+		void *base = p->lut[i * p->_clustentries].vaddr;
 		ssize_t relofs = (ssize_t) vaddr - (ssize_t) base;
 
 		/* Given address, is out of the scope of the current cluster.*/
@@ -384,7 +591,7 @@
 			continue;
 
 		j = j + relofs / p->_objsize;
-		KASSERT(j != 0, ("Cannot free object 0"));
+		/* KASSERT(j != 0, ("Cannot free object 0")); */
 		netmap_obj_free(p, j);
 		return;
 	}
@@ -392,43 +599,94 @@
 	    vaddr, p->name);
 }
 
-#define netmap_if_malloc(len)	netmap_obj_malloc(&nm_mem.pools[NETMAP_IF_POOL], len, NULL, NULL)
-#define netmap_if_free(v)	netmap_obj_free_va(&nm_mem.pools[NETMAP_IF_POOL], (v))
-#define netmap_ring_malloc(len)	netmap_obj_malloc(&nm_mem.pools[NETMAP_RING_POOL], len, NULL, NULL)
-#define netmap_ring_free(v)	netmap_obj_free_va(&nm_mem.pools[NETMAP_RING_POOL], (v))
-#define netmap_buf_malloc(_pos, _index)			\
-	netmap_obj_malloc(&nm_mem.pools[NETMAP_BUF_POOL], NETMAP_BUF_SIZE, _pos, _index)
+#define netmap_mem_bufsize(n)	\
+	((n)->pools[NETMAP_BUF_POOL]._objsize)
 
+#define netmap_if_malloc(n, len)	netmap_obj_malloc(&(n)->pools[NETMAP_IF_POOL], len, NULL, NULL)
+#define netmap_if_free(n, v)		netmap_obj_free_va(&(n)->pools[NETMAP_IF_POOL], (v))
+#define netmap_ring_malloc(n, len)	netmap_obj_malloc(&(n)->pools[NETMAP_RING_POOL], len, NULL, NULL)
+#define netmap_ring_free(n, v)		netmap_obj_free_va(&(n)->pools[NETMAP_RING_POOL], (v))
+#define netmap_buf_malloc(n, _pos, _index)			\
+	netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], netmap_mem_bufsize(n), _pos, _index)
 
+
+#if 0 // XXX unused
 /* Return the index associated to the given packet buffer */
-#define netmap_buf_index(v)						\
-    (netmap_obj_offset(&nm_mem.pools[NETMAP_BUF_POOL], (v)) / nm_mem.pools[NETMAP_BUF_POOL]._objsize)
+#define netmap_buf_index(n, v)						\
+    (netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)) / NETMAP_BDG_BUF_SIZE(n))
+#endif
 
+/*
+ * allocate extra buffers in a linked list.
+ * returns the actual number.
+ */
+uint32_t
+netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n)
+{
+	struct netmap_mem_d *nmd = na->nm_mem;
+	uint32_t i, pos = 0; /* opaque, scan position in the bitmap */
 
+	NMA_LOCK(nmd);
+
+	*head = 0;	/* default, 'null' index ie empty list */
+	for (i = 0 ; i < n; i++) {
+		uint32_t cur = *head;	/* save current head */
+		uint32_t *p = netmap_buf_malloc(nmd, &pos, head);
+		if (p == NULL) {
+			D("no more buffers after %d of %d", i, n);
+			*head = cur; /* restore */
+			break;
+		}
+		RD(5, "allocate buffer %d -> %d", *head, cur);
+		*p = cur; /* link to previous head */
+	}
+
+	NMA_UNLOCK(nmd);
+
+	return i;
+}
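Callers are expected to use it roughly like this (a sketch; the request size is arbitrary):

    uint32_t head = 0;
    uint32_t got = netmap_extra_alloc(na, &head, 128);   /* ask for 128 extra buffers */

    /* 'got' may be smaller than requested; 'head' indexes the first buffer,
     * each buffer's first word stores the index of the next one,
     * and 0 terminates the list. */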
+
+static void
+netmap_extra_free(struct netmap_adapter *na, uint32_t head)
+{
+        struct lut_entry *lut = na->na_lut;
+	struct netmap_mem_d *nmd = na->nm_mem;
+	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
+	uint32_t i, cur, *buf;
+
+	D("freeing the extra list");
+	for (i = 0; head >=2 && head < p->objtotal; i++) {
+		cur = head;
+		buf = lut[head].vaddr;
+		head = *buf;
+		*buf = 0;
+		if (netmap_obj_free(p, cur))
+			break;
+	}
+	if (head != 0)
+		D("breaking with head %d", head);
+	D("freed %d buffers", i);
+}
+
+
 /* Return nonzero on error */
 static int
-netmap_new_bufs(struct netmap_if *nifp,
-                struct netmap_slot *slot, u_int n)
+netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
 {
-	struct netmap_obj_pool *p = &nm_mem.pools[NETMAP_BUF_POOL];
-	int i = 0;	/* slot counter */
+	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
+	u_int i = 0;	/* slot counter */
 	uint32_t pos = 0;	/* slot in p->bitmap */
 	uint32_t index = 0;	/* buffer index */
 
-	(void)nifp;	/* UNUSED */
 	for (i = 0; i < n; i++) {
-		void *vaddr = netmap_buf_malloc(&pos, &index);
+		void *vaddr = netmap_buf_malloc(nmd, &pos, &index);
 		if (vaddr == NULL) {
-			D("unable to locate empty packet buffer");
+			D("no more buffers after %d of %d", i, n);
 			goto cleanup;
 		}
 		slot[i].buf_idx = index;
 		slot[i].len = p->_objsize;
-		/* XXX setting flags=NS_BUF_CHANGED forces a pointer reload
-		 * in the NIC ring. This is a hack that hides missing
-		 * initializations in the drivers, and should go away.
-		 */
-		// slot[i].flags = NS_BUF_CHANGED;
+		slot[i].flags = 0;
 	}
 
 	ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos);
@@ -443,11 +701,24 @@
 	return (ENOMEM);
 }
 
+static void
+netmap_mem_set_ring(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n, uint32_t index)
+{
+	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
+	u_int i;
 
+	for (i = 0; i < n; i++) {
+		slot[i].buf_idx = index;
+		slot[i].len = p->_objsize;
+		slot[i].flags = 0;
+	}
+}
+
+
 static void
-netmap_free_buf(struct netmap_if *nifp, uint32_t i)
+netmap_free_buf(struct netmap_mem_d *nmd, uint32_t i)
 {
-	struct netmap_obj_pool *p = &nm_mem.pools[NETMAP_BUF_POOL];
+	struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
 
 	if (i < 2 || i >= p->objtotal) {
 		D("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
@@ -456,9 +727,22 @@
 	netmap_obj_free(p, i);
 }
 
+
 static void
+netmap_free_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
+{
+	u_int i;
+
+	for (i = 0; i < n; i++) {
+		if (slot[i].buf_idx > 2)
+			netmap_free_buf(nmd, slot[i].buf_idx);
+	}
+}
+
+static void
 netmap_reset_obj_allocator(struct netmap_obj_pool *p)
 {
+
 	if (p == NULL)
 		return;
 	if (p->bitmap)
@@ -465,10 +749,18 @@
 		free(p->bitmap, M_NETMAP);
 	p->bitmap = NULL;
 	if (p->lut) {
-		int i;
-		for (i = 0; i < p->objtotal; i += p->clustentries) {
+		u_int i;
+		size_t sz = p->_clustsize;
+
+		/*
+		 * Free each cluster allocated in
+		 * netmap_finalize_obj_allocator().  The cluster start
+		 * addresses are stored at multiples of p->_clustentries
+		 * in the lut.
+		 */
+		for (i = 0; i < p->objtotal; i += p->_clustentries) {
 			if (p->lut[i].vaddr)
-				contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP);
+				contigfree(p->lut[i].vaddr, sz, M_NETMAP);
 		}
 		bzero(p->lut, sizeof(struct lut_entry) * p->objtotal);
 #ifdef linux
@@ -478,6 +770,10 @@
 #endif
 	}
 	p->lut = NULL;
+	p->objtotal = 0;
+	p->memtotal = 0;
+	p->numclusters = 0;
+	p->objfree = 0;
 }
 
 /*
@@ -495,8 +791,7 @@
  * We receive a request for objtotal objects, of size objsize each.
  * Internally we may round up both numbers, as we allocate objects
  * in small clusters multiple of the page size.
- * In the allocator we don't need to store the objsize,
- * but we do need to keep track of objtotal' and clustentries,
+ * We need to keep track of objtotal and clustentries,
  * as they are needed when freeing memory.
  *
  * XXX note -- userspace needs the buffers to be contiguous,
@@ -508,16 +803,21 @@
 static int
 netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int objsize)
 {
-	int i, n;
+	int i;
 	u_int clustsize;	/* the cluster size, multiple of page size */
 	u_int clustentries;	/* how many objects per entry */
 
-#define MAX_CLUSTSIZE	(1<<17)
-#define LINE_ROUND	64
+	/* we store the current request, so we can
+	 * detect configuration changes later */
+	p->r_objtotal = objtotal;
+	p->r_objsize = objsize;
+
+#define MAX_CLUSTSIZE	(1<<22)		// 4 MB
+#define LINE_ROUND	NM_CACHE_ALIGN	// 64
 	if (objsize >= MAX_CLUSTSIZE) {
 		/* we could do it but there is no point */
 		D("unsupported allocation for %d bytes", objsize);
-		goto error;
+		return EINVAL;
 	}
 	/* make sure objsize is a multiple of LINE_ROUND */
 	i = (objsize & (LINE_ROUND - 1));
@@ -528,12 +828,12 @@
 	if (objsize < p->objminsize || objsize > p->objmaxsize) {
 		D("requested objsize %d out of range [%d, %d]",
 			objsize, p->objminsize, p->objmaxsize);
-		goto error;
+		return EINVAL;
 	}
 	if (objtotal < p->nummin || objtotal > p->nummax) {
 		D("requested objtotal %d out of range [%d, %d]",
 			objtotal, p->nummin, p->nummax);
-		goto error;
+		return EINVAL;
 	}
 	/*
 	 * Compute number of objects using a brute-force approach:
@@ -550,15 +850,14 @@
 			clustentries = i;
 			break;
 		}
-		if (delta > ( (clustentries*objsize) % PAGE_SIZE) )
-			clustentries = i;
 	}
-	// D("XXX --- ouch, delta %d (bad for buffers)", delta);
-	/* compute clustsize and round to the next page */
+	/* exact solution not found */
+	if (clustentries == 0) {
+		D("unsupported allocation for %d bytes", objsize);
+		return EINVAL;
+	}
+	/* compute clustsize */
 	clustsize = clustentries * objsize;
-	i =  (clustsize & (PAGE_SIZE - 1));
-	if (i)
-		clustsize += PAGE_SIZE - i;
 	if (netmap_verbose)
 		D("objsize %d clustsize %d objects %d",
 			objsize, clustsize, clustentries);
@@ -567,22 +866,15 @@
 	 * The number of clusters is n = ceil(objtotal/clustentries)
 	 * objtotal' = n * clustentries
 	 */
-	p->clustentries = clustentries;
+	p->_clustentries = clustentries;
 	p->_clustsize = clustsize;
-	n = (objtotal + clustentries - 1) / clustentries;
-	p->_numclusters = n;
-	p->objtotal = n * clustentries;
-	p->objfree = p->objtotal - 2; /* obj 0 and 1 are reserved */
-	p->_memtotal = p->_numclusters * p->_clustsize;
+	p->_numclusters = (objtotal + clustentries - 1) / clustentries;
+
+	/* actual values (may be larger than requested) */
 	p->_objsize = objsize;
+	p->_objtotal = p->_numclusters * clustentries;
 
 	return 0;
-
-error:
-	p->_objsize = objsize;
-	p->objtotal = objtotal;
-
-	return EINVAL;
 }
 
 
@@ -590,8 +882,13 @@
 static int
 netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
 {
-	int i, n;
+	int i; /* must be signed */
+	size_t n;
 
+	/* optimistically assume we have enough memory */
+	p->numclusters = p->_numclusters;
+	p->objtotal = p->_objtotal;
+
 	n = sizeof(struct lut_entry) * p->objtotal;
 #ifdef linux
 	p->lut = vmalloc(n);
@@ -599,7 +896,7 @@
 	p->lut = malloc(n, M_NETMAP, M_NOWAIT | M_ZERO);
 #endif
 	if (p->lut == NULL) {
-		D("Unable to create lookup table (%d bytes) for '%s'", n, p->name);
+		D("Unable to create lookup table (%d bytes) for '%s'", (int)n, p->name);
 		goto clean;
 	}
 
@@ -607,7 +904,7 @@
 	n = (p->objtotal + 31) / 32;
 	p->bitmap = malloc(sizeof(uint32_t) * n, M_NETMAP, M_NOWAIT | M_ZERO);
 	if (p->bitmap == NULL) {
-		D("Unable to create bitmap (%d entries) for allocator '%s'", n,
+		D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
 		    p->name);
 		goto clean;
 	}
@@ -616,33 +913,48 @@
 	/*
 	 * Allocate clusters, init pointers and bitmap
 	 */
-	for (i = 0; i < p->objtotal;) {
-		int lim = i + p->clustentries;
+
+	n = p->_clustsize;
+	for (i = 0; i < (int)p->objtotal;) {
+		int lim = i + p->_clustentries;
 		char *clust;
 
-		clust = contigmalloc(p->_clustsize, M_NETMAP, M_NOWAIT | M_ZERO,
-		    0, -1UL, PAGE_SIZE, 0);
+		clust = contigmalloc(n, M_NETMAP, M_NOWAIT | M_ZERO,
+		    (size_t)0, -1UL, PAGE_SIZE, 0);
 		if (clust == NULL) {
 			/*
 			 * If we get here, there is a severe memory shortage,
 			 * so halve the allocated memory to reclaim some.
-			 * XXX check boundaries
 			 */
 			D("Unable to create cluster at %d for '%s' allocator",
 			    i, p->name);
+			if (i < 2) /* nothing to halve */
+				goto out;
 			lim = i / 2;
 			for (i--; i >= lim; i--) {
 				p->bitmap[ (i>>5) ] &=  ~( 1 << (i & 31) );
-				if (i % p->clustentries == 0 && p->lut[i].vaddr)
+				if (i % p->_clustentries == 0 && p->lut[i].vaddr)
 					contigfree(p->lut[i].vaddr,
-						p->_clustsize, M_NETMAP);
+						n, M_NETMAP);
+				p->lut[i].vaddr = NULL;
 			}
+		out:
 			p->objtotal = i;
-			p->objfree = p->objtotal - 2;
-			p->_numclusters = i / p->clustentries;
-			p->_memtotal = p->_numclusters * p->_clustsize;
+			/* we may have stopped in the middle of a cluster */
+			p->numclusters = (i + p->_clustentries - 1) / p->_clustentries;
 			break;
 		}
+		/*
+		 * Set bitmap and lut state for all buffers in the current
+		 * cluster.
+		 *
+		 * [i, lim) is the set of buffer indexes that cover the
+		 * current cluster.
+		 *
+		 * 'clust' is really the address of the current buffer in
+		 * the current cluster as we index through it with a stride
+		 * of p->_objsize.
+		 */
 		for (; i < lim; i++, clust += p->_objsize) {
 			p->bitmap[ (i>>5) ] |=  ( 1 << (i & 31) );
 			p->lut[i].vaddr = clust;
@@ -649,11 +961,14 @@
 			p->lut[i].paddr = vtophys(clust);
 		}
 	}
-	p->bitmap[0] = ~3; /* objs 0 and 1 is always busy */
+	p->objfree = p->objtotal;
+	p->memtotal = p->numclusters * p->_clustsize;
+	if (p->objfree == 0)
+		goto clean;
 	if (netmap_verbose)
 		D("Pre-allocated %d clusters (%d/%dKB) for '%s'",
-		    p->_numclusters, p->_clustsize >> 10,
-		    p->_memtotal >> 10, p->name);
+		    p->numclusters, p->_clustsize >> 10,
+		    p->memtotal >> 10, p->name);
 
 	return 0;
 
@@ -664,126 +979,320 @@
 
 /* call with lock held */
 static int
-netmap_memory_config_changed(void)
+netmap_memory_config_changed(struct netmap_mem_d *nmd)
 {
 	int i;
 
 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
-		if (nm_mem.pools[i]._objsize != netmap_params[i].size ||
-		    nm_mem.pools[i].objtotal != netmap_params[i].num)
+		if (nmd->pools[i].r_objsize != netmap_params[i].size ||
+		    nmd->pools[i].r_objtotal != netmap_params[i].num)
 		    return 1;
 	}
 	return 0;
 }
 
+static void
+netmap_mem_reset_all(struct netmap_mem_d *nmd)
+{
+	int i;
 
+	if (netmap_verbose)
+		D("resetting %p", nmd);
+	for (i = 0; i < NETMAP_POOLS_NR; i++) {
+		netmap_reset_obj_allocator(&nmd->pools[i]);
+	}
+	nmd->flags  &= ~NETMAP_MEM_FINALIZED;
+}
+
+static int
+netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
+{
+	int i, lim = p->_objtotal;
+
+	if (na->pdev == NULL)
+		return 0;
+
+#ifdef __FreeBSD__
+	(void)i;
+	(void)lim;
+	D("unsupported on FreeBSD");
+#else /* linux */
+	for (i = 2; i < lim; i++) {
+		netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr);
+	}
+#endif /* linux */
+
+	return 0;
+}
+
+static int
+netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na)
+{
+#ifdef __FreeBSD__
+	D("unsupported on FreeBSD");
+#else /* linux */
+	int i, lim = p->_objtotal;
+
+	if (na->pdev == NULL)
+		return 0;
+
+	for (i = 2; i < lim; i++) {
+		netmap_load_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr,
+				p->lut[i].vaddr);
+	}
+#endif /* linux */
+
+	return 0;
+}
+
+static int
+netmap_mem_finalize_all(struct netmap_mem_d *nmd)
+{
+	int i;
+	if (nmd->flags & NETMAP_MEM_FINALIZED)
+		return 0;
+	nmd->lasterr = 0;
+	nmd->nm_totalsize = 0;
+	for (i = 0; i < NETMAP_POOLS_NR; i++) {
+		nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i]);
+		if (nmd->lasterr)
+			goto error;
+		nmd->nm_totalsize += nmd->pools[i].memtotal;
+	}
+	/* buffers 0 and 1 are reserved */
+	nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
+	nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
+	nmd->flags |= NETMAP_MEM_FINALIZED;
+
+	if (netmap_verbose)
+		D("interfaces %d KB, rings %d KB, buffers %d MB",
+		    nmd->pools[NETMAP_IF_POOL].memtotal >> 10,
+		    nmd->pools[NETMAP_RING_POOL].memtotal >> 10,
+		    nmd->pools[NETMAP_BUF_POOL].memtotal >> 20);
+
+	if (netmap_verbose)
+		D("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree);
+
+
+	return 0;
+error:
+	netmap_mem_reset_all(nmd);
+	return nmd->lasterr;
+}
+
+
+
+void
+netmap_mem_private_delete(struct netmap_mem_d *nmd)
+{
+	if (nmd == NULL)
+		return;
+	if (netmap_verbose)
+		D("deleting %p", nmd);
+	if (nmd->refcount > 0)
+		D("bug: deleting mem allocator with refcount=%d!", nmd->refcount);
+	nm_mem_release_id(nmd);
+	if (netmap_verbose)
+		D("done deleting %p", nmd);
+	NMA_LOCK_DESTROY(nmd);
+	free(nmd, M_DEVBUF);
+}
+
+static int
+netmap_mem_private_config(struct netmap_mem_d *nmd)
+{
+	/* nothing to do, we are configured on creation
+ 	 * and configuration never changes thereafter
+ 	 */
+	return 0;
+}
+
+static int
+netmap_mem_private_finalize(struct netmap_mem_d *nmd)
+{
+	int err;
+	nmd->refcount++;
+	err = netmap_mem_finalize_all(nmd);
+	return err;
+
+}
+
+static void
+netmap_mem_private_deref(struct netmap_mem_d *nmd)
+{
+	if (--nmd->refcount <= 0)
+		netmap_mem_reset_all(nmd);
+}
+
+
+/*
+ * allocator for private memory
+ */
+struct netmap_mem_d *
+netmap_mem_private_new(const char *name, u_int txr, u_int txd,
+	u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int *perr)
+{
+	struct netmap_mem_d *d = NULL;
+	struct netmap_obj_params p[NETMAP_POOLS_NR];
+	int i, err;
+	u_int v, maxd;
+
+	d = malloc(sizeof(struct netmap_mem_d),
+			M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (d == NULL) {
+		err = ENOMEM;
+		goto error;
+	}
+
+	*d = nm_blueprint;
+
+	err = nm_mem_assign_id(d);
+	if (err)
+		goto error;
+
+	/* account for the fake host rings */
+	txr++;
+	rxr++;
+
+	/* copy the min values */
+	for (i = 0; i < NETMAP_POOLS_NR; i++) {
+		p[i] = netmap_min_priv_params[i];
+	}
+
+	/* possibly increase them to fit user request */
+	v = sizeof(struct netmap_if) + sizeof(ssize_t) * (txr + rxr);
+	if (p[NETMAP_IF_POOL].size < v)
+		p[NETMAP_IF_POOL].size = v;
+	v = 2 + 4 * npipes;
+	if (p[NETMAP_IF_POOL].num < v)
+		p[NETMAP_IF_POOL].num = v;
+	maxd = (txd > rxd) ? txd : rxd;
+	v = sizeof(struct netmap_ring) + sizeof(struct netmap_slot) * maxd;
+	if (p[NETMAP_RING_POOL].size < v)
+		p[NETMAP_RING_POOL].size = v;
+	/* each pipe endpoint needs two tx rings (1 normal + 1 host, fake)
+         * and two rx rings (again, 1 normal and 1 fake host)
+         */
+	v = txr + rxr + 8 * npipes;
+	if (p[NETMAP_RING_POOL].num < v)
+		p[NETMAP_RING_POOL].num = v;
+	/* for each pipe we only need the buffers for the 4 "real" rings.
+         * On the other hand, the pipe ring dimension may differ from the
+         * parent port ring dimension. As a compromise, we allocate twice the
+         * space that would be needed if the pipe rings were the same size as the parent rings
+         */
+	v = (4 * npipes + rxr) * rxd + (4 * npipes + txr) * txd + 2 + extra_bufs;
+		/* the +2 is for the tx and rx fake buffers (indices 0 and 1) */
+	if (p[NETMAP_BUF_POOL].num < v)
+		p[NETMAP_BUF_POOL].num = v;
+
+	if (netmap_verbose)
+		D("req if %d*%d ring %d*%d buf %d*%d",
+			p[NETMAP_IF_POOL].num,
+			p[NETMAP_IF_POOL].size,
+			p[NETMAP_RING_POOL].num,
+			p[NETMAP_RING_POOL].size,
+			p[NETMAP_BUF_POOL].num,
+			p[NETMAP_BUF_POOL].size);
+
+	for (i = 0; i < NETMAP_POOLS_NR; i++) {
+		snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ,
+				nm_blueprint.pools[i].name,
+				name);
+		err = netmap_config_obj_allocator(&d->pools[i],
+				p[i].num, p[i].size);
+		if (err)
+			goto error;
+	}
+
+	d->flags &= ~NETMAP_MEM_FINALIZED;
+
+	NMA_LOCK_INIT(d);
+
+	return d;
+error:
+	netmap_mem_private_delete(d);
+	if (perr)
+		*perr = err;
+	return NULL;
+}
+
+
 /* call with lock held */
 static int
-netmap_memory_config(void)
+netmap_mem_global_config(struct netmap_mem_d *nmd)
 {
 	int i;
 
-	if (!netmap_memory_config_changed())
+	if (nmd->refcount)
+		/* already in use, we cannot change the configuration */
 		goto out;
 
+	if (!netmap_memory_config_changed(nmd))
+		goto out;
+
 	D("reconfiguring");
 
-	if (nm_mem.finalized) {
+	if (nmd->flags & NETMAP_MEM_FINALIZED) {
 		/* reset previous allocation */
 		for (i = 0; i < NETMAP_POOLS_NR; i++) {
-			netmap_reset_obj_allocator(&nm_mem.pools[i]);
+			netmap_reset_obj_allocator(&nmd->pools[i]);
 		}
-		nm_mem.finalized = 0;
-        }
+		nmd->flags &= ~NETMAP_MEM_FINALIZED;
+	}
 
 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
-		nm_mem.lasterr = netmap_config_obj_allocator(&nm_mem.pools[i],
+		nmd->lasterr = netmap_config_obj_allocator(&nmd->pools[i],
 				netmap_params[i].num, netmap_params[i].size);
-		if (nm_mem.lasterr)
+		if (nmd->lasterr)
 			goto out;
 	}
 
-	D("Have %d KB for interfaces, %d KB for rings and %d MB for buffers",
-	    nm_mem.pools[NETMAP_IF_POOL]._memtotal >> 10,
-	    nm_mem.pools[NETMAP_RING_POOL]._memtotal >> 10,
-	    nm_mem.pools[NETMAP_BUF_POOL]._memtotal >> 20);
-
 out:
 
-	return nm_mem.lasterr;
+	return nmd->lasterr;
 }
 
-/* call with lock held */
 static int
-netmap_memory_finalize(void)
+netmap_mem_global_finalize(struct netmap_mem_d *nmd)
 {
-	int i;
-	u_int totalsize = 0;
-
-	nm_mem.refcount++;
-	if (nm_mem.refcount > 1) {
-		ND("busy (refcount %d)", nm_mem.refcount);
-		goto out;
-	}
-
+	int err;
+		
 	/* update configuration if changed */
-	if (netmap_memory_config())
+	if (netmap_mem_global_config(nmd))
 		goto out;
 
-	if (nm_mem.finalized) {
+	nmd->refcount++;
+
+	if (nmd->flags & NETMAP_MEM_FINALIZED) {
 		/* may happen if config is not changed */
 		ND("nothing to do");
 		goto out;
 	}
 
-	for (i = 0; i < NETMAP_POOLS_NR; i++) {
-		nm_mem.lasterr = netmap_finalize_obj_allocator(&nm_mem.pools[i]);
-		if (nm_mem.lasterr)
-			goto cleanup;
-		totalsize += nm_mem.pools[i]._memtotal;
-	}
-	nm_mem.nm_totalsize = totalsize;
+	if (netmap_mem_finalize_all(nmd))
+		goto out;
 
-	/* backward compatibility */
-	netmap_buf_size = nm_mem.pools[NETMAP_BUF_POOL]._objsize;
-	netmap_total_buffers = nm_mem.pools[NETMAP_BUF_POOL].objtotal;
+	nmd->lasterr = 0;
 
-	netmap_buffer_lut = nm_mem.pools[NETMAP_BUF_POOL].lut;
-	netmap_buffer_base = nm_mem.pools[NETMAP_BUF_POOL].lut[0].vaddr;
-
-	nm_mem.finalized = 1;
-	nm_mem.lasterr = 0;
-
-	/* make sysctl values match actual values in the pools */
-	for (i = 0; i < NETMAP_POOLS_NR; i++) {
-		netmap_params[i].size = nm_mem.pools[i]._objsize;
-		netmap_params[i].num  = nm_mem.pools[i].objtotal;
-	}
-
 out:
-	if (nm_mem.lasterr)
-		nm_mem.refcount--;
+	if (nmd->lasterr)
+		nmd->refcount--;
+	err = nmd->lasterr;
 
-	return nm_mem.lasterr;
+	return err;
 
-cleanup:
-	for (i = 0; i < NETMAP_POOLS_NR; i++) {
-		netmap_reset_obj_allocator(&nm_mem.pools[i]);
-	}
-	nm_mem.refcount--;
-
-	return nm_mem.lasterr;
 }
 
-static int
-netmap_memory_init(void)
+int
+netmap_mem_init(void)
 {
-	NMA_LOCK_INIT();
+	NMA_LOCK_INIT(&nm_mem);
 	return (0);
 }
 
-static void
-netmap_memory_fini(void)
+void
+netmap_mem_fini(void)
 {
 	int i;
 
@@ -790,184 +1299,308 @@
 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
 	    netmap_destroy_obj_allocator(&nm_mem.pools[i]);
 	}
-	NMA_LOCK_DESTROY();
+	NMA_LOCK_DESTROY(&nm_mem);
 }
 
 static void
 netmap_free_rings(struct netmap_adapter *na)
 {
-	int i;
+	struct netmap_kring *kring;
+	struct netmap_ring *ring;
 	if (!na->tx_rings)
 		return;
-	for (i = 0; i < na->num_tx_rings + 1; i++) {
-		netmap_ring_free(na->tx_rings[i].ring);
-		na->tx_rings[i].ring = NULL;
+	for (kring = na->tx_rings; kring != na->rx_rings; kring++) {
+		ring = kring->ring;
+		if (ring == NULL)
+			continue;
+		netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
+		netmap_ring_free(na->nm_mem, ring);
+		kring->ring = NULL;
 	}
-	for (i = 0; i < na->num_rx_rings + 1; i++) {
-		netmap_ring_free(na->rx_rings[i].ring);
-		na->rx_rings[i].ring = NULL;
+	for (/* cont'd from above */; kring != na->tailroom; kring++) {
+		ring = kring->ring;
+		if (ring == NULL)
+			continue;
+		netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
+		netmap_ring_free(na->nm_mem, ring);
+		kring->ring = NULL;
 	}
-	free(na->tx_rings, M_DEVBUF);
-	na->tx_rings = na->rx_rings = NULL;
 }
 
-
-
-/* call with NMA_LOCK held */
-/*
- * Allocate the per-fd structure netmap_if.
- * If this is the first instance, also allocate the krings, rings etc.
+/* call with NMA_LOCK held *
+ *
+ * Allocate netmap rings and buffers for this card
+ * The rings are contiguous, but have variable size.
+ * The kring array must follow the layout described
+ * in netmap_krings_create().
  */
-static void *
-netmap_if_new(const char *ifname, struct netmap_adapter *na)
+int
+netmap_mem_rings_create(struct netmap_adapter *na)
 {
-	struct netmap_if *nifp;
 	struct netmap_ring *ring;
-	ssize_t base; /* handy for relative offsets between rings and nifp */
-	u_int i, len, ndesc, ntx, nrx;
+	u_int len, ndesc;
 	struct netmap_kring *kring;
+	u_int i;
 
-	if (netmap_update_config(na)) {
-		/* configuration mismatch, report and fail */
-		return NULL;
-	}
-	ntx = na->num_tx_rings + 1; /* shorthand, include stack ring */
-	nrx = na->num_rx_rings + 1; /* shorthand, include stack ring */
-	/*
-	 * the descriptor is followed inline by an array of offsets
-	 * to the tx and rx rings in the shared memory region.
-	 */
-	len = sizeof(struct netmap_if) + (nrx + ntx) * sizeof(ssize_t);
-	nifp = netmap_if_malloc(len);
-	if (nifp == NULL) {
-		return NULL;
-	}
+	NMA_LOCK(na->nm_mem);
 
-	/* initialize base fields -- override const */
-	*(int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings;
-	*(int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings;
-	strncpy(nifp->ni_name, ifname, IFNAMSIZ);
-
-	(na->refcount)++;	/* XXX atomic ? we are under lock */
-	if (na->refcount > 1) { /* already setup, we are done */
-		goto final;
-	}
-
-	len = (ntx + nrx) * sizeof(struct netmap_kring);
-	na->tx_rings = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
-	if (na->tx_rings == NULL) {
-		D("Cannot allocate krings for %s", ifname);
-		goto cleanup;
-	}
-	na->rx_rings = na->tx_rings + ntx;
-
-	/*
-	 * First instance, allocate netmap rings and buffers for this card
-	 * The rings are contiguous, but have variable size.
-	 */
-	for (i = 0; i < ntx; i++) { /* Transmit rings */
-		kring = &na->tx_rings[i];
-		ndesc = na->num_tx_desc;
-		bzero(kring, sizeof(*kring));
+        /* transmit rings */
+	for (i =0, kring = na->tx_rings; kring != na->rx_rings; kring++, i++) {
+		if (kring->ring) {
+			ND("%s %ld already created", kring->name, kring - na->tx_rings);
+			continue; /* already created by somebody else */
+		}
+		ndesc = kring->nkr_num_slots;
 		len = sizeof(struct netmap_ring) +
 			  ndesc * sizeof(struct netmap_slot);
-		ring = netmap_ring_malloc(len);
+		ring = netmap_ring_malloc(na->nm_mem, len);
 		if (ring == NULL) {
-			D("Cannot allocate tx_ring[%d] for %s", i, ifname);
+			D("Cannot allocate tx_ring");
 			goto cleanup;
 		}
-		ND("txring[%d] at %p ofs %d", i, ring);
-		kring->na = na;
+		ND("txring at %p", ring);
 		kring->ring = ring;
-		*(int *)(uintptr_t)&ring->num_slots = kring->nkr_num_slots = ndesc;
-		*(ssize_t *)(uintptr_t)&ring->buf_ofs =
-		    (nm_mem.pools[NETMAP_IF_POOL]._memtotal +
-			nm_mem.pools[NETMAP_RING_POOL]._memtotal) -
-			netmap_ring_offset(ring);
+		*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
+		*(int64_t *)(uintptr_t)&ring->buf_ofs =
+		    (na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
+			na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
+			netmap_ring_offset(na->nm_mem, ring);
 
-		/*
-		 * IMPORTANT:
-		 * Always keep one slot empty, so we can detect new
-		 * transmissions comparing cur and nr_hwcur (they are
-		 * the same only if there are no new transmissions).
-		 */
-		ring->avail = kring->nr_hwavail = ndesc - 1;
-		ring->cur = kring->nr_hwcur = 0;
-		*(int *)(uintptr_t)&ring->nr_buf_size = NETMAP_BUF_SIZE;
-		ND("initializing slots for txring[%d]", i);
-		if (netmap_new_bufs(nifp, ring->slot, ndesc)) {
-			D("Cannot allocate buffers for tx_ring[%d] for %s", i, ifname);
-			goto cleanup;
+		/* copy values from kring */
+		ring->head = kring->rhead;
+		ring->cur = kring->rcur;
+		ring->tail = kring->rtail;
+		*(uint16_t *)(uintptr_t)&ring->nr_buf_size =
+			netmap_mem_bufsize(na->nm_mem);
+		ND("%s h %d c %d t %d", kring->name,
+			ring->head, ring->cur, ring->tail);
+		ND("initializing slots for txring");
+		if (i != na->num_tx_rings || (na->na_flags & NAF_HOST_RINGS)) {
+			/* this is a real ring */
+			if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
+				D("Cannot allocate buffers for tx_ring");
+				goto cleanup;
+			}
+		} else {
+			/* this is a fake tx ring, set all indices to 0 */
+			netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
 		}
 	}
 
-	for (i = 0; i < nrx; i++) { /* Receive rings */
-		kring = &na->rx_rings[i];
-		ndesc = na->num_rx_desc;
-		bzero(kring, sizeof(*kring));
+	/* receive rings */
+	for ( i = 0 /* kring cont'd from above */ ; kring != na->tailroom; kring++, i++) {
+		if (kring->ring) {
+			ND("%s %ld already created", kring->name, kring - na->rx_rings);
+			continue; /* already created by somebody else */
+		}
+		ndesc = kring->nkr_num_slots;
 		len = sizeof(struct netmap_ring) +
 			  ndesc * sizeof(struct netmap_slot);
-		ring = netmap_ring_malloc(len);
+		ring = netmap_ring_malloc(na->nm_mem, len);
 		if (ring == NULL) {
-			D("Cannot allocate rx_ring[%d] for %s", i, ifname);
+			D("Cannot allocate rx_ring");
 			goto cleanup;
 		}
-		ND("rxring[%d] at %p ofs %d", i, ring);
-
-		kring->na = na;
+		ND("rxring at %p", ring);
 		kring->ring = ring;
-		*(int *)(uintptr_t)&ring->num_slots = kring->nkr_num_slots = ndesc;
-		*(ssize_t *)(uintptr_t)&ring->buf_ofs =
-		    (nm_mem.pools[NETMAP_IF_POOL]._memtotal +
-		        nm_mem.pools[NETMAP_RING_POOL]._memtotal) -
-			netmap_ring_offset(ring);
+		*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
+		*(int64_t *)(uintptr_t)&ring->buf_ofs =
+		    (na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
+		        na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
+			netmap_ring_offset(na->nm_mem, ring);
 
-		ring->cur = kring->nr_hwcur = 0;
-		ring->avail = kring->nr_hwavail = 0; /* empty */
-		*(int *)(uintptr_t)&ring->nr_buf_size = NETMAP_BUF_SIZE;
-		ND("initializing slots for rxring[%d]", i);
-		if (netmap_new_bufs(nifp, ring->slot, ndesc)) {
-			D("Cannot allocate buffers for rx_ring[%d] for %s", i, ifname);
-			goto cleanup;
+		/* copy values from kring */
+		ring->head = kring->rhead;
+		ring->cur = kring->rcur;
+		ring->tail = kring->rtail;
+		*(int *)(uintptr_t)&ring->nr_buf_size =
+			netmap_mem_bufsize(na->nm_mem);
+		ND("%s h %d c %d t %d", kring->name,
+			ring->head, ring->cur, ring->tail);
+		ND("initializing slots for rxring %p", ring);
+		if (i != na->num_rx_rings || (na->na_flags & NAF_HOST_RINGS)) {
+			/* this is a real ring */
+			if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
+				D("Cannot allocate buffers for rx_ring");
+				goto cleanup;
+			}
+		} else {
+			/* this is a fake rx ring, set all indices to 1 */
+			netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 1);
 		}
 	}
-#ifdef linux
-	// XXX initialize the selrecord structs.
-	for (i = 0; i < ntx; i++)
-		init_waitqueue_head(&na->tx_rings[i].si);
-	for (i = 0; i < nrx; i++)
-		init_waitqueue_head(&na->rx_rings[i].si);
-	init_waitqueue_head(&na->tx_si);
-	init_waitqueue_head(&na->rx_si);
-#endif
-final:
+
+	NMA_UNLOCK(na->nm_mem);
+
+	return 0;
+
+cleanup:
+	netmap_free_rings(na);
+
+	NMA_UNLOCK(na->nm_mem);
+
+	return ENOMEM;
+}
+
+void
+netmap_mem_rings_delete(struct netmap_adapter *na)
+{
+	/* last instance, release bufs and rings */
+	NMA_LOCK(na->nm_mem);
+
+	netmap_free_rings(na);
+
+	NMA_UNLOCK(na->nm_mem);
+}
+
+
+/* call with NMA_LOCK held */
+/*
+ * Allocate the per-fd structure netmap_if.
+ *
+ * We assume that the configuration stored in na
+ * (number of tx/rx rings and descs) does not change while
+ * the interface is in netmap mode.
+ */
+struct netmap_if *
+netmap_mem_if_new(struct netmap_adapter *na)
+{
+	struct netmap_if *nifp;
+	ssize_t base; /* handy for relative offsets between rings and nifp */
+	u_int i, len, ntx, nrx;
+
+	/* account for the (possibly fake) host rings */
+	ntx = na->num_tx_rings + 1;
+	nrx = na->num_rx_rings + 1;
 	/*
+	 * the descriptor is followed inline by an array of offsets
+	 * to the tx and rx rings in the shared memory region.
+	 */
+
+	NMA_LOCK(na->nm_mem);
+
+	len = sizeof(struct netmap_if) + (nrx + ntx) * sizeof(ssize_t);
+	nifp = netmap_if_malloc(na->nm_mem, len);
+	if (nifp == NULL) {
+		NMA_UNLOCK(na->nm_mem);
+		return NULL;
+	}
+
+	/* initialize base fields -- override const */
+	*(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings;
+	*(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings;
+	strncpy(nifp->ni_name, na->name, (size_t)IFNAMSIZ);
+
+	/*
 	 * fill the slots for the rx and tx rings. They contain the offset
 	 * between the ring and nifp, so the information is usable in
 	 * userspace to reach the ring from the nifp.
 	 */
-	base = netmap_if_offset(nifp);
+	base = netmap_if_offset(na->nm_mem, nifp);
 	for (i = 0; i < ntx; i++) {
 		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] =
-			netmap_ring_offset(na->tx_rings[i].ring) - base;
+			netmap_ring_offset(na->nm_mem, na->tx_rings[i].ring) - base;
 	}
 	for (i = 0; i < nrx; i++) {
 		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+ntx] =
-			netmap_ring_offset(na->rx_rings[i].ring) - base;
+			netmap_ring_offset(na->nm_mem, na->rx_rings[i].ring) - base;
 	}
+
+	NMA_UNLOCK(na->nm_mem);
+
 	return (nifp);
-cleanup:
-	netmap_free_rings(na);
-	netmap_if_free(nifp);
-	(na->refcount)--;
-	return NULL;
 }
 
-/* call with NMA_LOCK held */
+void
+netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
+{
+	if (nifp == NULL)
+		/* nothing to do */
+		return;
+	NMA_LOCK(na->nm_mem);
+	if (nifp->ni_bufs_head)
+		netmap_extra_free(na, nifp->ni_bufs_head);
+	netmap_if_free(na->nm_mem, nifp);
+
+	NMA_UNLOCK(na->nm_mem);
+}
+
 static void
-netmap_memory_deref(void)
+netmap_mem_global_deref(struct netmap_mem_d *nmd)
 {
-	nm_mem.refcount--;
+
+	nmd->refcount--;
+	if (!nmd->refcount)
+		nmd->nm_grp = -1;
 	if (netmap_verbose)
-		D("refcount = %d", nm_mem.refcount);
+		D("refcount = %d", nmd->refcount);
+
 }
+
+int
+netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
+{
+	if (nm_mem_assign_group(nmd, na->pdev) < 0) {
+		return ENOMEM;
+	} else {
+		NMA_LOCK(nmd);
+		nmd->finalize(nmd);
+		NMA_UNLOCK(nmd);
+	}
+
+	if (!nmd->lasterr && na->pdev)
+		netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
+
+	return nmd->lasterr;
+}
+
+void
+netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
+{
+	NMA_LOCK(nmd);
+	netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
+	if (nmd->refcount == 1) {
+		u_int i;
+
+		/*
+		 * Reset the allocator when it falls out of use so that any
+		 * pool resources leaked by unclean application exits are
+		 * reclaimed.
+		 */
+		for (i = 0; i < NETMAP_POOLS_NR; i++) {
+			struct netmap_obj_pool *p;
+			u_int j;
+			
+			p = &nmd->pools[i];
+			p->objfree = p->objtotal;
+			/*
+			 * Reproduce the net effect of the M_ZERO malloc()
+			 * and marking of free entries in the bitmap that
+			 * occur in finalize_obj_allocator()
+			 */
+			memset(p->bitmap,
+			    '\0',
+			    sizeof(uint32_t) * ((p->objtotal + 31) / 32));
+			
+			/*
+			 * Set all the bits in the bitmap that have
+			 * corresponding buffers to 1 to indicate they are
+			 * free.
+			 */
+			for (j = 0; j < p->objtotal; j++) {
+				if (p->lut[j].vaddr != NULL) {
+					p->bitmap[ (j>>5) ] |=  ( 1 << (j & 31) );
+				}
+			}
+		}
+
+		/*
+		 * Per netmap_mem_finalize_all(),
+		 * buffers 0 and 1 are reserved
+		 */
+		nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
+		nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
+	}
+	nmd->deref(nmd);
+	NMA_UNLOCK(nmd);
+}

Added: trunk/sys/dev/netmap/netmap_mem2.h
===================================================================
--- trunk/sys/dev/netmap/netmap_mem2.h	                        (rev 0)
+++ trunk/sys/dev/netmap/netmap_mem2.h	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,146 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: stable/10/sys/dev/netmap/netmap_mem2.h 270252 2014-08-20 23:34:36Z luigi $
+ *
+ * (New) memory allocator for netmap
+ */
+
+/*
+ * This allocator creates three memory pools:
+ *	nm_if_pool	for the struct netmap_if
+ *	nm_ring_pool	for the struct netmap_ring
+ *	nm_buf_pool	for the packet buffers.
+ *
+ * that contain netmap objects. Each pool is made of a number of clusters,
+ * multiple of a page size, each containing an integer number of objects.
+ * The clusters are contiguous in user space but not in the kernel.
+ * Only nm_buf_pool needs to be dma-able,
+ * but for convenience use the same type of allocator for all.
+ *
+ * Once mapped, the three pools are exported to userspace
+ * as a contiguous block, starting from nm_if_pool. Each
+ * cluster (and pool) is an integral number of pages.
+ *   [ . . . ][ . . . . . .][ . . . . . . . . . .]
+ *    nm_if     nm_ring            nm_buf
+ *
+ * The userspace areas contain offsets of the objects in userspace.
+ * When (at init time) we write these offsets, we find out the index
+ * of the object, and from there locate the offset from the beginning
+ * of the region.
+ *
+ * The individual allocators manage a pool of memory for objects of
+ * the same size.
+ * The pool is split into smaller clusters, whose size is a
+ * multiple of the page size. The cluster size is chosen
+ * to minimize the waste for a given max cluster size
+ * (we do it by brute force, as we have relatively few objects
+ * per cluster).
+ *
+ * Objects are aligned to the cache line (64 bytes) rounding up object
+ * sizes when needed. A bitmap contains the state of each object.
+ * Allocation scans the bitmap; this is done only on attach, so we are not
+ * too worried about performance
+ *
+ * For each allocator we can define (through sysctl) the size and
+ * number of each object. Memory is allocated at the first use of a
+ * netmap file descriptor, and can be freed when all such descriptors
+ * have been released (including unmapping the memory).
+ * If memory is scarce, the system tries to get as much as possible
+ * and the sysctl values reflect the actual allocation.
+ * Together with the desired values, the sysctls also export absolute
+ * minimum and maximum values that cannot be overridden.
+ *
+ * struct netmap_if:
+ *	variable size, max 16 bytes per ring pair plus some fixed amount.
+ *	1024 bytes should be large enough in practice.
+ *
+ *	In the worst case we have one netmap_if per ring in the system.
+ *
+ * struct netmap_ring
+ *	variable size, 8 bytes per slot plus some fixed amount.
+ *	Rings can be large (e.g. 4k slots, or >32Kbytes).
+ *	We default to 36 KB (9 pages), and a few hundred rings.
+ *
+ * struct netmap_buffer
+ *	The more the better, both because fast interfaces tend to have
+ *	many slots, and because we may want to use buffers to store
+ *	packets in userspace avoiding copies.
+ *	Must contain a full frame (eg 1518, or more for vlans, jumbo
+ *	frames etc.) plus be nicely aligned, plus some NICs restrict
+ *	the size to multiple of 1K or so. Default to 2K
+ */
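
The allocation scan mentioned above relies on the pools' bitmap convention
(one bit per object, a set bit meaning "free", as netmap_finalize_obj_allocator()
and netmap_mem_deref() set it). A minimal sketch of such a scan, illustrative
only and not code from this change:

    /* illustrative sketch only; not the in-tree allocator */
    static int
    nm_bitmap_alloc_sketch(uint32_t *bitmap, u_int objtotal)
    {
        u_int i;

        for (i = 0; i < objtotal; i++) {
            if (bitmap[i >> 5] & (1U << (i & 31))) {
                bitmap[i >> 5] &= ~(1U << (i & 31)); /* mark busy */
                return (int)i;  /* index of the object within the pool */
            }
        }
        return -1;  /* pool exhausted */
    }
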
+#ifndef _NET_NETMAP_MEM2_H_
+#define _NET_NETMAP_MEM2_H_
+
+
+
+/* We implement two kinds of netmap_mem_d structures:
+ *
+ * - global: used by hardware NICS;
+ *
+ * - private: used by VALE ports.
+ *
+ * In both cases, the netmap_mem_d structure has the same lifetime as the
+ * netmap_adapter of the corresponding NIC or port. It is the responsibility of
+ * the client code to delete the private allocator when the associated
+ * netmap_adapter is freed (this is implemented by the NAF_MEM_OWNER flag in
+ * netmap.c).  The 'refcount' field counts the number of active users of the
+ * structure. The global allocator uses this information to prevent/allow
+ * reconfiguration. The private allocators release all their memory when there
+ * are no active users.  By 'active user' we mean an existing netmap_priv
+ * structure holding a reference to the allocator.
+ */
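
As a rough usage sketch of the functions declared just below (illustrative
only; the helper name and parameter values are made up, and error handling is
abbreviated), a client that owns a private allocator would drive it roughly
like this:

    static int
    example_private_mem_setup(struct netmap_adapter *na)
    {
        int error = 0;

        /* create and attach a private allocator sized for one ring pair */
        na->nm_mem = netmap_mem_private_new(na->name,
            1 /* txr */, 1024 /* txd */, 1 /* rxr */, 1024 /* rxd */,
            0 /* extra_bufs */, 0 /* npipes */, &error);
        if (na->nm_mem == NULL)
            return error;
        error = netmap_mem_finalize(na->nm_mem, na);  /* allocate the pools */
        if (error == 0)
            error = netmap_mem_rings_create(na);      /* carve rings/buffers */
        return error;
    }

    /* teardown mirrors the setup: netmap_mem_rings_delete(na), then
     * netmap_mem_deref(na->nm_mem, na); the owner finally calls
     * netmap_mem_private_delete(na->nm_mem). */
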
+
+extern struct netmap_mem_d nm_mem;
+
+struct lut_entry* netmap_mem_get_lut(struct netmap_mem_d *);
+u_int      netmap_mem_get_buftotal(struct netmap_mem_d *);
+size_t     netmap_mem_get_bufsize(struct netmap_mem_d *);
+vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t);
+int	   netmap_mem_finalize(struct netmap_mem_d *, struct netmap_adapter *);
+int 	   netmap_mem_init(void);
+void 	   netmap_mem_fini(void);
+struct netmap_if * netmap_mem_if_new(struct netmap_adapter *);
+void 	   netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
+int	   netmap_mem_rings_create(struct netmap_adapter *);
+void	   netmap_mem_rings_delete(struct netmap_adapter *);
+void 	   netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *);
+int	   netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id);
+ssize_t    netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
+struct netmap_mem_d* netmap_mem_private_new(const char *name,
+	u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes,
+	int* error);
+void	   netmap_mem_private_delete(struct netmap_mem_d *);
+
+#define NETMAP_MEM_PRIVATE	0x2	/* allocator uses private address space */
+#define NETMAP_MEM_IO		0x4	/* the underlying memory is mmapped I/O */
+
+uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n);
+
+
+#endif


Property changes on: trunk/sys/dev/netmap/netmap_mem2.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/netmap/netmap_monitor.c
===================================================================
--- trunk/sys/dev/netmap/netmap_monitor.c	                        (rev 0)
+++ trunk/sys/dev/netmap/netmap_monitor.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,499 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: stable/10/sys/dev/netmap/netmap_monitor.c 278779 2015-02-14 19:41:26Z luigi $
+ *
+ * Monitors
+ *
+ * netmap monitors can be used to do zero-copy monitoring of network traffic
+ * on another adapter, when the latter adapter is working in netmap mode.
+ *
+ * Monitors offer to userspace the same interface as any other netmap port,
+ * with as many pairs of netmap rings as the monitored adapter.
+ * However, only the rx rings are actually used. Each monitor rx ring receives
+ * the traffic transiting on both the tx and rx corresponding rings in the
+ * monitored adapter. During registration, the user can choose if she wants
+ * to intercept tx only, rx only, or both tx and rx traffic.
+ *
+ * The monitor only sees the frames after they have been consumed in the
+ * monitored adapter:
+ *
+ *  - For tx traffic, this is after the slots containing the frames have been
+ *    marked as free. Note that this may happen at a considerable delay after
+ *    frame transmission, since freeing of slots is often done lazily.
+ *
+ *  - For rx traffic, this is after the consumer on the monitored adapter
+ *    has released them. In most cases, the consumer is a userspace
+ *    application which may have modified the frame contents.
+ *
+ * If the monitor is not able to cope with the stream of frames, excess traffic
+ * will be dropped.
+ *
+ * Each ring can be monitored by at most one monitor. This may change in the
+ * future, if we implement monitor chaining.
+ *
+ */
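
A rough userspace sketch of how such a monitor could be requested through the
usual /dev/netmap registration path (illustrative only; the interface name is
a placeholder and the constants are those expected from net/netmap.h in this
tree):

    #include <sys/ioctl.h>
    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>
    #include <net/netmap.h>

    /* register an rx-only monitor on a port already in netmap mode */
    static int
    open_rx_monitor(const char *ifname)
    {
        struct nmreq req;
        int fd = open("/dev/netmap", O_RDWR);

        if (fd < 0)
            return -1;
        memset(&req, 0, sizeof(req));
        strncpy(req.nr_name, ifname, sizeof(req.nr_name) - 1);
        req.nr_version = NETMAP_API;
        req.nr_flags = NR_REG_ALL_NIC | NR_MONITOR_RX; /* rx traffic only */
        if (ioctl(fd, NIOCREGIF, &req) < 0) {
            close(fd); /* e.g. parent not in netmap mode, or ring busy */
            return -1;
        }
        return fd; /* mmap() and read the rx rings as on any netmap port */
    }
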
+
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>	/* defines used in kernel.h */
+#include <sys/kernel.h>	/* types used in module initialization */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h>	/* bus_dmamap_* */
+#include <sys/refcount.h>
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#else
+
+#error	Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#ifdef WITH_MONITOR
+
+#define NM_MONITOR_MAXSLOTS 4096
+
+/* monitor works by replacing the nm_sync callbacks in the monitored rings.
+ * The actions to be performed are the same on both tx and rx rings, so we
+ * have collected them here
+ */
+static int
+netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr)
+{
+	struct netmap_monitor_adapter *mna = kring->monitor;
+	struct netmap_kring *mkring = &mna->up.rx_rings[kring->ring_id];
+	struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
+	int error;
+	int rel_slots, free_slots, busy;
+	u_int beg, end, i;
+	u_int lim = kring->nkr_num_slots - 1,
+	      mlim = mkring->nkr_num_slots - 1;
+
+	/* get the released slots (rel_slots) */
+	beg = *ringptr;
+	error = kring->save_sync(kring, flags);
+	if (error)
+		return error;
+	end = *ringptr;
+	rel_slots = end - beg;
+	if (rel_slots < 0)
+		rel_slots += kring->nkr_num_slots;
+
+	if (!rel_slots) {
+		return 0;
+	}
+
+	/* we need to lock the monitor receive ring, since it
+	 * is the target of both tx and rx traffic from the monitored
+	 * adapter
+	 */
+	mtx_lock(&mkring->q_lock);
+	/* get the free slots available on the monitor ring */
+	i = mkring->nr_hwtail;
+	busy = i - mkring->nr_hwcur;
+	if (busy < 0)
+		busy += mkring->nkr_num_slots;
+	free_slots = mlim - busy;
+
+	if (!free_slots) {
+		mtx_unlock(&mkring->q_lock);
+		return 0;
+	}
+
+	/* swap min(free_slots, rel_slots) slots */
+	if (free_slots < rel_slots) {
+		beg += (rel_slots - free_slots);
+		if (beg > lim)
+			beg = 0;
+		rel_slots = free_slots;
+	}
+
+	for ( ; rel_slots; rel_slots--) {
+		struct netmap_slot *s = &ring->slot[beg];
+		struct netmap_slot *ms = &mring->slot[i];
+		uint32_t tmp;
+
+		tmp = ms->buf_idx;
+		ms->buf_idx = s->buf_idx;
+		s->buf_idx = tmp;
+
+		tmp = ms->len;
+		ms->len = s->len;
+		s->len = tmp;
+
+		s->flags |= NS_BUF_CHANGED;
+
+		beg = nm_next(beg, lim);
+		i = nm_next(i, mlim);
+
+	}
+	mb();
+	mkring->nr_hwtail = i;
+
+	mtx_unlock(&mkring->q_lock);
+	/* notify the new frames to the monitor */
+	mna->up.nm_notify(&mna->up, mkring->ring_id, NR_RX, 0);
+	return 0;
+}
+
+/* callback used to replace the nm_sync callback in the monitored tx rings */
+static int
+netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
+{
+        ND("%s %x", kring->name, flags);
+        return netmap_monitor_parent_sync(kring, flags, &kring->nr_hwtail);
+}
+
+/* callback used to replace the nm_sync callback in the monitored rx rings */
+static int
+netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
+{
+        ND("%s %x", kring->name, flags);
+        return netmap_monitor_parent_sync(kring, flags, &kring->rcur);
+}
+
+/* nm_sync callback for the monitor's own tx rings.
+ * This makes no sense and always returns error
+ */
+static int
+netmap_monitor_txsync(struct netmap_kring *kring, int flags)
+{
+        D("%s %x", kring->name, flags);
+	return EIO;
+}
+
+/* nm_sync callback for the monitor's own rx rings.
+ * Note that the lock in netmap_monitor_parent_sync only protects
+ * writers among themselves. Synchronization between writers
+ * (i.e., netmap_monitor_parent_txsync and netmap_monitor_parent_rxsync)
+ * and readers (i.e., netmap_monitor_rxsync) relies on memory barriers.
+ */
+static int
+netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
+{
+        ND("%s %x", kring->name, flags);
+	kring->nr_hwcur = kring->rcur;
+	mb();
+	nm_rxsync_finalize(kring);
+        return 0;
+}
+
+/* nm_krings_create callbacks for monitors.
+ * We could use the default netmap_hw_krings_monitor, but
+ * we don't need the mbq.
+ */
+static int
+netmap_monitor_krings_create(struct netmap_adapter *na)
+{
+	return netmap_krings_create(na, 0);
+}
+
+
+/* nm_register callback for monitors.
+ *
+ * On registration, replace the nm_sync callbacks in the monitored
+ * rings with our own, saving the previous ones in the monitored
+ * rings themselves, where they are used by netmap_monitor_parent_sync.
+ *
+ * On de-registration, restore the original callbacks. We need to
+ * stop traffic while we are doing this, since the monitored adapter may
+ * have already started executing a netmap_monitor_parent_sync
+ * and may not like the kring->save_sync pointer to become NULL.
+ */
+static int
+netmap_monitor_reg(struct netmap_adapter *na, int onoff)
+{
+	struct netmap_monitor_adapter *mna =
+		(struct netmap_monitor_adapter *)na;
+	struct netmap_priv_d *priv = &mna->priv;
+	struct netmap_adapter *pna = priv->np_na;
+	struct netmap_kring *kring;
+	int i;
+
+	ND("%p: onoff %d", na, onoff);
+	if (onoff) {
+		if (!nm_netmap_on(pna)) {
+			/* parent left netmap mode, fatal */
+			return ENXIO;
+		}
+		if (mna->flags & NR_MONITOR_TX) {
+			for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
+				kring = &pna->tx_rings[i];
+				kring->save_sync = kring->nm_sync;
+				kring->nm_sync = netmap_monitor_parent_txsync;
+			}
+		}
+		if (mna->flags & NR_MONITOR_RX) {
+			for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
+				kring = &pna->rx_rings[i];
+				kring->save_sync = kring->nm_sync;
+				kring->nm_sync = netmap_monitor_parent_rxsync;
+			}
+		}
+		na->na_flags |= NAF_NETMAP_ON;
+	} else {
+		if (!nm_netmap_on(pna)) {
+			/* parent left netmap mode, nothing to restore */
+			return 0;
+		}
+		na->na_flags &= ~NAF_NETMAP_ON;
+		if (mna->flags & NR_MONITOR_TX) {
+			for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
+				netmap_set_txring(pna, i, 1 /* stopped */);
+				kring = &pna->tx_rings[i];
+				kring->nm_sync = kring->save_sync;
+				kring->save_sync = NULL;
+				netmap_set_txring(pna, i, 0 /* enabled */);
+			}
+		}
+		if (mna->flags & NR_MONITOR_RX) {
+			for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
+				netmap_set_rxring(pna, i, 1 /* stopped */);
+				kring = &pna->rx_rings[i];
+				kring->nm_sync = kring->save_sync;
+				kring->save_sync = NULL;
+				netmap_set_rxring(pna, i, 0 /* enabled */);
+			}
+		}
+	}
+	return 0;
+}
+/* nm_krings_delete callback for monitors */
+static void
+netmap_monitor_krings_delete(struct netmap_adapter *na)
+{
+	netmap_krings_delete(na);
+}
+
+
+/* nm_dtor callback for monitors */
+static void
+netmap_monitor_dtor(struct netmap_adapter *na)
+{
+	struct netmap_monitor_adapter *mna =
+		(struct netmap_monitor_adapter *)na;
+	struct netmap_priv_d *priv = &mna->priv;
+	struct netmap_adapter *pna = priv->np_na;
+	int i;
+
+	ND("%p", na);
+	if (nm_netmap_on(pna)) {
+		/* parent still in netmap mode, mark its krings as free */
+		if (mna->flags & NR_MONITOR_TX) {
+			for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
+				pna->tx_rings[i].monitor = NULL;
+			}
+		}
+		if (mna->flags & NR_MONITOR_RX) {
+			for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
+				pna->rx_rings[i].monitor = NULL;
+			}
+		}
+	}
+	netmap_adapter_put(pna);
+}
+
+
+/* check if nmr is a request for a monitor adapter that we can satisfy */
+int
+netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
+{
+	struct nmreq pnmr;
+	struct netmap_adapter *pna; /* parent adapter */
+	struct netmap_monitor_adapter *mna;
+	int i, error;
+
+	if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
+		ND("not a monitor");
+		return 0;
+	}
+	/* this is a request for a monitor adapter */
+
+	D("flags %x", nmr->nr_flags);
+
+	mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (mna == NULL) {
+		D("memory error");
+		return ENOMEM;
+	}
+
+	/* first, try to find the adapter that we want to monitor
+	 * We use the same nmr, after we have turned off the monitor flags.
+	 * In this way we can potentially monitor everything netmap understands,
+	 * except other monitors.
+	 */
+	memcpy(&pnmr, nmr, sizeof(pnmr));
+	pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX);
+	error = netmap_get_na(&pnmr, &pna, create);
+	if (error) {
+		D("parent lookup failed: %d", error);
+		return error;
+	}
+	D("found parent: %s", pna->name);
+
+	if (!nm_netmap_on(pna)) {
+		/* parent not in netmap mode */
+		/* XXX we can wait for the parent to enter netmap mode,
+		 * by intercepting its nm_register callback (2014-03-16)
+		 */
+		D("%s not in netmap mode", pna->name);
+		error = EINVAL;
+		goto put_out;
+	}
+
+	/* grab all the rings we need in the parent */
+	mna->priv.np_na = pna;
+	error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags);
+	if (error) {
+		D("ringid error");
+		goto put_out;
+	}
+	if (nmr->nr_flags & NR_MONITOR_TX) {
+		for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
+			struct netmap_kring *kring = &pna->tx_rings[i];
+			if (kring->monitor) {
+				error = EBUSY;
+				D("ring busy");
+				goto release_out;
+			}
+			kring->monitor = mna;
+		}
+	}
+	if (nmr->nr_flags & NR_MONITOR_RX) {
+		for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
+			struct netmap_kring *kring = &pna->rx_rings[i];
+			if (kring->monitor) {
+				error = EBUSY;
+				D("ring busy");
+				goto release_out;
+			}
+			kring->monitor = mna;
+		}
+	}
+
+	snprintf(mna->up.name, sizeof(mna->up.name), "mon:%s", pna->name);
+
+	/* the monitor supports the host rings iff the parent does */
+	mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS);
+	mna->up.nm_txsync = netmap_monitor_txsync;
+	mna->up.nm_rxsync = netmap_monitor_rxsync;
+	mna->up.nm_register = netmap_monitor_reg;
+	mna->up.nm_dtor = netmap_monitor_dtor;
+	mna->up.nm_krings_create = netmap_monitor_krings_create;
+	mna->up.nm_krings_delete = netmap_monitor_krings_delete;
+	mna->up.nm_mem = pna->nm_mem;
+	mna->up.na_lut = pna->na_lut;
+	mna->up.na_lut_objtotal = pna->na_lut_objtotal;
+	mna->up.na_lut_objsize = pna->na_lut_objsize;
+
+	mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero
+	/* we set the number of our rx_rings to be max(num_tx_rings, num_rx_rings)
+	 * in the parent
+	 */
+	mna->up.num_rx_rings = pna->num_rx_rings;
+	if (pna->num_tx_rings > pna->num_rx_rings)
+		mna->up.num_rx_rings = pna->num_tx_rings;
+	/* by default, the number of slots is the same as in
+	 * the parent rings, but the user may ask for a different
+	 * number
+	 */
+	mna->up.num_tx_desc = nmr->nr_tx_slots;
+	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
+			1, NM_MONITOR_MAXSLOTS, NULL);
+	mna->up.num_rx_desc = nmr->nr_rx_slots;
+	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
+			1, NM_MONITOR_MAXSLOTS, NULL);
+	error = netmap_attach_common(&mna->up);
+	if (error) {
+		D("attach_common error");
+		goto release_out;
+	}
+
+	/* remember the traffic directions we have to monitor */
+	mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX));
+
+	*na = &mna->up;
+	netmap_adapter_get(*na);
+
+	/* write the configuration back */
+	nmr->nr_tx_rings = mna->up.num_tx_rings;
+	nmr->nr_rx_rings = mna->up.num_rx_rings;
+	nmr->nr_tx_slots = mna->up.num_tx_desc;
+	nmr->nr_rx_slots = mna->up.num_rx_desc;
+
+	/* keep the reference to the parent */
+	D("monitor ok");
+
+	return 0;
+
+release_out:
+	D("monitor error");
+	for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
+		if (pna->tx_rings[i].monitor == mna)
+			pna->tx_rings[i].monitor = NULL;
+	}
+	for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
+		if (pna->rx_rings[i].monitor == mna)
+			pna->rx_rings[i].monitor = NULL;
+	}
+put_out:
+	netmap_adapter_put(pna);
+	free(mna, M_DEVBUF);
+	return error;
+}
+
+
+#endif /* WITH_MONITOR */


Property changes on: trunk/sys/dev/netmap/netmap_monitor.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/netmap/netmap_offloadings.c
===================================================================
--- trunk/sys/dev/netmap/netmap_offloadings.c	                        (rev 0)
+++ trunk/sys/dev/netmap/netmap_offloadings.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,402 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* $FreeBSD: stable/10/sys/dev/netmap/netmap_offloadings.c 270252 2014-08-20 23:34:36Z luigi $ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>	/* defines used in kernel.h */
+#include <sys/kernel.h>	/* types used in module initialization */
+#include <sys/sockio.h>
+#include <sys/socketvar.h>	/* struct socket */
+#include <sys/socket.h> /* sockaddrs */
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h>	/* bus_dmamap_* */
+#include <sys/endian.h>
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#else
+
+#error	Unsupported platform
+
+#endif /* unsupported */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+
+
+
+/* This routine is called by bdg_mismatch_datapath() when it finishes
+ * accumulating bytes for a segment, in order to fix some fields in the
+ * segment headers (which still contain the same content as the header
+ * of the original GSO packet). 'buf' points to the beginning (e.g.
+ * the ethernet header) of the segment, and 'len' is its length.
+ */
+static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
+			    u_int segmented_bytes, u_int last_segment,
+			    u_int tcp, u_int iphlen)
+{
+	struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
+	struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
+	uint16_t *check = NULL;
+	uint8_t *check_data = NULL;
+
+	if (iphlen == 20) {
+		/* Set the IPv4 "Total Length" field. */
+		iph->tot_len = htobe16(len-14);
+		ND("ip total length %u", be16toh(iph->tot_len));
+
+		/* Set the IPv4 "Identification" field. */
+		iph->id = htobe16(be16toh(iph->id) + idx);
+		ND("ip identification %u", be16toh(iph->id));
+
+		/* Compute and insert the IPv4 header checksum. */
+		iph->check = 0;
+		iph->check = nm_csum_ipv4(iph);
+		ND("IP csum %x", be16toh(iph->check));
+	} else {/* if (iphlen == 40) */
+		/* Set the IPv6 "Payload Len" field. */
+		ip6h->payload_len = htobe16(len-14-iphlen);
+	}
+
+	if (tcp) {
+		struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);
+
+		/* Set the TCP sequence number. */
+		tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
+		ND("tcp seq %u", be32toh(tcph->seq));
+
+		/* Zero the PSH and FIN TCP flags if this is not the last
+		   segment. */
+		if (!last_segment)
+			tcph->flags &= ~(0x8 | 0x1);
+		ND("last_segment %u", last_segment);
+
+		check = &tcph->check;
+		check_data = (uint8_t *)tcph;
+	} else { /* UDP */
+		struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);
+
+		/* Set the UDP 'Length' field. */
+		udph->len = htobe16(len-14-iphlen);
+
+		check = &udph->check;
+		check_data = (uint8_t *)udph;
+	}
+
+	/* Compute and insert TCP/UDP checksum. */
+	*check = 0;
+	if (iphlen == 20)
+		nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
+	else
+		nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);
+
+	ND("TCP/UDP csum %x", be16toh(*check));
+}
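
As a concrete (hypothetical) illustration of the fix-ups above: take an
IPv4/TCP GSO packet whose header is 14 + 20 + 32 = 66 bytes and whose payload
is cut into segments carrying 1400 bytes each. For segment idx the routine
then sets, following the code above:

    iph->tot_len = segment length - 14
    iph->id      = original id + idx
    tcph->seq    = original seq + segmented_bytes = original seq + idx * 1400
    PSH and FIN    cleared unless this is the last segment

and recomputes both the IPv4 header checksum and the TCP/UDP checksum over
the rewritten fields.
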
+
+
+/* The VALE mismatch datapath implementation. */
+void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
+			   struct netmap_vp_adapter *dst_na,
+			   struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
+			   u_int *j, u_int lim, u_int *howmany)
+{
+	struct netmap_slot *slot = NULL;
+	struct nm_vnet_hdr *vh = NULL;
+	/* Number of source slots to process. */
+	u_int frags = ft_p->ft_frags;
+	struct nm_bdg_fwd *ft_end = ft_p + frags;
+
+	/* Source and destination pointers. */
+	uint8_t *dst, *src;
+	size_t src_len, dst_len;
+
+	u_int j_start = *j;
+	u_int dst_slots = 0;
+
+	/* If the source port uses the offloadings while the destination doesn't,
+	 * we grab the source virtio-net header and do the offloadings here.
+	 */
+	if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
+		vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
+	}
+
+	/* Init source and dest pointers. */
+	src = ft_p->ft_buf;
+	src_len = ft_p->ft_len;
+	slot = &ring->slot[*j];
+	dst = NMB(&dst_na->up, slot);
+	dst_len = src_len;
+
+	/* We are processing the first input slot and there is a mismatch
+	 * between source and destination virt_hdr_len (SHL and DHL).
+	 * When a client is using virtio-net headers, the header length
+	 * can be:
+	 *    - 10: the header corresponds to the struct nm_vnet_hdr
+	 *    - 12: the first 10 bytes correspond to the struct
+	 *          virtio_net_hdr, and the last 2 bytes store the
+	 *          "mergeable buffers" info, which is an optional
+	 *	    hint that can be zeroed for compatibility
+	 *
+	 * The destination header is therefore built according to the
+	 * following table:
+	 *
+	 * SHL | DHL | destination header
+	 * -----------------------------
+	 *   0 |  10 | zero
+	 *   0 |  12 | zero
+	 *  10 |   0 | doesn't exist
+	 *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
+	 *  12 |   0 | doesn't exist
+	 *  12 |  10 | copied from the first 10 bytes of source header
+	 */
+	bzero(dst, dst_na->virt_hdr_len);
+	if (na->virt_hdr_len && dst_na->virt_hdr_len)
+		memcpy(dst, src, sizeof(struct nm_vnet_hdr));
+	/* Skip the virtio-net headers. */
+	src += na->virt_hdr_len;
+	src_len -= na->virt_hdr_len;
+	dst += dst_na->virt_hdr_len;
+	dst_len = dst_na->virt_hdr_len + src_len;
+
+	/* Here it could be dst_len == 0 (which implies src_len == 0),
+	 * so we avoid passing a zero length fragment.
+	 */
+	if (dst_len == 0) {
+		ft_p++;
+		src = ft_p->ft_buf;
+		src_len = ft_p->ft_len;
+		dst_len = src_len;
+	}
+
+	if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+		u_int gso_bytes = 0;
+		/* Length of the GSO packet header. */
+		u_int gso_hdr_len = 0;
+		/* Pointer to the GSO packet header. Assume it is in a single fragment. */
+		uint8_t *gso_hdr = NULL;
+		/* Index of the current segment. */
+		u_int gso_idx = 0;
+		/* Payload data bytes segmented so far (e.g. TCP data bytes). */
+		u_int segmented_bytes = 0;
+		/* Length of the IP header (20 if IPv4, 40 if IPv6). */
+		u_int iphlen = 0;
+		/* Is this a TCP or a UDP GSO packet? */
+		u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
+				== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
+
+		/* Segment the GSO packet contained in the input slots (frags). */
+		while (ft_p != ft_end) {
+			size_t copy;
+
+			/* Grab the GSO header if we don't have it. */
+			if (!gso_hdr) {
+				uint16_t ethertype;
+
+				gso_hdr = src;
+
+				/* Look at the 'Ethertype' field to see if this packet
+				 * is IPv4 or IPv6.
+				 */
+				ethertype = be16toh(*((uint16_t *)(gso_hdr  + 12)));
+				if (ethertype == 0x0800)
+					iphlen = 20;
+				else /* if (ethertype == 0x86DD) */
+					iphlen = 40;
+				ND(3, "type=%04x", ethertype);
+
+				/* Compute gso_hdr_len. For TCP we need to read the
+				 * content of the 'Data Offset' field.
+				 */
+				if (tcp) {
+					struct nm_tcphdr *tcph =
+						(struct nm_tcphdr *)&gso_hdr[14+iphlen];
+
+					gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
+				} else
+					gso_hdr_len = 14 + iphlen + 8; /* UDP */
+
+				ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
+								dst_na->mfs);
+
+				/* Advance source pointers. */
+				src += gso_hdr_len;
+				src_len -= gso_hdr_len;
+				if (src_len == 0) {
+					ft_p++;
+					if (ft_p == ft_end)
+						break;
+					src = ft_p->ft_buf;
+					src_len = ft_p->ft_len;
+					continue;
+				}
+			}
+
+			/* Fill in the header of the current segment. */
+			if (gso_bytes == 0) {
+				memcpy(dst, gso_hdr, gso_hdr_len);
+				gso_bytes = gso_hdr_len;
+			}
+
+			/* Fill in data and update source and dest pointers. */
+			copy = src_len;
+			if (gso_bytes + copy > dst_na->mfs)
+				copy = dst_na->mfs - gso_bytes;
+			memcpy(dst + gso_bytes, src, copy);
+			gso_bytes += copy;
+			src += copy;
+			src_len -= copy;
+
+			/* A segment is complete or we have processed all
+			   the GSO payload bytes. */
+			if (gso_bytes >= dst_na->mfs ||
+				(src_len == 0 && ft_p + 1 == ft_end)) {
+				/* After raw segmentation, we must fix some header
+				 * fields and compute checksums, in a protocol dependent
+				 * way. */
+				gso_fix_segment(dst, gso_bytes, gso_idx,
+						segmented_bytes,
+						src_len == 0 && ft_p + 1 == ft_end,
+						tcp, iphlen);
+
+				ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
+				slot->len = gso_bytes;
+				slot->flags = 0;
+				segmented_bytes += gso_bytes - gso_hdr_len;
+
+				dst_slots++;
+
+				/* Next destination slot. */
+				*j = nm_next(*j, lim);
+				slot = &ring->slot[*j];
+				dst = NMB(&dst_na->up, slot);
+
+				gso_bytes = 0;
+				gso_idx++;
+			}
+
+			/* Next input slot. */
+			if (src_len == 0) {
+				ft_p++;
+				if (ft_p == ft_end)
+					break;
+				src = ft_p->ft_buf;
+				src_len = ft_p->ft_len;
+			}
+		}
+		ND(3, "%d bytes segmented", segmented_bytes);
+
+	} else {
+		/* Address of a checksum field into a destination slot. */
+		uint16_t *check = NULL;
+		/* Accumulator for an unfolded checksum. */
+		rawsum_t csum = 0;
+
+		/* Process a non-GSO packet. */
+
+		/* Init 'check' if necessary. */
+		if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
+			if (unlikely(vh->csum_offset + vh->csum_start > src_len))
+				D("invalid checksum request");
+			else
+				check = (uint16_t *)(dst + vh->csum_start +
+						vh->csum_offset);
+		}
+
+		while (ft_p != ft_end) {
+			/* Init/update the packet checksum if needed. */
+			if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
+				if (!dst_slots)
+					csum = nm_csum_raw(src + vh->csum_start,
+								src_len - vh->csum_start, 0);
+				else
+					csum = nm_csum_raw(src, src_len, csum);
+			}
+
+			/* Round to a multiple of 64 */
+			src_len = (src_len + 63) & ~63;
+
+			if (ft_p->ft_flags & NS_INDIRECT) {
+				if (copyin(src, dst, src_len)) {
+					/* Invalid user pointer, pretend len is 0. */
+					dst_len = 0;
+				}
+			} else {
+				memcpy(dst, src, (int)src_len);
+			}
+			slot->len = dst_len;
+
+			dst_slots++;
+
+			/* Next destination slot. */
+			*j = nm_next(*j, lim);
+			slot = &ring->slot[*j];
+			dst = NMB(&dst_na->up, slot);
+
+			/* Next source slot. */
+			ft_p++;
+			src = ft_p->ft_buf;
+			dst_len = src_len = ft_p->ft_len;
+
+		}
+
+		/* Finalize (fold) the checksum if needed. */
+		if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
+			*check = nm_csum_fold(csum);
+		}
+		ND(3, "using %u dst_slots", dst_slots);
+
+		/* A second pass on the destination slots to set the slot flags,
+		 * using the right number of destination slots.
+		 */
+		while (j_start != *j) {
+			slot = &ring->slot[j_start];
+			slot->flags = (dst_slots << 8)| NS_MOREFRAG;
+			j_start = nm_next(j_start, lim);
+		}
+		/* Clear NS_MOREFRAG flag on last entry. */
+		slot->flags = (dst_slots << 8);
+	}
+
+	/* Update howmany. */
+	if (unlikely(dst_slots > *howmany)) {
+		dst_slots = *howmany;
+		D("Slot allocation error: Should never happen");
+	}
+	*howmany -= dst_slots;
+}


Property changes on: trunk/sys/dev/netmap/netmap_offloadings.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/netmap/netmap_pipe.c
===================================================================
--- trunk/sys/dev/netmap/netmap_pipe.c	                        (rev 0)
+++ trunk/sys/dev/netmap/netmap_pipe.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,689 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* $FreeBSD: stable/10/sys/dev/netmap/netmap_pipe.c 278779 2015-02-14 19:41:26Z luigi $ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>	/* defines used in kernel.h */
+#include <sys/kernel.h>	/* types used in module initialization */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h>	/* bus_dmamap_* */
+#include <sys/refcount.h>
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#else
+
+#error	Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#ifdef WITH_PIPES
+
+#define NM_PIPE_MAXSLOTS	4096
+
+int netmap_default_pipes = 0; /* default number of pipes for each nic */
+SYSCTL_DECL(_dev_netmap);
+SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , "");
+
+/* allocate the pipe array in the parent adapter */
+int
+netmap_pipe_alloc(struct netmap_adapter *na, struct nmreq *nmr)
+{
+	size_t len;
+	int mode = nmr->nr_flags & NR_REG_MASK;
+	u_int npipes;
+
+	if (mode == NR_REG_PIPE_MASTER || mode == NR_REG_PIPE_SLAVE) {
+		/* this is for our parent, not for us */
+		return 0;
+	}
+
+	/* TODO: we can resize the array if the new
+         * request can accommodate the already existing pipes
+         */
+	if (na->na_pipes) {
+		nmr->nr_arg1 = na->na_max_pipes;
+		return 0;
+	}
+
+	npipes = nmr->nr_arg1;
+	if (npipes == 0)
+		npipes = netmap_default_pipes;
+	nm_bound_var(&npipes, 0, 0, NM_MAXPIPES, NULL);
+
+	if (npipes == 0) {
+		/* really zero, nothing to alloc */
+		goto out;
+	}
+
+	len = sizeof(struct netmap_pipe_adapter *) * npipes;
+	na->na_pipes = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (na->na_pipes == NULL)
+		return ENOMEM;
+
+	na->na_max_pipes = npipes;
+	na->na_next_pipe = 0;
+
+out:
+	nmr->nr_arg1 = npipes;
+
+	return 0;
+}
+
+/* deallocate the parent array in the parent adapter */
+void
+netmap_pipe_dealloc(struct netmap_adapter *na)
+{
+	if (na->na_pipes) {
+		ND("freeing pipes for %s", na->name);
+		free(na->na_pipes, M_DEVBUF);
+		na->na_pipes = NULL;
+		na->na_max_pipes = 0;
+		na->na_next_pipe = 0;
+	}
+}
+
+/* find a pipe endpoint with the given id among the parent's pipes */
+static struct netmap_pipe_adapter *
+netmap_pipe_find(struct netmap_adapter *parent, u_int pipe_id)
+{
+	int i;
+	struct netmap_pipe_adapter *na;
+
+	for (i = 0; i < parent->na_next_pipe; i++) {
+		na = parent->na_pipes[i];
+		if (na->id == pipe_id) {
+			return na;
+		}
+	}
+	return NULL;
+}
+
+/* add a new pipe endpoint to the parent array */
+static int
+netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
+{
+	if (parent->na_next_pipe >= parent->na_max_pipes) {
+		D("%s: no space left for pipes", parent->name);
+		return ENOMEM;
+	}
+
+	parent->na_pipes[parent->na_next_pipe] = na;
+	na->parent_slot = parent->na_next_pipe;
+	parent->na_next_pipe++;
+	return 0;
+}
+
+/* remove the given pipe endpoint from the parent array */
+static void
+netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
+{
+	u_int n;
+	n = --parent->na_next_pipe;
+	if (n != na->parent_slot) {
+		parent->na_pipes[na->parent_slot] =
+			parent->na_pipes[n];
+	}
+	parent->na_pipes[n] = NULL;
+}
+
+static int
+netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
+{
+        struct netmap_kring *rxkring = txkring->pipe;
+        u_int limit; /* slots to transfer */
+        u_int j, k, lim_tx = txkring->nkr_num_slots - 1,
+                lim_rx = rxkring->nkr_num_slots - 1;
+        int m, busy;
+
+        ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
+        ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail,
+                txkring->rcur, txkring->rhead, txkring->rtail);
+
+        j = rxkring->nr_hwtail; /* RX */
+        k = txkring->nr_hwcur;  /* TX */
+        m = txkring->rhead - txkring->nr_hwcur; /* new slots */
+        if (m < 0)
+                m += txkring->nkr_num_slots;
+        limit = m;
+        m = lim_rx; /* max avail space on destination */
+        busy = j - rxkring->nr_hwcur; /* busy slots */
+	if (busy < 0)
+		busy += rxkring->nkr_num_slots;
+	m -= busy; /* subtract busy slots */
+        ND(2, "m %d limit %d", m, limit);
+        if (m < limit)
+                limit = m;
+
+	if (limit == 0) {
+		/* either the rxring is full, or nothing to send */
+		nm_txsync_finalize(txkring); /* actually useless */
+		return 0;
+	}
+
+        while (limit-- > 0) {
+                struct netmap_slot *rs = &rxkring->save_ring->slot[j];
+                struct netmap_slot *ts = &txkring->ring->slot[k];
+                struct netmap_slot tmp;
+
+                /* swap the slots */
+                tmp = *rs;
+                *rs = *ts;
+                *ts = tmp;
+
+                /* no need to report the buffer change */
+
+                j = nm_next(j, lim_rx);
+                k = nm_next(k, lim_tx);
+        }
+
+        mb(); /* make sure the slots are updated before publishing them */
+        rxkring->nr_hwtail = j;
+        txkring->nr_hwcur = k;
+        txkring->nr_hwtail = nm_prev(k, lim_tx);
+
+        nm_txsync_finalize(txkring);
+        ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail,
+                txkring->rcur, txkring->rhead, txkring->rtail, j);
+
+        mb(); /* make sure rxkring->nr_hwtail is updated before notifying */
+        rxkring->na->nm_notify(rxkring->na, rxkring->ring_id, NR_RX, 0);
+
+	return 0;
+}
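+
+/*
+ * Illustrative sketch (not part of this driver): the mb() pairing used by
+ * txsync/rxsync above, expressed with C11 atomics so that the example is
+ * self-contained.  The struct and helpers are hypothetical; the real code
+ * relies on the kernel mb() macro on both the producer and consumer side.
+ */
+#if 0	/* example only, never compiled */
+#include <stdatomic.h>
+
+#define SKETCH_SLOTS	256
+
+struct pipe_ring_sketch {
+	int slots[SKETCH_SLOTS];
+	_Atomic unsigned tail;	/* index published by the txsync side */
+};
+
+static void
+publish_sketch(struct pipe_ring_sketch *p, unsigned new_tail, int payload)
+{
+	p->slots[new_tail % SKETCH_SLOTS] = payload;	/* write the slot first */
+	atomic_thread_fence(memory_order_release);	/* "make sure the slots are
+							 * updated before publishing" */
+	atomic_store_explicit(&p->tail, new_tail, memory_order_relaxed);
+}
+
+static unsigned
+consume_sketch(struct pipe_ring_sketch *p)
+{
+	unsigned t = atomic_load_explicit(&p->tail, memory_order_relaxed);
+
+	atomic_thread_fence(memory_order_acquire);	/* paired with the fence
+							 * on the producer side */
+	/* slots up to and including 't' are now guaranteed to be visible */
+	return t;
+}
+#endif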
+
+static int
+netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
+{
+        struct netmap_kring *txkring = rxkring->pipe;
+	uint32_t oldhwcur = rxkring->nr_hwcur;
+
+        ND("%s %x <- %s", rxkring->name, flags, txkring->name);
+        rxkring->nr_hwcur = rxkring->rhead; /* recover user-released slots */
+        ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail,
+                rxkring->rcur, rxkring->rhead, rxkring->rtail);
+        mb(); /* paired with the first mb() in txsync */
+        nm_rxsync_finalize(rxkring);
+
+	if (oldhwcur != rxkring->nr_hwcur) {
+		/* we have released some slots, notify the other end */
+		mb(); /* make sure nr_hwcur is updated before notifying */
+		txkring->na->nm_notify(txkring->na, txkring->ring_id, NR_TX, 0);
+	}
+        return 0;
+}
+
+/* Pipe endpoints are created and destroyed together, so that endpoints do not
+ * have to check for the existence of their peer at each ?xsync.
+ *
+ * To play well with the existing netmap infrastructure (refcounts etc.), we
+ * adopt the following strategy:
+ *
+ * 1) The first endpoint that is created also creates the other endpoint and
+ * grabs a reference to it.
+ *
+ *    state A)  user1 --> endpoint1 --> endpoint2
+ *
+ * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
+ * its reference to the user:
+ *
+ *    state B)  user1 --> endpoint1     endpoint2 <--- user2
+ *
+ * 3) Assume that, starting from state B endpoint2 is closed. In the unregister
+ * callback endpoint2 notes that endpoint1 is still active and adds a reference
+ * from endpoint1 to itself. When user2 then releases her own reference,
+ * endpoint2 is not destroyed and we are back to state A. A symmetrical state
+ * would be reached if endpoint1 were released instead.
+ *
+ * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
+ * it owns a reference to endpoint2 and releases it.
+ *
+ * Something similar goes on for the creation and destruction of the krings.
+ */
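+
+/*
+ * Illustrative sketch (not part of this driver): the reference handoff
+ * described above, reduced to its essentials.  'peer_ref' mirrors the field
+ * of the same name in struct netmap_pipe_adapter; the two helpers are
+ * hypothetical stand-ins for the register/unregister paths below.
+ */
+#if 0	/* example only, never compiled */
+struct pipe_end_sketch {
+	struct pipe_end_sketch *peer;
+	int refs;	/* references held on this endpoint */
+	int peer_ref;	/* 1 if we hold the reference keeping 'peer' alive */
+};
+
+static void
+register_sketch(struct pipe_end_sketch *e)
+{
+	/* case 1.b above: our peer was keeping us alive on behalf of the
+	 * user; that reference now belongs to the user, so drop it here.
+	 */
+	if (e->peer->peer_ref) {
+		e->peer->peer_ref = 0;
+		e->refs--;
+	}
+}
+
+static void
+unregister_sketch(struct pipe_end_sketch *e)
+{
+	if (e->peer_ref)
+		return;		/* case 2.a above: the destructor cleans up */
+	/* case 2.b above: the peer is still active, so it takes a reference
+	 * on us and we survive until the peer goes away (back to state A).
+	 */
+	e->refs++;
+	e->peer->peer_ref = 1;
+}
+#endif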
+
+
+/* netmap_pipe_krings_create.
+ *
+ * There are two cases:
+ *
+ * 1) state is
+ *
+ *        usr1 --> e1 --> e2
+ *
+ *    and we are e1. We have to create both sets
+ *    of krings.
+ *
+ * 2) state is
+ *
+ *        usr1 --> e1 --> e2
+ *
+ *    and we are e2. e1 is certainly registered and our
+ *    krings already exist, but they may be hidden.
+ */
+static int
+netmap_pipe_krings_create(struct netmap_adapter *na)
+{
+	struct netmap_pipe_adapter *pna =
+		(struct netmap_pipe_adapter *)na;
+	struct netmap_adapter *ona = &pna->peer->up;
+	int error = 0;
+	if (pna->peer_ref) {
+		int i;
+
+		/* case 1) above */
+		D("%p: case 1, create everything", na);
+		error = netmap_krings_create(na, 0);
+		if (error)
+			goto err;
+
+		/* we also create all the rings, since we need to
+                 * update the save_ring pointers.
+                 * netmap_mem_rings_create (called by our caller)
+                 * will not create the rings again
+                 */
+
+		error = netmap_mem_rings_create(na);
+		if (error)
+			goto del_krings1;
+
+		/* update our hidden ring pointers */
+		for (i = 0; i < na->num_tx_rings + 1; i++)
+			na->tx_rings[i].save_ring = na->tx_rings[i].ring;
+		for (i = 0; i < na->num_rx_rings + 1; i++)
+			na->rx_rings[i].save_ring = na->rx_rings[i].ring;
+
+		/* now, create krings and rings of the other end */
+		error = netmap_krings_create(ona, 0);
+		if (error)
+			goto del_rings1;
+
+		error = netmap_mem_rings_create(ona);
+		if (error)
+			goto del_krings2;
+
+		for (i = 0; i < ona->num_tx_rings + 1; i++)
+			ona->tx_rings[i].save_ring = ona->tx_rings[i].ring;
+		for (i = 0; i < ona->num_rx_rings + 1; i++)
+			ona->rx_rings[i].save_ring = ona->rx_rings[i].ring;
+
+		/* cross link the krings */
+		for (i = 0; i < na->num_tx_rings; i++) {
+			na->tx_rings[i].pipe = pna->peer->up.rx_rings + i;
+			na->rx_rings[i].pipe = pna->peer->up.tx_rings + i;
+			pna->peer->up.tx_rings[i].pipe = na->rx_rings + i;
+			pna->peer->up.rx_rings[i].pipe = na->tx_rings + i;
+		}
+	} else {
+		int i;
+		/* case 2) above */
+		/* recover the hidden rings */
+		ND("%p: case 2, hidden rings", na);
+		for (i = 0; i < na->num_tx_rings + 1; i++)
+			na->tx_rings[i].ring = na->tx_rings[i].save_ring;
+		for (i = 0; i < na->num_rx_rings + 1; i++)
+			na->rx_rings[i].ring = na->rx_rings[i].save_ring;
+	}
+	return 0;
+
+del_krings2:
+	netmap_krings_delete(ona);
+del_rings1:
+	netmap_mem_rings_delete(na);
+del_krings1:
+	netmap_krings_delete(na);
+err:
+	return error;
+}
+
+/* netmap_pipe_reg.
+ *
+ * There are two cases on registration (onoff==1)
+ *
+ * 1.a) state is
+ *
+ *        usr1 --> e1 --> e2
+ *
+ *      and we are e1. Nothing special to do.
+ *
+ * 1.b) state is
+ *
+ *        usr1 --> e1 --> e2 <-- usr2
+ *
+ *      and we are e2. Drop the ref e1 is holding.
+ *
+ *  There are two additional cases on unregister (onoff==0)
+ *
+ *  2.a) state is
+ *
+ *         usr1 --> e1 --> e2
+ *
+ *       and we are e1. Nothing special to do, e2 will
+ *       be cleaned up by the destructor of e1.
+ *
+ *  2.b) state is
+ *
+ *         usr1 --> e1     e2 <-- usr2
+ *
+ *       and we are either e1 or e2. Add a ref from the
+ *       other end and hide our rings.
+ */
+static int
+netmap_pipe_reg(struct netmap_adapter *na, int onoff)
+{
+	struct netmap_pipe_adapter *pna =
+		(struct netmap_pipe_adapter *)na;
+	ND("%p: onoff %d", na, onoff);
+	if (onoff) {
+		na->na_flags |= NAF_NETMAP_ON;
+	} else {
+		na->na_flags &= ~NAF_NETMAP_ON;
+	}
+	if (pna->peer_ref) {
+		ND("%p: case 1.a or 2.a, nothing to do", na);
+		return 0;
+	}
+	if (onoff) {
+		ND("%p: case 1.b, drop peer", na);
+		pna->peer->peer_ref = 0;
+		netmap_adapter_put(na);
+	} else {
+		int i;
+		ND("%p: case 2.b, grab peer", na);
+		netmap_adapter_get(na);
+		pna->peer->peer_ref = 1;
+		/* hide our rings from netmap_mem_rings_delete */
+		for (i = 0; i < na->num_tx_rings + 1; i++) {
+			na->tx_rings[i].ring = NULL;
+		}
+		for (i = 0; i < na->num_rx_rings + 1; i++) {
+			na->rx_rings[i].ring = NULL;
+		}
+	}
+	return 0;
+}
+
+/* netmap_pipe_krings_delete.
+ *
+ * There are two cases:
+ *
+ * 1) state is
+ *
+ *                usr1 --> e1 --> e2
+ *
+ *    and we are e1 (e2 is not registered, so krings_delete cannot be
+ *    called on it);
+ *
+ * 2) state is
+ *
+ *                usr1 --> e1     e2 <-- usr2
+ *
+ *    and we are either e1 or e2.
+ *
+ * In the former case we have to also delete the krings of e2;
+ * in the latter case we do nothing (note that our krings
+ * have already been hidden in the unregister callback).
+ */
+static void
+netmap_pipe_krings_delete(struct netmap_adapter *na)
+{
+	struct netmap_pipe_adapter *pna =
+		(struct netmap_pipe_adapter *)na;
+	struct netmap_adapter *ona; /* na of the other end */
+	int i;
+
+	if (!pna->peer_ref) {
+		ND("%p: case 2, kept alive by peer",  na);
+		return;
+	}
+	/* case 1) above */
+	ND("%p: case 1, deleting everything", na);
+	netmap_krings_delete(na); /* also zeroes tx_rings etc. */
+	/* restore the ring to be deleted on the peer */
+	ona = &pna->peer->up;
+	if (ona->tx_rings == NULL) {
+		/* already deleted, we must be on a
+                 * cleanup-after-error path */
+		return;
+	}
+	for (i = 0; i < ona->num_tx_rings + 1; i++)
+		ona->tx_rings[i].ring = ona->tx_rings[i].save_ring;
+	for (i = 0; i < ona->num_rx_rings + 1; i++)
+		ona->rx_rings[i].ring = ona->rx_rings[i].save_ring;
+	netmap_mem_rings_delete(ona);
+	netmap_krings_delete(ona);
+}
+
+
+static void
+netmap_pipe_dtor(struct netmap_adapter *na)
+{
+	struct netmap_pipe_adapter *pna =
+		(struct netmap_pipe_adapter *)na;
+	ND("%p", na);
+	if (pna->peer_ref) {
+		ND("%p: clean up peer", na);
+		pna->peer_ref = 0;
+		netmap_adapter_put(&pna->peer->up);
+	}
+	if (pna->role == NR_REG_PIPE_MASTER)
+		netmap_pipe_remove(pna->parent, pna);
+	netmap_adapter_put(pna->parent);
+	pna->parent = NULL;
+}
+
+int
+netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
+{
+	struct nmreq pnmr;
+	struct netmap_adapter *pna; /* parent adapter */
+	struct netmap_pipe_adapter *mna, *sna, *req;
+	u_int pipe_id;
+	int role = nmr->nr_flags & NR_REG_MASK;
+	int error;
+
+	ND("flags %x", nmr->nr_flags);
+
+	if (role != NR_REG_PIPE_MASTER && role != NR_REG_PIPE_SLAVE) {
+		ND("not a pipe");
+		return 0;
+	}
+	role = nmr->nr_flags & NR_REG_MASK;
+
+	/* first, try to find the parent adapter */
+	bzero(&pnmr, sizeof(pnmr));
+	memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ);
+	/* pass to parent the requested number of pipes */
+	pnmr.nr_arg1 = nmr->nr_arg1;
+	error = netmap_get_na(&pnmr, &pna, create);
+	if (error) {
+		ND("parent lookup failed: %d", error);
+		return error;
+	}
+	ND("found parent: %s", na->name);
+
+	if (NETMAP_OWNED_BY_KERN(pna)) {
+		ND("parent busy");
+		error = EBUSY;
+		goto put_out;
+	}
+
+	/* next, lookup the pipe id in the parent list */
+	req = NULL;
+	pipe_id = nmr->nr_ringid & NETMAP_RING_MASK;
+	mna = netmap_pipe_find(pna, pipe_id);
+	if (mna) {
+		if (mna->role == role) {
+			ND("found %d directly at %d", pipe_id, mna->parent_slot);
+			req = mna;
+		} else {
+			ND("found %d indirectly at %d", pipe_id, mna->parent_slot);
+			req = mna->peer;
+		}
+		/* the pipe we have found already holds a ref to the parent,
+                 * so we need to drop the one we got from netmap_get_na()
+                 */
+		netmap_adapter_put(pna);
+		goto found;
+	}
+	ND("pipe %d not found, create %d", pipe_id, create);
+	if (!create) {
+		error = ENODEV;
+		goto put_out;
+	}
+	/* we create both master and slave.
+         * The endpoint we were asked for holds a reference to
+         * the other one.
+         */
+	mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (mna == NULL) {
+		error = ENOMEM;
+		goto put_out;
+	}
+	snprintf(mna->up.name, sizeof(mna->up.name), "%s{%d", pna->name, pipe_id);
+
+	mna->id = pipe_id;
+	mna->role = NR_REG_PIPE_MASTER;
+	mna->parent = pna;
+
+	mna->up.nm_txsync = netmap_pipe_txsync;
+	mna->up.nm_rxsync = netmap_pipe_rxsync;
+	mna->up.nm_register = netmap_pipe_reg;
+	mna->up.nm_dtor = netmap_pipe_dtor;
+	mna->up.nm_krings_create = netmap_pipe_krings_create;
+	mna->up.nm_krings_delete = netmap_pipe_krings_delete;
+	mna->up.nm_mem = pna->nm_mem;
+	mna->up.na_lut = pna->na_lut;
+	mna->up.na_lut_objtotal = pna->na_lut_objtotal;
+	mna->up.na_lut_objsize = pna->na_lut_objsize;
+
+	mna->up.num_tx_rings = 1;
+	mna->up.num_rx_rings = 1;
+	mna->up.num_tx_desc = nmr->nr_tx_slots;
+	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
+			1, NM_PIPE_MAXSLOTS, NULL);
+	mna->up.num_rx_desc = nmr->nr_rx_slots;
+	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
+			1, NM_PIPE_MAXSLOTS, NULL);
+	error = netmap_attach_common(&mna->up);
+	if (error)
+		goto free_mna;
+	/* register the master with the parent */
+	error = netmap_pipe_add(pna, mna);
+	if (error)
+		goto free_mna;
+
+	/* create the slave */
+	sna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (sna == NULL) {
+		error = ENOMEM;
+		goto free_mna;
+	}
+	/* most fields are the same, copy from master and then fix */
+	*sna = *mna;
+	snprintf(sna->up.name, sizeof(sna->up.name), "%s}%d", pna->name, pipe_id);
+	sna->role = NR_REG_PIPE_SLAVE;
+	error = netmap_attach_common(&sna->up);
+	if (error)
+		goto free_sna;
+
+	/* join the two endpoints */
+	mna->peer = sna;
+	sna->peer = mna;
+
+	/* we already have a reference to the parent, but we
+         * need another one for the other endpoint we created
+         */
+	netmap_adapter_get(pna);
+
+	if (role == NR_REG_PIPE_MASTER) {
+		req = mna;
+		mna->peer_ref = 1;
+		netmap_adapter_get(&sna->up);
+	} else {
+		req = sna;
+		sna->peer_ref = 1;
+		netmap_adapter_get(&mna->up);
+	}
+	ND("created master %p and slave %p", mna, sna);
+found:
+
+	ND("pipe %d %s at %p", pipe_id,
+		(req->role == NR_REG_PIPE_MASTER ? "master" : "slave"), req);
+	*na = &req->up;
+	netmap_adapter_get(*na);
+
+	/* write the configuration back */
+	nmr->nr_tx_rings = req->up.num_tx_rings;
+	nmr->nr_rx_rings = req->up.num_rx_rings;
+	nmr->nr_tx_slots = req->up.num_tx_desc;
+	nmr->nr_rx_slots = req->up.num_rx_desc;
+
+	/* keep the reference to the parent.
+         * It will be released by the req destructor
+         */
+
+	return 0;
+
+free_sna:
+	free(sna, M_DEVBUF);
+free_mna:
+	free(mna, M_DEVBUF);
+put_out:
+	netmap_adapter_put(pna);
+	return error;
+}
+
+
+#endif /* WITH_PIPES */


Property changes on: trunk/sys/dev/netmap/netmap_pipe.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/dev/netmap/netmap_vale.c
===================================================================
--- trunk/sys/dev/netmap/netmap_vale.c	                        (rev 0)
+++ trunk/sys/dev/netmap/netmap_vale.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -0,0 +1,2438 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+/*
+ * This module implements the VALE switch for netmap
+
+--- VALE SWITCH ---
+
+NMG_LOCK() serializes all modifications to switches and ports.
+A switch cannot be deleted until all ports are gone.
+
+For each switch, an SX lock (RWlock on linux) protects
+deletion of ports. When configuring or deleting a new port, the
+lock is acquired in exclusive mode (after holding NMG_LOCK).
+When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
+The lock is held throughout the entire forwarding cycle,
+during which the thread may incur a page fault.
+Hence it is important that sleepable shared locks are used.
+
+On the rx ring, the per-port lock is grabbed initially to reserve
+a number of slots in the ring, then the lock is released,
+packets are copied from source to destination, and then
+the lock is acquired again and the receive ring is updated.
+(A similar thing is done on the tx ring for NIC and host stack
+ports attached to the switch)
+
+ */
+
+/*
+ * OS-specific code that is used only within this file.
+ * Other OS-specific code that must be accessed by drivers
+ * is present in netmap_kern.h
+ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD: stable/10/sys/dev/netmap/netmap_vale.c 270252 2014-08-20 23:34:36Z luigi $");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>	/* defines used in kernel.h */
+#include <sys/kernel.h>	/* types used in module initialization */
+#include <sys/conf.h>	/* cdevsw struct, UID, GID */
+#include <sys/sockio.h>
+#include <sys/socketvar.h>	/* struct socket */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/bpf.h>		/* BIOCIMMEDIATE */
+#include <machine/bus.h>	/* bus_dmamap_* */
+#include <sys/endian.h>
+#include <sys/refcount.h>
+
+
+#define BDG_RWLOCK_T		struct rwlock // struct rwlock
+
+#define	BDG_RWINIT(b)		\
+	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
+#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
+#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
+#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
+#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
+#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
+#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#else
+
+#error	Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#ifdef WITH_VALE
+
+/*
+ * system parameters (most of them in netmap_kern.h)
+ * NM_NAME	prefix for switch port names, default "vale"
+ * NM_BDG_MAXPORTS	number of ports
+ * NM_BRIDGES	max number of switches in the system.
+ *	XXX should become a sysctl or tunable
+ *
+ * Switch ports are named valeX:Y where X is the switch name and Y
+ * is the port. If Y matches a physical interface name, the port is
+ * connected to a physical device.
+ *
+ * Unlike physical interfaces, switch ports use their own memory region
+ * for rings and buffers.
+ * The virtual interfaces use per-queue lock instead of core lock.
+ * In the tx loop, we aggregate traffic in batches to make all operations
+ * faster. The batch size is bridge_batch.
+ */
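+
+/*
+ * Illustrative user-space sketch (not part of this driver): attaching to a
+ * switch port by name, following the "valeX:Y" convention described above.
+ * The switch and an ephemeral port are created on first use.  The field
+ * values and the NR_REG_ALL_NIC mode are typical choices, not requirements.
+ */
+#if 0	/* example only, never compiled here */
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <string.h>
+#include <net/if.h>
+#include <net/netmap.h>
+#include <net/netmap_user.h>
+
+static int
+open_vale_port_sketch(void)
+{
+	struct nmreq req;
+	int fd = open("/dev/netmap", O_RDWR);
+
+	if (fd < 0)
+		return -1;
+	memset(&req, 0, sizeof(req));
+	strncpy(req.nr_name, "vale0:p0", sizeof(req.nr_name) - 1);
+	req.nr_version = NETMAP_API;
+	req.nr_flags = NR_REG_ALL_NIC;	/* all rings of the port */
+	if (ioctl(fd, NIOCREGIF, &req) < 0)
+		return -1;
+	/* the shared region (req.nr_memsize bytes) can now be mmap()ed */
+	return fd;
+}
+#endif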
+#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
+#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
+#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
+#define NM_BDG_HASH		1024	/* forwarding table entries */
+#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
+#define NM_MULTISEG		64	/* max size of a chain of bufs */
+/* actual size of the tables */
+#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
+/* NM_FT_NULL terminates a list of slots in the ft */
+#define NM_FT_NULL		NM_BDG_BATCH_MAX
+#define	NM_BRIDGES		8	/* number of bridges */
+
+
+/*
+ * bridge_batch is set via sysctl to the max batch size to be
+ * used in the bridge. The actual value may be larger as the
+ * last packet in the block may overflow the size.
+ */
+int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
+SYSCTL_DECL(_dev_netmap);
+SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
+
+
+static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
+static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
+static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
+
+/*
+ * For each output interface, nm_bdg_q is used to construct a list.
+ * bq_len is the number of output buffers (we can have coalescing
+ * during the copy).
+ */
+struct nm_bdg_q {
+	uint16_t bq_head;
+	uint16_t bq_tail;
+	uint32_t bq_len;	/* number of buffers */
+};
+
+/* XXX revise this */
+struct nm_hash_ent {
+	uint64_t	mac;	/* the top 2 bytes are the epoch */
+	uint64_t	ports;
+};
+
+/*
+ * nm_bridge is a descriptor for a VALE switch.
+ * Interfaces for a bridge are all in bdg_ports[].
+ * The array has fixed size, an empty entry does not terminate
+ * the search, but lookups only occur on attach/detach so we
+ * don't mind if they are slow.
+ *
+ * The bridge is non blocking on the transmit ports: excess
+ * packets are dropped if there is no room on the output port.
+ *
+ * bdg_lock protects accesses to the bdg_ports array.
+ * This is a rw lock (or equivalent).
+ */
+struct nm_bridge {
+	/* XXX what is the proper alignment/layout ? */
+	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
+	int		bdg_namelen;
+	uint32_t	bdg_active_ports; /* 0 means free */
+	char		bdg_basename[IFNAMSIZ];
+
+	/* Indexes of active ports (up to active_ports)
+	 * and all other remaining ports.
+	 */
+	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
+
+	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
+
+
+	/*
+	 * The function to decide the destination port.
+	 * It returns either the index of the destination port,
+	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
+	 * forward this packet.  ring_nr is the source ring index, and the
+	 * function may overwrite this value to forward this packet to a
+	 * different ring index.
+	 * This function must be set by netmap_bdg_ctl().
+	 */
+	struct netmap_bdg_ops bdg_ops;
+
+	/* the forwarding table, MAC+ports.
+	 * XXX should be changed to an argument to be passed to
+	 * the lookup function, and allocated on attach
+	 */
+	struct nm_hash_ent ht[NM_BDG_HASH];
+};
+
+const char*
+netmap_bdg_name(struct netmap_vp_adapter *vp)
+{
+	struct nm_bridge *b = vp->na_bdg;
+	if (b == NULL)
+		return NULL;
+	return b->bdg_basename;
+}
+
+
+/*
+ * XXX in principle nm_bridges could be created dynamically
+ * Right now we have a static array and deletions are protected
+ * by an exclusive lock.
+ */
+struct nm_bridge nm_bridges[NM_BRIDGES];
+
+
+/*
+ * this is a slightly optimized copy routine which rounds
+ * to multiple of 64 bytes and is often faster than dealing
+ * with other odd sizes. We assume there is enough room
+ * in the source and destination buffers.
+ *
+ * XXX only for multiples of 64 bytes, non overlapped.
+ */
+static inline void
+pkt_copy(void *_src, void *_dst, int l)
+{
+        uint64_t *src = _src;
+        uint64_t *dst = _dst;
+        if (unlikely(l >= 1024)) {
+                memcpy(dst, src, l);
+                return;
+        }
+        for (; likely(l > 0); l-=64) {
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+        }
+}
+
+
+/*
+ * locate a bridge among the existing ones.
+ * MUST BE CALLED WITH NMG_LOCK()
+ *
+ * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
+ * We assume that this is called with a name of at least NM_NAME chars.
+ */
+static struct nm_bridge *
+nm_find_bridge(const char *name, int create)
+{
+	int i, l, namelen;
+	struct nm_bridge *b = NULL;
+
+	NMG_LOCK_ASSERT();
+
+	namelen = strlen(NM_NAME);	/* base length */
+	l = name ? strlen(name) : 0;		/* actual length */
+	if (l < namelen) {
+		D("invalid bridge name %s", name ? name : NULL);
+		return NULL;
+	}
+	for (i = namelen + 1; i < l; i++) {
+		if (name[i] == ':') {
+			namelen = i;
+			break;
+		}
+	}
+	if (namelen >= IFNAMSIZ)
+		namelen = IFNAMSIZ;
+	ND("--- prefix is '%.*s' ---", namelen, name);
+
+	/* lookup the name, remember empty slot if there is one */
+	for (i = 0; i < NM_BRIDGES; i++) {
+		struct nm_bridge *x = nm_bridges + i;
+
+		if (x->bdg_active_ports == 0) {
+			if (create && b == NULL)
+				b = x;	/* record empty slot */
+		} else if (x->bdg_namelen != namelen) {
+			continue;
+		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
+			ND("found '%.*s' at %d", namelen, name, i);
+			b = x;
+			break;
+		}
+	}
+	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
+		/* initialize the bridge */
+		strncpy(b->bdg_basename, name, namelen);
+		ND("create new bridge %s with ports %d", b->bdg_basename,
+			b->bdg_active_ports);
+		b->bdg_namelen = namelen;
+		b->bdg_active_ports = 0;
+		for (i = 0; i < NM_BDG_MAXPORTS; i++)
+			b->bdg_port_index[i] = i;
+		/* set the default function */
+		b->bdg_ops.lookup = netmap_bdg_learning;
+		/* reset the MAC address table */
+		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+	}
+	return b;
+}
+
+
+/*
+ * Free the forwarding tables for rings attached to switch ports.
+ */
+static void
+nm_free_bdgfwd(struct netmap_adapter *na)
+{
+	int nrings, i;
+	struct netmap_kring *kring;
+
+	NMG_LOCK_ASSERT();
+	nrings = na->num_tx_rings;
+	kring = na->tx_rings;
+	for (i = 0; i < nrings; i++) {
+		if (kring[i].nkr_ft) {
+			free(kring[i].nkr_ft, M_DEVBUF);
+			kring[i].nkr_ft = NULL; /* protect from freeing twice */
+		}
+	}
+}
+
+
+/*
+ * Allocate the forwarding tables for the rings attached to the bridge ports.
+ */
+static int
+nm_alloc_bdgfwd(struct netmap_adapter *na)
+{
+	int nrings, l, i, num_dstq;
+	struct netmap_kring *kring;
+
+	NMG_LOCK_ASSERT();
+	/* all port:rings + broadcast */
+	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
+	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
+	l += sizeof(struct nm_bdg_q) * num_dstq;
+	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
+
+	nrings = netmap_real_tx_rings(na);
+	kring = na->tx_rings;
+	for (i = 0; i < nrings; i++) {
+		struct nm_bdg_fwd *ft;
+		struct nm_bdg_q *dstq;
+		int j;
+
+		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
+		if (!ft) {
+			nm_free_bdgfwd(na);
+			return ENOMEM;
+		}
+		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
+		for (j = 0; j < num_dstq; j++) {
+			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
+			dstq[j].bq_len = 0;
+		}
+		kring[i].nkr_ft = ft;
+	}
+	return 0;
+}
+
+
+/* remove from bridge b the ports in slots hw and sw
+ * (sw can be -1 if not needed)
+ */
+static void
+netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
+{
+	int s_hw = hw, s_sw = sw;
+	int i, lim = b->bdg_active_ports;
+	uint8_t tmp[NM_BDG_MAXPORTS];
+
+	/*
+	New algorithm:
+	make a copy of bdg_port_index;
+	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
+	in the array of bdg_port_index, replacing them with
+	entries from the bottom of the array;
+	decrement bdg_active_ports;
+	acquire BDG_WLOCK() and copy back the array.
+	 */
+
+	if (netmap_verbose)
+		D("detach %d and %d (lim %d)", hw, sw, lim);
+	/* make a copy of the list of active ports, update it,
+	 * and then copy back within BDG_WLOCK().
+	 */
+	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
+	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
+		if (hw >= 0 && tmp[i] == hw) {
+			ND("detach hw %d at %d", hw, i);
+			lim--; /* point to last active port */
+			tmp[i] = tmp[lim]; /* swap with i */
+			tmp[lim] = hw;	/* now this is inactive */
+			hw = -1;
+		} else if (sw >= 0 && tmp[i] == sw) {
+			ND("detach sw %d at %d", sw, i);
+			lim--;
+			tmp[i] = tmp[lim];
+			tmp[lim] = sw;
+			sw = -1;
+		} else {
+			i++;
+		}
+	}
+	if (hw >= 0 || sw >= 0) {
+		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
+	}
+
+	BDG_WLOCK(b);
+	if (b->bdg_ops.dtor)
+		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
+	b->bdg_ports[s_hw] = NULL;
+	if (s_sw >= 0) {
+		b->bdg_ports[s_sw] = NULL;
+	}
+	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
+	b->bdg_active_ports = lim;
+	BDG_WUNLOCK(b);
+
+	ND("now %d active ports", lim);
+	if (lim == 0) {
+		ND("marking bridge %s as free", b->bdg_basename);
+		bzero(&b->bdg_ops, sizeof(b->bdg_ops));
+	}
+}
+
+/* nm_bdg_ctl callback for VALE ports */
+static int
+netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
+{
+	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
+	struct nm_bridge *b = vpna->na_bdg;
+
+	if (attach)
+		return 0; /* nothing to do */
+	if (b) {
+		netmap_set_all_rings(na, 0 /* disable */);
+		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
+		vpna->na_bdg = NULL;
+		netmap_set_all_rings(na, 1 /* enable */);
+	}
+	/* we took this reference just for the attach */
+	netmap_adapter_put(na);
+	return 0;
+}
+
+/* nm_dtor callback for ephemeral VALE ports */
+static void
+netmap_vp_dtor(struct netmap_adapter *na)
+{
+	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
+	struct nm_bridge *b = vpna->na_bdg;
+
+	ND("%s has %d references", na->name, na->na_refcount);
+
+	if (b) {
+		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
+	}
+}
+
+/* nm_dtor callback for persistent VALE ports */
+static void
+netmap_persist_vp_dtor(struct netmap_adapter *na)
+{
+	struct ifnet *ifp = na->ifp;
+
+	netmap_vp_dtor(na);
+	na->ifp = NULL;
+	nm_vi_detach(ifp);
+}
+
+/* remove a persistent VALE port from the system */
+static int
+nm_vi_destroy(const char *name)
+{
+	struct ifnet *ifp;
+	int error;
+
+	ifp = ifunit_ref(name);
+	if (!ifp)
+		return ENXIO;
+	NMG_LOCK();
+	/* make sure this is actually a VALE port */
+	if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
+		error = EINVAL;
+		goto err;
+	}
+
+	if (NA(ifp)->na_refcount > 1) {
+		error = EBUSY;
+		goto err;
+	}
+	NMG_UNLOCK();
+
+	D("destroying a persistent vale interface %s", ifp->if_xname);
+	/* Linux requires that all references be released
+	 * before unregistering
+	 */
+	if_rele(ifp);
+	netmap_detach(ifp);
+	return 0;
+
+err:
+	NMG_UNLOCK();
+	if_rele(ifp);
+	return error;
+}
+
+/*
+ * Create a virtual interface registered to the system.
+ * The interface will be attached to a bridge later.
+ */
+static int
+nm_vi_create(struct nmreq *nmr)
+{
+	struct ifnet *ifp;
+	struct netmap_vp_adapter *vpna;
+	int error;
+
+	/* don't include VALE prefix */
+	if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME)))
+		return EINVAL;
+	ifp = ifunit_ref(nmr->nr_name);
+	if (ifp) { /* already exists, cannot create a new one */
+		if_rele(ifp);
+		return EEXIST;
+	}
+	error = nm_vi_persist(nmr->nr_name, &ifp);
+	if (error)
+		return error;
+
+	NMG_LOCK();
+	/* netmap_vp_create creates a struct netmap_vp_adapter */
+	error = netmap_vp_create(nmr, ifp, &vpna);
+	if (error) {
+		D("error %d", error);
+		nm_vi_detach(ifp);
+		return error;
+	}
+	/* persist-specific routines */
+	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
+	vpna->up.nm_dtor = netmap_persist_vp_dtor;
+	netmap_adapter_get(&vpna->up);
+	NMG_UNLOCK();
+	D("created %s", ifp->if_xname);
+	return 0;
+}
+
+/* Try to get a reference to a netmap adapter attached to a VALE switch.
+ * If the adapter is found (or is created), this function returns 0, a
+ * non NULL pointer is returned into *na, and the caller holds a
+ * reference to the adapter.
+ * If an adapter is not found, then no reference is grabbed and the
+ * function returns an error code, or 0 if there is just a VALE prefix
+ * mismatch. Therefore the caller holds a reference when
+ * (*na != NULL && return == 0).
+ */
+int
+netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
+{
+	char *nr_name = nmr->nr_name;
+	const char *ifname;
+	struct ifnet *ifp;
+	int error = 0;
+	struct netmap_vp_adapter *vpna, *hostna = NULL;
+	struct nm_bridge *b;
+	int i, j, cand = -1, cand2 = -1;
+	int needed;
+
+	*na = NULL;     /* default return value */
+
+	/* first try to see if this is a bridge port. */
+	NMG_LOCK_ASSERT();
+	if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) {
+		return 0;  /* no error, but no VALE prefix */
+	}
+
+	b = nm_find_bridge(nr_name, create);
+	if (b == NULL) {
+		D("no bridges available for '%s'", nr_name);
+		return (create ? ENOMEM : ENXIO);
+	}
+	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
+		panic("x");
+
+	/* Now we are sure that name starts with the bridge's name,
+	 * lookup the port in the bridge. We need to scan the entire
+	 * list. It is not important to hold a WLOCK on the bridge
+	 * during the search because NMG_LOCK already guarantees
+	 * that there are no other possible writers.
+	 */
+
+	/* lookup in the local list of ports */
+	for (j = 0; j < b->bdg_active_ports; j++) {
+		i = b->bdg_port_index[j];
+		vpna = b->bdg_ports[i];
+		// KASSERT(na != NULL);
+		D("checking %s", vpna->up.name);
+		if (!strcmp(vpna->up.name, nr_name)) {
+			netmap_adapter_get(&vpna->up);
+			ND("found existing if %s refs %d", nr_name, vpna->up.na_refcount);
+			*na = &vpna->up;
+			return 0;
+		}
+	}
+	/* not found, should we create it? */
+	if (!create)
+		return ENXIO;
+	/* yes we should, see if we have space to attach entries */
+	needed = 2; /* in some cases we only need 1 */
+	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
+		D("bridge full %d, cannot create new port", b->bdg_active_ports);
+		return ENOMEM;
+	}
+	/* record the next two ports available, but do not allocate yet */
+	cand = b->bdg_port_index[b->bdg_active_ports];
+	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
+	ND("+++ bridge %s port %s used %d avail %d %d",
+		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
+
+	/*
+	 * try to see if there is a matching NIC with this name
+	 * (after the bridge's name)
+	 */
+	ifname = nr_name + b->bdg_namelen + 1;
+	ifp = ifunit_ref(ifname);
+	if (!ifp) {
+		/* Create an ephemeral virtual port
+		 * This block contains all the ephemeral-specific logic
+		 */
+		if (nmr->nr_cmd) {
+			/* nr_cmd must be 0 for a virtual port */
+			return EINVAL;
+		}
+
+		/* bdg_netmap_attach creates a struct netmap_adapter */
+		error = netmap_vp_create(nmr, NULL, &vpna);
+		if (error) {
+			D("error %d", error);
+			free(ifp, M_DEVBUF);
+			return error;
+		}
+		/* shortcut - we can skip get_hw_na(),
+		 * ownership check and nm_bdg_attach()
+		 */
+	} else {
+		struct netmap_adapter *hw;
+
+		error = netmap_get_hw_na(ifp, &hw);
+		if (error || hw == NULL)
+			goto out;
+
+		/* host adapter might not be created */
+		error = hw->nm_bdg_attach(nr_name, hw);
+		if (error)
+			goto out;
+		vpna = hw->na_vp;
+		hostna = hw->na_hostvp;
+		if_rele(ifp);
+		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
+			hostna = NULL;
+	}
+
+	BDG_WLOCK(b);
+	vpna->bdg_port = cand;
+	ND("NIC  %p to bridge port %d", vpna, cand);
+	/* bind the port to the bridge (virtual ports are not active) */
+	b->bdg_ports[cand] = vpna;
+	vpna->na_bdg = b;
+	b->bdg_active_ports++;
+	if (hostna != NULL) {
+		/* also bind the host stack to the bridge */
+		b->bdg_ports[cand2] = hostna;
+		hostna->bdg_port = cand2;
+		hostna->na_bdg = b;
+		b->bdg_active_ports++;
+		ND("host %p to bridge port %d", hostna, cand2);
+	}
+	ND("if %s refs %d", ifname, vpna->up.na_refcount);
+	BDG_WUNLOCK(b);
+	*na = &vpna->up;
+	netmap_adapter_get(*na);
+	return 0;
+
+out:
+	if_rele(ifp);
+
+	return error;
+}
+
+
+/* Process NETMAP_BDG_ATTACH */
+static int
+nm_bdg_ctl_attach(struct nmreq *nmr)
+{
+	struct netmap_adapter *na;
+	int error;
+
+	NMG_LOCK();
+
+	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
+	if (error) /* no device */
+		goto unlock_exit;
+
+	if (na == NULL) { /* VALE prefix missing */
+		error = EINVAL;
+		goto unlock_exit;
+	}
+
+	if (NETMAP_OWNED_BY_ANY(na)) {
+		error = EBUSY;
+		goto unref_exit;
+	}
+
+	if (na->nm_bdg_ctl) {
+		/* nop for VALE ports. The bwrap needs to put the hwna
+		 * in netmap mode (see netmap_bwrap_bdg_ctl)
+		 */
+		error = na->nm_bdg_ctl(na, nmr, 1);
+		if (error)
+			goto unref_exit;
+		ND("registered %s to netmap-mode", na->name);
+	}
+	NMG_UNLOCK();
+	return 0;
+
+unref_exit:
+	netmap_adapter_put(na);
+unlock_exit:
+	NMG_UNLOCK();
+	return error;
+}
+
+
+/* process NETMAP_BDG_DETACH */
+static int
+nm_bdg_ctl_detach(struct nmreq *nmr)
+{
+	struct netmap_adapter *na;
+	int error;
+
+	NMG_LOCK();
+	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
+	if (error) { /* no device, or another bridge or user owns the device */
+		goto unlock_exit;
+	}
+
+	if (na == NULL) { /* VALE prefix missing */
+		error = EINVAL;
+		goto unlock_exit;
+	}
+
+	if (na->nm_bdg_ctl) {
+		/* remove the port from the bridge. The bwrap
+		 * also needs to put the hwna in normal mode
+		 */
+		error = na->nm_bdg_ctl(na, nmr, 0);
+	}
+
+	netmap_adapter_put(na);
+unlock_exit:
+	NMG_UNLOCK();
+	return error;
+
+}
+
+
+/* Called from either a user context (netmap_ioctl())
+ * or from external kernel modules (e.g., Openvswitch).
+ * The operation is indicated in nmr->nr_cmd.
+ * NETMAP_BDG_REGOPS, which sets the configure/lookup/dtor functions of the
+ * bridge, requires the bdg_ops argument; the other commands ignore it.
+ *
+ * Called without NMG_LOCK.
+ */
+int
+netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
+{
+	struct nm_bridge *b;
+	struct netmap_adapter *na;
+	struct netmap_vp_adapter *vpna;
+	char *name = nmr->nr_name;
+	int cmd = nmr->nr_cmd, namelen = strlen(name);
+	int error = 0, i, j;
+
+	switch (cmd) {
+	case NETMAP_BDG_NEWIF:
+		error = nm_vi_create(nmr);
+		break;
+
+	case NETMAP_BDG_DELIF:
+		error = nm_vi_destroy(nmr->nr_name);
+		break;
+
+	case NETMAP_BDG_ATTACH:
+		error = nm_bdg_ctl_attach(nmr);
+		break;
+
+	case NETMAP_BDG_DETACH:
+		error = nm_bdg_ctl_detach(nmr);
+		break;
+
+	case NETMAP_BDG_LIST:
+		/* this is used to enumerate bridges and ports */
+		if (namelen) { /* look up indexes of bridge and port */
+			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
+				error = EINVAL;
+				break;
+			}
+			NMG_LOCK();
+			b = nm_find_bridge(name, 0 /* don't create */);
+			if (!b) {
+				error = ENOENT;
+				NMG_UNLOCK();
+				break;
+			}
+
+			name = name + b->bdg_namelen + 1;
+			error = ENOENT;
+			for (j = 0; j < b->bdg_active_ports; j++) {
+				i = b->bdg_port_index[j];
+				vpna = b->bdg_ports[i];
+				if (vpna == NULL) {
+					D("---AAAAAAAAARGH-------");
+					continue;
+				}
+				/* the former and the latter identify a
+				 * virtual port and a NIC, respectively
+				 */
+				if (!strcmp(vpna->up.name, name)) {
+					/* bridge index */
+					nmr->nr_arg1 = b - nm_bridges;
+					nmr->nr_arg2 = i; /* port index */
+					error = 0;
+					break;
+				}
+			}
+			NMG_UNLOCK();
+		} else {
+			/* return the first non-empty entry starting from
+			 * bridge nr_arg1 and port nr_arg2.
+			 *
+			 * Users can detect the end of the same bridge by
+			 * seeing the new and old value of nr_arg1, and can
+			 * detect the end of all the bridges by error != 0
+			 */
+			i = nmr->nr_arg1;
+			j = nmr->nr_arg2;
+
+			NMG_LOCK();
+			for (error = ENOENT; i < NM_BRIDGES; i++) {
+				b = nm_bridges + i;
+				if (j >= b->bdg_active_ports) {
+					j = 0; /* following bridges scan from 0 */
+					continue;
+				}
+				nmr->nr_arg1 = i;
+				nmr->nr_arg2 = j;
+				j = b->bdg_port_index[j];
+				vpna = b->bdg_ports[j];
+				strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
+				error = 0;
+				break;
+			}
+			NMG_UNLOCK();
+		}
+		break;
+
+	case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */
+		/* register callbacks to the given bridge.
+		 * nmr->nr_name may be just bridge's name (including ':'
+		 * nmr->nr_name may be just the bridge's name (including the ':'
+		 */
+		if (!bdg_ops) {
+			error = EINVAL;
+			break;
+		}
+		NMG_LOCK();
+		b = nm_find_bridge(name, 0 /* don't create */);
+		if (!b) {
+			error = EINVAL;
+		} else {
+			b->bdg_ops = *bdg_ops;
+		}
+		NMG_UNLOCK();
+		break;
+
+	case NETMAP_BDG_VNET_HDR:
+		/* Valid lengths for the virtio-net header are 0 (no header),
+		   10 and 12. */
+		if (nmr->nr_arg1 != 0 &&
+			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
+				nmr->nr_arg1 != 12) {
+			error = EINVAL;
+			break;
+		}
+		NMG_LOCK();
+		error = netmap_get_bdg_na(nmr, &na, 0);
+		if (na && !error) {
+			vpna = (struct netmap_vp_adapter *)na;
+			vpna->virt_hdr_len = nmr->nr_arg1;
+			if (vpna->virt_hdr_len)
+				vpna->mfs = NETMAP_BUF_SIZE(na);
+			D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
+			netmap_adapter_put(na);
+		}
+		NMG_UNLOCK();
+		break;
+
+	default:
+		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
+		error = EINVAL;
+		break;
+	}
+	return error;
+}
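+
+/*
+ * Illustrative user-space sketch (not part of this driver): enumerating
+ * bridges and ports with NETMAP_BDG_LIST, using the nr_arg1/nr_arg2 cursor
+ * convention described in the NETMAP_BDG_LIST case above.  This assumes that
+ * the command is issued through the NIOCGINFO ioctl, as the netmap ioctl
+ * handler of this vintage does; 'fd' is an open /dev/netmap descriptor.
+ */
+#if 0	/* example only, never compiled here */
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <net/netmap.h>
+
+static void
+list_vale_ports_sketch(int fd)
+{
+	struct nmreq req;
+
+	memset(&req, 0, sizeof(req));
+	req.nr_version = NETMAP_API;
+	req.nr_cmd = NETMAP_BDG_LIST;
+	/* empty nr_name: start enumerating from bridge 0, port 0 */
+	while (ioctl(fd, NIOCGINFO, &req) == 0) {
+		printf("bridge %u port %u: %s\n",
+		    req.nr_arg1, req.nr_arg2, req.nr_name);
+		req.nr_arg2++;		/* advance the cursor ourselves */
+		req.nr_name[0] = '\0';	/* keep enumerating, not looking up */
+	}
+	/* the loop ends when the kernel returns ENOENT: no more ports */
+}
+#endif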
+
+int
+netmap_bdg_config(struct nmreq *nmr)
+{
+	struct nm_bridge *b;
+	int error = EINVAL;
+
+	NMG_LOCK();
+	b = nm_find_bridge(nmr->nr_name, 0);
+	if (!b) {
+		NMG_UNLOCK();
+		return error;
+	}
+	NMG_UNLOCK();
+	/* Don't call config() with NMG_LOCK() held */
+	BDG_RLOCK(b);
+	if (b->bdg_ops.config != NULL)
+		error = b->bdg_ops.config((struct nm_ifreq *)nmr);
+	BDG_RUNLOCK(b);
+	return error;
+}
+
+
+/* nm_krings_create callback for VALE ports.
+ * Calls the standard netmap_krings_create, then adds leases on rx
+ * rings and bdgfwd on tx rings.
+ */
+static int
+netmap_vp_krings_create(struct netmap_adapter *na)
+{
+	u_int tailroom;
+	int error, i;
+	uint32_t *leases;
+	u_int nrx = netmap_real_rx_rings(na);
+
+	/*
+	 * Leases are attached to RX rings on vale ports
+	 */
+	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
+
+	error = netmap_krings_create(na, tailroom);
+	if (error)
+		return error;
+
+	leases = na->tailroom;
+
+	for (i = 0; i < nrx; i++) { /* Receive rings */
+		na->rx_rings[i].nkr_leases = leases;
+		leases += na->num_rx_desc;
+	}
+
+	error = nm_alloc_bdgfwd(na);
+	if (error) {
+		netmap_krings_delete(na);
+		return error;
+	}
+
+	return 0;
+}
+
+
+/* nm_krings_delete callback for VALE ports. */
+static void
+netmap_vp_krings_delete(struct netmap_adapter *na)
+{
+	nm_free_bdgfwd(na);
+	netmap_krings_delete(na);
+}
+
+
+static int
+nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
+	struct netmap_vp_adapter *na, u_int ring_nr);
+
+
+/*
+ * main dispatch routine for the bridge.
+ * Grab packets from a kring, move them into the ft structure
+ * associated to the tx (input) port. Max one instance per port,
+ * filtered on input (ioctl, poll or XXX).
+ * Returns the next position in the ring.
+ */
+static int
+nm_bdg_preflush(struct netmap_kring *kring, u_int end)
+{
+	struct netmap_vp_adapter *na =
+		(struct netmap_vp_adapter*)kring->na;
+	struct netmap_ring *ring = kring->ring;
+	struct nm_bdg_fwd *ft;
+	u_int ring_nr = kring->ring_id;
+	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
+	u_int ft_i = 0;	/* start from 0 */
+	u_int frags = 1; /* how many frags ? */
+	struct nm_bridge *b = na->na_bdg;
+
+	/* To protect against modifications to the bridge we acquire a
+	 * shared lock, waiting if we can sleep (if the source port is
+	 * attached to a user process) or with a trylock otherwise (NICs).
+	 */
+	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
+	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
+		BDG_RLOCK(b);
+	else if (!BDG_RTRYLOCK(b))
+		return 0;
+	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
+	ft = kring->nkr_ft;
+
+	for (; likely(j != end); j = nm_next(j, lim)) {
+		struct netmap_slot *slot = &ring->slot[j];
+		char *buf;
+
+		ft[ft_i].ft_len = slot->len;
+		ft[ft_i].ft_flags = slot->flags;
+
+		ND("flags is 0x%x", slot->flags);
+		/* this slot goes into a list so initialize the link field */
+		ft[ft_i].ft_next = NM_FT_NULL;
+		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
+			(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
+		if (unlikely(buf == NULL)) {
+			RD(5, "NULL %s buffer pointer from %s slot %d len %d",
+				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
+				kring->name, j, ft[ft_i].ft_len);
+			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
+			ft[ft_i].ft_len = 0;
+			ft[ft_i].ft_flags = 0;
+		}
+		__builtin_prefetch(buf);
+		++ft_i;
+		if (slot->flags & NS_MOREFRAG) {
+			frags++;
+			continue;
+		}
+		if (unlikely(netmap_verbose && frags > 1))
+			RD(5, "%d frags at %d", frags, ft_i - frags);
+		ft[ft_i - frags].ft_frags = frags;
+		frags = 1;
+		if (unlikely((int)ft_i >= bridge_batch))
+			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
+	}
+	if (frags > 1) {
+		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
+		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
+		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
+		ft[ft_i - frags].ft_frags = frags - 1;
+	}
+	if (ft_i)
+		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
+	BDG_RUNLOCK(b);
+	return j;
+}
+
+
+/* ----- FreeBSD if_bridge hash function ------- */
+
+/*
+ * The following hash function is adapted from "Hash Functions" by Bob Jenkins
+ * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
+ *
+ * http://www.burtleburtle.net/bob/hash/spooky.html
+ */
+#define mix(a, b, c)                                                    \
+do {                                                                    \
+        a -= b; a -= c; a ^= (c >> 13);                                 \
+        b -= c; b -= a; b ^= (a << 8);                                  \
+        c -= a; c -= b; c ^= (b >> 13);                                 \
+        a -= b; a -= c; a ^= (c >> 12);                                 \
+        b -= c; b -= a; b ^= (a << 16);                                 \
+        c -= a; c -= b; c ^= (b >> 5);                                  \
+        a -= b; a -= c; a ^= (c >> 3);                                  \
+        b -= c; b -= a; b ^= (a << 10);                                 \
+        c -= a; c -= b; c ^= (b >> 15);                                 \
+} while (/*CONSTCOND*/0)
+
+
+static __inline uint32_t
+nm_bridge_rthash(const uint8_t *addr)
+{
+        uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key
+
+        b += addr[5] << 8;
+        b += addr[4];
+        a += addr[3] << 24;
+        a += addr[2] << 16;
+        a += addr[1] << 8;
+        a += addr[0];
+
+        mix(a, b, c);
+#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
+        return (c & BRIDGE_RTHASH_MASK);
+}
+
+#undef mix
+
+
+/* nm_register callback for VALE ports */
+static int
+netmap_vp_reg(struct netmap_adapter *na, int onoff)
+{
+	struct netmap_vp_adapter *vpna =
+		(struct netmap_vp_adapter*)na;
+
+	/* persistent ports may be put in netmap mode
+	 * before being attached to a bridge
+	 */
+	if (vpna->na_bdg)
+		BDG_WLOCK(vpna->na_bdg);
+	if (onoff) {
+		na->na_flags |= NAF_NETMAP_ON;
+		 /* XXX on FreeBSD, persistent VALE ports should also
+		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
+		 */
+	} else {
+		na->na_flags &= ~NAF_NETMAP_ON;
+	}
+	if (vpna->na_bdg)
+		BDG_WUNLOCK(vpna->na_bdg);
+	return 0;
+}
+
+
+/*
+ * Lookup function for a learning bridge.
+ * Update the hash table with the source address,
+ * and then return the destination port index, and the
+ * ring in *dst_ring (at the moment, always use ring 0)
+ */
+u_int
+netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
+		const struct netmap_vp_adapter *na)
+{
+	uint8_t *buf = ft->ft_buf;
+	u_int buf_len = ft->ft_len;
+	struct nm_hash_ent *ht = na->na_bdg->ht;
+	uint32_t sh, dh;
+	u_int dst, mysrc = na->bdg_port;
+	uint64_t smac, dmac;
+
+	/* safety check, unfortunately we have many cases */
+	if (buf_len >= 14 + na->virt_hdr_len) {
+		/* virthdr + mac_hdr in the same slot */
+		buf += na->virt_hdr_len;
+		buf_len -= na->virt_hdr_len;
+	} else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
+		/* only header in first fragment */
+		ft++;
+		buf = ft->ft_buf;
+		buf_len = ft->ft_len;
+	} else {
+		RD(5, "invalid buf format, length %d", buf_len);
+		return NM_BDG_NOPORT;
+	}
+	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
+	smac = le64toh(*(uint64_t *)(buf + 4));
+	smac >>= 16;
+
+	/*
+	 * The hash is somewhat expensive, there might be some
+	 * worthwhile optimizations here.
+	 */
+	if ((buf[6] & 1) == 0) { /* valid src */
+		uint8_t *s = buf+6;
+		sh = nm_bridge_rthash(s); // XXX hash of source
+		/* update source port forwarding entry */
+		ht[sh].mac = smac;	/* XXX expire ? */
+		ht[sh].ports = mysrc;
+		if (netmap_verbose)
+		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
+			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
+	}
+	dst = NM_BDG_BROADCAST;
+	if ((buf[0] & 1) == 0) { /* unicast */
+		dh = nm_bridge_rthash(buf); // XXX hash of dst
+		if (ht[dh].mac == dmac) {	/* found dst */
+			dst = ht[dh].ports;
+		}
+		/* XXX otherwise return NM_BDG_UNKNOWN ? */
+	}
+	*dst_ring = 0;
+	return dst;
+}
+
+
+/*
+ * Available space in the ring. Only used in VALE code
+ * and only with is_rx = 1
+ */
+static inline uint32_t
+nm_kr_space(struct netmap_kring *k, int is_rx)
+{
+	int space;
+
+	if (is_rx) {
+		int busy = k->nkr_hwlease - k->nr_hwcur;
+		if (busy < 0)
+			busy += k->nkr_num_slots;
+		space = k->nkr_num_slots - 1 - busy;
+	} else {
+		/* XXX never used in this branch */
+		space = k->nr_hwtail - k->nkr_hwlease;
+		if (space < 0)
+			space += k->nkr_num_slots;
+	}
+#if 0
+	// sanity check
+	if (k->nkr_hwlease >= k->nkr_num_slots ||
+		k->nr_hwcur >= k->nkr_num_slots ||
+		k->nr_tail >= k->nkr_num_slots ||
+		busy < 0 ||
+		busy >= k->nkr_num_slots) {
+		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",
+			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
+			k->nkr_lease_idx, k->nkr_num_slots);
+	}
+#endif
+	return space;
+}
+
+
+
+
+/* make a lease on the kring for N positions. return the
+ * lease index
+ * XXX only used in VALE code and with is_rx = 1
+ */
+static inline uint32_t
+nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
+{
+	uint32_t lim = k->nkr_num_slots - 1;
+	uint32_t lease_idx = k->nkr_lease_idx;
+
+	k->nkr_leases[lease_idx] = NR_NOSLOT;
+	k->nkr_lease_idx = nm_next(lease_idx, lim);
+
+	if (n > nm_kr_space(k, is_rx)) {
+		D("invalid request for %d slots", n);
+		panic("x");
+	}
+	/* XXX verify that there are n slots */
+	k->nkr_hwlease += n;
+	if (k->nkr_hwlease > lim)
+		k->nkr_hwlease -= lim + 1;
+
+	if (k->nkr_hwlease >= k->nkr_num_slots ||
+		k->nr_hwcur >= k->nkr_num_slots ||
+		k->nr_hwtail >= k->nkr_num_slots ||
+		k->nkr_lease_idx >= k->nkr_num_slots) {
+		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
+			k->na->name,
+			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
+			k->nkr_lease_idx, k->nkr_num_slots);
+	}
+	return lease_idx;
+}
+
+/*
+ *
+ * This flush routine supports only unicast and broadcast but a large
+ * number of ports, and lets us replace the learn and dispatch functions.
+ */
+int
+nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
+		u_int ring_nr)
+{
+	struct nm_bdg_q *dst_ents, *brddst;
+	uint16_t num_dsts = 0, *dsts;
+	struct nm_bridge *b = na->na_bdg;
+	u_int i, j, me = na->bdg_port;
+
+	/*
+	 * The work area (pointed by ft) is followed by an array of
+	 * pointers to queues, dst_ents; there are NM_BDG_MAXRINGS
+	 * queues per port plus one for the broadcast traffic.
+	 * Then we have an array of destination indexes.
+	 */
+	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
+	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
+
+	/* first pass: find a destination for each packet in the batch */
+	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
+		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
+		uint16_t dst_port, d_i;
+		struct nm_bdg_q *d;
+
+		ND("slot %d frags %d", i, ft[i].ft_frags);
+		/* Drop the packet if the virtio-net header is not contained in the first
+		   fragment nor at the very beginning of the second. */
+		if (unlikely(na->virt_hdr_len > ft[i].ft_len))
+			continue;
+		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
+		if (netmap_verbose > 255)
+			RD(5, "slot %d port %d -> %d", i, me, dst_port);
+		if (dst_port == NM_BDG_NOPORT)
+			continue; /* this packet is identified to be dropped */
+		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
+			continue;
+		else if (dst_port == NM_BDG_BROADCAST)
+			dst_ring = 0; /* broadcasts always go to ring 0 */
+		else if (unlikely(dst_port == me ||
+		    !b->bdg_ports[dst_port]))
+			continue;
+
+		/* get a position in the scratch pad */
+		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
+		d = dst_ents + d_i;
+
+		/* append the first fragment to the list */
+		if (d->bq_head == NM_FT_NULL) { /* new destination */
+			d->bq_head = d->bq_tail = i;
+			/* remember this position to be scanned later */
+			if (dst_port != NM_BDG_BROADCAST)
+				dsts[num_dsts++] = d_i;
+		} else {
+			ft[d->bq_tail].ft_next = i;
+			d->bq_tail = i;
+		}
+		d->bq_len += ft[i].ft_frags;
+	}
+
+	/*
+	 * Broadcast traffic goes to ring 0 on all destinations.
+	 * So we need to add these rings to the list of ports to scan.
+	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
+	 * expensive. We should keep a compact list of active destinations
+	 * so we could shorten this loop.
+	 */
+	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
+	if (brddst->bq_head != NM_FT_NULL) {
+		for (j = 0; likely(j < b->bdg_active_ports); j++) {
+			uint16_t d_i;
+			i = b->bdg_port_index[j];
+			if (unlikely(i == me))
+				continue;
+			d_i = i * NM_BDG_MAXRINGS;
+			if (dst_ents[d_i].bq_head == NM_FT_NULL)
+				dsts[num_dsts++] = d_i;
+		}
+	}
+
+	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
+	/* second pass: scan destinations */
+	for (i = 0; i < num_dsts; i++) {
+		struct netmap_vp_adapter *dst_na;
+		struct netmap_kring *kring;
+		struct netmap_ring *ring;
+		u_int dst_nr, lim, j, d_i, next, brd_next;
+		u_int needed, howmany;
+		int retry = netmap_txsync_retry;
+		struct nm_bdg_q *d;
+		uint32_t my_start = 0, lease_idx = 0;
+		int nrings;
+		int virt_hdr_mismatch = 0;
+
+		d_i = dsts[i];
+		ND("second pass %d port %d", i, d_i);
+		d = dst_ents + d_i;
+		// XXX fix the division
+		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
+		/* protect from the lookup function returning an inactive
+		 * destination port
+		 */
+		if (unlikely(dst_na == NULL))
+			goto cleanup;
+		if (dst_na->up.na_flags & NAF_SW_ONLY)
+			goto cleanup;
+		/*
+		 * The interface may be in !netmap mode in two cases:
+		 * - when na is attached but not activated yet;
+		 * - when na is being deactivated but is still attached.
+		 */
+		if (unlikely(!nm_netmap_on(&dst_na->up))) {
+			ND("not in netmap mode!");
+			goto cleanup;
+		}
+
+		/* there is at least one unicast or broadcast packet */
+		brd_next = brddst->bq_head;
+		next = d->bq_head;
+		/* we need to reserve this many slots. If fewer are
+		 * available, some packets will be dropped.
+		 * Packets may have multiple fragments, so there is a chance
+		 * that we will not use all of the slots we have claimed,
+		 * and we will need to handle the leftover ones when we
+		 * regain the lock.
+		 */
+		needed = d->bq_len + brddst->bq_len;
+
+		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
+			RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
+			/* There is a virtio-net header/offloadings mismatch between
+			 * source and destination. The slower mismatch datapath will
+			 * be used to cope with all the mismatches.
+			 */
+			virt_hdr_mismatch = 1;
+			if (dst_na->mfs < na->mfs) {
+				/* We may need to do segmentation offloadings, and so
+				 * we may need a number of destination slots greater
+				 * than the number of input slots ('needed').
+				 * We look for the smallest integer 'x' which satisfies:
+				 *	needed * na->mfs + x * H <= x * dst_na->mfs
+				 * where 'H' is the length of the longest header that may
+				 * be replicated in the segmentation process (e.g. for
+				 * TCPv4 we must account for ethernet header, IP header
+				 * and TCPv4 header).
+				 */
+				needed = (needed * na->mfs) /
+						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
+				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
+			}
+		}
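+
+		/*
+		 * Worked example for the formula above (editor's sketch;
+		 * the numbers are illustrative and H is assumed to be 114
+		 * bytes): with needed = 10, na->mfs = 1514 and
+		 * dst_na->mfs = 512, the smallest x satisfying
+		 * 10 * 1514 + x * 114 <= x * 512 is x = 39, and the code
+		 * computes (10 * 1514) / (512 - 114) + 1 = 38 + 1 = 39.
+		 */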
+
+		ND(5, "pass 2 dst %d is %x %s",
+			i, d_i, is_vp ? "virtual" : "nic/host");
+		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
+		nrings = dst_na->up.num_rx_rings;
+		if (dst_nr >= nrings)
+			dst_nr = dst_nr % nrings;
+		kring = &dst_na->up.rx_rings[dst_nr];
+		ring = kring->ring;
+		lim = kring->nkr_num_slots - 1;
+
+retry:
+
+		if (dst_na->retry && retry) {
+			/* try to get some free slot from the previous run */
+			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
+			/* actually useful only for bwraps, since there
+			 * the notify will trigger a txsync on the hwna. VALE ports
+			 * have dst_na->retry == 0
+			 */
+		}
+		/* reserve the buffers in the queue and an entry
+		 * to report completion, and drop lock.
+		 * XXX this might become a helper function.
+		 */
+		mtx_lock(&kring->q_lock);
+		if (kring->nkr_stopped) {
+			mtx_unlock(&kring->q_lock);
+			goto cleanup;
+		}
+		my_start = j = kring->nkr_hwlease;
+		howmany = nm_kr_space(kring, 1);
+		if (needed < howmany)
+			howmany = needed;
+		lease_idx = nm_kr_lease(kring, howmany, 1);
+		mtx_unlock(&kring->q_lock);
+
+		/* only retry if we need more than available slots */
+		if (retry && needed <= howmany)
+			retry = 0;
+
+		/* copy to the destination queue */
+		while (howmany > 0) {
+			struct netmap_slot *slot;
+			struct nm_bdg_fwd *ft_p, *ft_end;
+			u_int cnt;
+
+			/* find the queue from which we pick next packet.
+			 * NM_FT_NULL is always higher than valid indexes
+			 * so we never dereference it if the other list
+			 * has packets (and if both are empty we never
+			 * get here).
+			 */
+			if (next < brd_next) {
+				ft_p = ft + next;
+				next = ft_p->ft_next;
+			} else { /* insert broadcast */
+				ft_p = ft + brd_next;
+				brd_next = ft_p->ft_next;
+			}
+			cnt = ft_p->ft_frags; // cnt > 0
+			if (unlikely(cnt > howmany))
+			    break; /* no more space */
+			if (netmap_verbose && cnt > 1)
+				RD(5, "rx %d frags to %d", cnt, j);
+			ft_end = ft_p + cnt;
+			if (unlikely(virt_hdr_mismatch)) {
+				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
+			} else {
+				howmany -= cnt;
+				do {
+					char *dst, *src = ft_p->ft_buf;
+					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
+
+					slot = &ring->slot[j];
+					dst = NMB(&dst_na->up, slot);
+
+					ND("send [%d] %d(%d) bytes at %s:%d",
+							i, (int)copy_len, (int)dst_len,
+							NM_IFPNAME(dst_ifp), j);
+					/* round to a multiple of 64 */
+					copy_len = (copy_len + 63) & ~63;
+
+					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
+						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
+						RD(5, "invalid len %d, down to 64", (int)copy_len);
+						copy_len = dst_len = 64; // XXX
+					}
+					if (ft_p->ft_flags & NS_INDIRECT) {
+						if (copyin(src, dst, copy_len)) {
+							// invalid user pointer, pretend len is 0
+							dst_len = 0;
+						}
+					} else {
+						//memcpy(dst, src, copy_len);
+						pkt_copy(src, dst, (int)copy_len);
+					}
+					slot->len = dst_len;
+					slot->flags = (cnt << 8)| NS_MOREFRAG;
+					j = nm_next(j, lim);
+					needed--;
+					ft_p++;
+				} while (ft_p != ft_end);
+				slot->flags = (cnt << 8); /* clear flag on last entry */
+			}
+			/* are we done ? */
+			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
+				break;
+		}
+		{
+		    /* current position */
+		    uint32_t *p = kring->nkr_leases; /* shorthand */
+		    uint32_t update_pos;
+		    int still_locked = 1;
+
+		    mtx_lock(&kring->q_lock);
+		    if (unlikely(howmany > 0)) {
+			/* we did not use all the buffers. If I am the last
+			 * one I can recover the slots, otherwise I must
+			 * fill them with 0 to mark empty packets.
+			 */
+			ND("leftover %d bufs", howmany);
+			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
+			    /* yes, I am the last one */
+			    ND("roll back nkr_hwlease to %d", j);
+			    kring->nkr_hwlease = j;
+			} else {
+			    while (howmany-- > 0) {
+				ring->slot[j].len = 0;
+				ring->slot[j].flags = 0;
+				j = nm_next(j, lim);
+			    }
+			}
+		    }
+		    p[lease_idx] = j; /* report I am done */
+
+		    update_pos = kring->nr_hwtail;
+
+		    if (my_start == update_pos) {
+			/* all slots before my_start have been reported,
+			 * so scan subsequent leases to see if other ranges
+			 * have been completed, and do a selwakeup or txsync.
+		         */
+			while (lease_idx != kring->nkr_lease_idx &&
+				p[lease_idx] != NR_NOSLOT) {
+			    j = p[lease_idx];
+			    p[lease_idx] = NR_NOSLOT;
+			    lease_idx = nm_next(lease_idx, lim);
+			}
+			/* j is the new 'write' position. j != my_start
+			 * means there are new buffers to report
+			 */
+			if (likely(j != my_start)) {
+				kring->nr_hwtail = j;
+				still_locked = 0;
+				mtx_unlock(&kring->q_lock);
+				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
+				/* this is netmap_notify for VALE ports and
+				 * netmap_bwrap_notify for bwrap. The latter will
+				 * trigger a txsync on the underlying hwna
+				 */
+				if (dst_na->retry && retry--) {
+					/* XXX this is going to call nm_notify again.
+					 * Only useful for bwrap in virtual machines
+					 */
+					goto retry;
+				}
+			}
+		    }
+		    if (still_locked)
+			mtx_unlock(&kring->q_lock);
+		}
+cleanup:
+		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
+		d->bq_len = 0;
+	}
+	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
+	brddst->bq_len = 0;
+	return 0;
+}
+
+/* nm_txsync callback for VALE ports */
+static int
+netmap_vp_txsync(struct netmap_kring *kring, int flags)
+{
+	struct netmap_vp_adapter *na =
+		(struct netmap_vp_adapter *)kring->na;
+	u_int done;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const cur = kring->rcur;
+
+	if (bridge_batch <= 0) { /* testing only */
+		done = cur; // used all
+		goto done;
+	}
+	if (!na->na_bdg) {
+		done = cur;
+		goto done;
+	}
+	if (bridge_batch > NM_BDG_BATCH)
+		bridge_batch = NM_BDG_BATCH;
+
+	done = nm_bdg_preflush(kring, cur);
+done:
+	if (done != cur)
+		D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
+	/*
+	 * packets between 'done' and 'cur' are left unsent.
+	 */
+	kring->nr_hwcur = done;
+	kring->nr_hwtail = nm_prev(done, lim);
+	nm_txsync_finalize(kring);
+	if (netmap_verbose)
+		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
+	return 0;
+}
+
+
+/* rxsync code used by the VALE ports' nm_rxsync callback and also
+ * internally by the bwrap
+ */
+static int
+netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i, lim = kring->nkr_num_slots - 1;
+	u_int head = nm_rxsync_prologue(kring);
+	int n;
+
+	if (head > lim) {
+		D("ouch dangerous reset!!!");
+		n = netmap_ring_reinit(kring);
+		goto done;
+	}
+
+	/* First part, import newly received packets. */
+	/* actually nothing to do here, they are already in the kring */
+
+	/* Second part, skip past packets that userspace has released. */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		/* consistency check, but nothing really important here */
+		for (n = 0; likely(nm_i != head); n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			void *addr = NMB(na, slot);
+
+			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
+				D("bad buffer index %d, ignore ?",
+					slot->buf_idx);
+			}
+			slot->flags &= ~NS_BUF_CHANGED;
+			nm_i = nm_next(nm_i, lim);
+		}
+		kring->nr_hwcur = head;
+	}
+
+	/* tell userspace that there are new packets */
+	nm_rxsync_finalize(kring);
+	n = 0;
+done:
+	return n;
+}
+
+/*
+ * nm_rxsync callback for VALE ports: called when a user process
+ * reads from a VALE switch port.
+ * Concurrent calls from userspace are already serialized, but we
+ * must acquire the queue's lock to protect against writers on the
+ * same queue.
+ */
+static int
+netmap_vp_rxsync(struct netmap_kring *kring, int flags)
+{
+	int n;
+
+	mtx_lock(&kring->q_lock);
+	n = netmap_vp_rxsync_locked(kring, flags);
+	mtx_unlock(&kring->q_lock);
+	return n;
+}
+
+
+/* nm_bdg_attach callback for VALE ports
+ * The na_vp port is this same netmap_adapter. There is no host port.
+ */
+static int
+netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
+{
+	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
+
+	if (vpna->na_bdg)
+		return EBUSY;
+	na->na_vp = vpna;
+	strncpy(na->name, name, sizeof(na->name));
+	na->na_hostvp = NULL;
+	return 0;
+}
+
+/* create a netmap_vp_adapter that describes a VALE port.
+ * Only persistent VALE ports have a non-null ifp.
+ */
+static int
+netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret)
+{
+	struct netmap_vp_adapter *vpna;
+	struct netmap_adapter *na;
+	int error;
+	u_int npipes = 0;
+
+	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (vpna == NULL)
+		return ENOMEM;
+
+ 	na = &vpna->up;
+
+	na->ifp = ifp;
+	strncpy(na->name, nmr->nr_name, sizeof(na->name));
+
+	/* bound checking */
+	na->num_tx_rings = nmr->nr_tx_rings;
+	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
+	nmr->nr_tx_rings = na->num_tx_rings; // write back
+	na->num_rx_rings = nmr->nr_rx_rings;
+	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
+	nmr->nr_rx_rings = na->num_rx_rings; // write back
+	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
+			1, NM_BDG_MAXSLOTS, NULL);
+	na->num_tx_desc = nmr->nr_tx_slots;
+	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
+			1, NM_BDG_MAXSLOTS, NULL);
+	/* validate number of pipes. We want at least 1,
+	 * but probably can do with some more.
+	 * So let's use 2 as default (when 0 is supplied)
+	 */
+	npipes = nmr->nr_arg1;
+	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
+	nmr->nr_arg1 = npipes;	/* write back */
+	/* validate extra bufs */
+	nm_bound_var(&nmr->nr_arg3, 0, 0,
+			128*NM_BDG_MAXSLOTS, NULL);
+	na->num_rx_desc = nmr->nr_rx_slots;
+	vpna->virt_hdr_len = 0;
+	vpna->mfs = 1514;
+	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
+		vpna->mfs = netmap_buf_size; */
+        if (netmap_verbose)
+		D("max frame size %u", vpna->mfs);
+
+	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
+	na->nm_txsync = netmap_vp_txsync;
+	na->nm_rxsync = netmap_vp_rxsync;
+	na->nm_register = netmap_vp_reg;
+	na->nm_krings_create = netmap_vp_krings_create;
+	na->nm_krings_delete = netmap_vp_krings_delete;
+	na->nm_dtor = netmap_vp_dtor;
+	na->nm_mem = netmap_mem_private_new(na->name,
+			na->num_tx_rings, na->num_tx_desc,
+			na->num_rx_rings, na->num_rx_desc,
+			nmr->nr_arg3, npipes, &error);
+	if (na->nm_mem == NULL)
+		goto err;
+	na->nm_bdg_attach = netmap_vp_bdg_attach;
+	/* other nmd fields are set in the common routine */
+	error = netmap_attach_common(na);
+	if (error)
+		goto err;
+	*ret = vpna;
+	return 0;
+
+err:
+	if (na->nm_mem != NULL)
+		netmap_mem_private_delete(na->nm_mem);
+	free(vpna, M_DEVBUF);
+	return error;
+}
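+
+/*
+ * Editor's note, for illustration of the bounds checking above: a
+ * request with nr_tx_rings == 0 is bumped to the default of 1 and
+ * anything above NM_BDG_MAXRINGS is clamped; the rx rings get the
+ * same bounds, and the slot counts default to NM_BRIDGE_RINGSIZE
+ * within [1, NM_BDG_MAXSLOTS].
+ */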
+
+/* Bridge wrapper code (bwrap).
+ * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
+ * VALE switch.
+ * The main task is to swap the meaning of tx and rx rings to match the
+ * expectations of the VALE switch code (see nm_bdg_flush).
+ *
+ * The bwrap works by interposing a netmap_bwrap_adapter between the
+ * rest of the system and the hwna. The netmap_bwrap_adapter looks like
+ * a netmap_vp_adapter to the rest of the system, but, internally, it
+ * translates all callbacks to what the hwna expects.
+ *
+ * Note that we have to intercept callbacks coming from two sides:
+ *
+ *  - callbacks coming from the netmap module are intercepted by
+ *    passing around the netmap_bwrap_adapter instead of the hwna
+ *
+ *  - callbacks coming from outside of the netmap module only know
+ *    about the hwna. This, however, only happens in interrupt
+ *    handlers, where only the hwna->nm_notify callback is called.
+ *    What the bwrap does is to overwrite the hwna->nm_notify callback
+ *    with its own netmap_bwrap_intr_notify.
+ *    XXX This assumes that the hwna->nm_notify callback was the
+ *    standard netmap_notify(), as it is the case for nic adapters.
+ *    Any additional action performed by hwna->nm_notify will not be
+ *    performed by netmap_bwrap_intr_notify.
+ *
+ * Additionally, the bwrap can optionally attach the host rings pair
+ * of the wrapped adapter to a different port of the switch.
+ */
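+
+/*
+ * A rough picture of the interposition described above (editor's
+ * sketch):
+ *
+ *        netmap module / VALE switch
+ *                  |
+ *                  v
+ *        netmap_bwrap_adapter           (vp side, tx/rx swapped)
+ *             |           ^
+ *   forwarded callbacks   |  netmap_bwrap_intr_notify
+ *             v           |  (installed over hwna->nm_notify)
+ *                 hwna  <----  NIC interrupts / host stack
+ */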
+
+
+static void
+netmap_bwrap_dtor(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
+	struct netmap_adapter *hwna = bna->hwna;
+
+	ND("na %p", na);
+	/* drop reference to hwna->ifp.
+	 * If we don't do this, netmap_detach_common(na)
+	 * will think it has set NA(na->ifp) to NULL
+	 */
+	na->ifp = NULL;
+	/* for safety, also drop the possible reference
+	 * in the hostna
+	 */
+	bna->host.up.ifp = NULL;
+
+	hwna->nm_mem = bna->save_nmd;
+	hwna->na_private = NULL;
+	hwna->na_vp = hwna->na_hostvp = NULL;
+	hwna->na_flags &= ~NAF_BUSY;
+	netmap_adapter_put(hwna);
+
+}
+
+
+/*
+ * Intr callback for NICs connected to a bridge.
+ * Simply ignore tx interrupts (maybe we could try to recover space ?)
+ * and pass received packets from nic to the bridge.
+ *
+ * XXX TODO check locking: this is called from the interrupt
+ * handler so we should make sure that the interface is not
+ * disconnected while passing down an interrupt.
+ *
+ * Note, no user process can access this NIC or the host stack.
+ * The only parts of the ring that are significant are the slots,
+ * and head/cur/tail are set from the kring as needed
+ * (part as a receive ring, part as a transmit ring).
+ *
+ * callback that overwrites the hwna notify callback.
+ * Packets come from the outside or from the host stack and are put on an hwna rx ring.
+ * The bridge wrapper then sends the packets through the bridge.
+ */
+static int
+netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
+{
+	struct netmap_bwrap_adapter *bna = na->na_private;
+	struct netmap_vp_adapter *hostna = &bna->host;
+	struct netmap_kring *kring, *bkring;
+	struct netmap_ring *ring;
+	int is_host_ring = ring_nr == na->num_rx_rings;
+	struct netmap_vp_adapter *vpna = &bna->up;
+	int error = 0;
+
+	if (netmap_verbose)
+	    D("%s %s%d 0x%x", na->name,
+		(tx == NR_TX ? "TX" : "RX"), ring_nr, flags);
+
+	if (flags & NAF_DISABLE_NOTIFY) {
+		/* the enabled/disabled state of the ring has changed,
+		 * propagate the info to the wrapper (with tx/rx swapped)
+		 */
+		if (tx == NR_TX) {
+			netmap_set_rxring(&vpna->up, ring_nr,
+					na->tx_rings[ring_nr].nkr_stopped);
+		} else {
+			netmap_set_txring(&vpna->up, ring_nr,
+					na->rx_rings[ring_nr].nkr_stopped);
+		}
+		return 0;
+	}
+
+	if (!nm_netmap_on(na))
+		return 0;
+
+	/* we only care about receive interrupts */
+	if (tx == NR_TX)
+		return 0;
+
+	kring = &na->rx_rings[ring_nr];
+	ring = kring->ring;
+
+	/* make sure the ring is not disabled */
+	if (nm_kr_tryget(kring))
+		return 0;
+
+	if (is_host_ring && hostna->na_bdg == NULL) {
+		error = bna->save_notify(na, ring_nr, tx, flags);
+		goto put_out;
+	}
+
+	/* Here we expect ring->head = ring->cur = ring->tail
+	 * because everything has been released from the previous round.
+	 * However the ring is shared and we might have info from
+	 * the wrong side (the tx ring). Hence we overwrite with
+	 * the info from the rx kring.
+	 */
+	if (netmap_verbose)
+	    D("%s head %d cur %d tail %d (kring %d %d %d)",  na->name,
+		ring->head, ring->cur, ring->tail,
+		kring->rhead, kring->rcur, kring->rtail);
+
+	ring->head = kring->rhead;
+	ring->cur = kring->rcur;
+	ring->tail = kring->rtail;
+
+	if (is_host_ring) {
+		vpna = hostna;
+		ring_nr = 0;
+	}
+	/* simulate a user wakeup on the rx ring */
+	/* fetch packets that have arrived.
+	 * XXX maybe do this in a loop ?
+	 */
+	error = kring->nm_sync(kring, 0);
+	if (error)
+		goto put_out;
+	if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
+		D("how strange, interrupt with no packets on %s",
+			na->name);
+		goto put_out;
+	}
+
+	/* new packets are ring->cur to ring->tail, and the bkring
+	 * had hwcur == ring->cur. So advance ring->cur to ring->tail
+	 * to push all packets out.
+	 */
+	ring->head = ring->cur = ring->tail;
+
+	/* also set tail to what the bwrap expects */
+	bkring = &vpna->up.tx_rings[ring_nr];
+	ring->tail = bkring->nr_hwtail; // rtail too ?
+
+	/* pass packets to the switch */
+	nm_txsync_prologue(bkring); // XXX error checking ?
+	netmap_vp_txsync(bkring, flags);
+
+	/* mark all buffers as released on this ring */
+	ring->head = ring->cur = kring->nr_hwtail;
+	ring->tail = kring->rtail;
+	/* another call to actually release the buffers */
+	if (!is_host_ring) {
+		error = kring->nm_sync(kring, 0);
+	} else {
+		/* mark all packets as released, as in the
+		 * second part of netmap_rxsync_from_host()
+		 */
+		kring->nr_hwcur = kring->nr_hwtail;
+		nm_rxsync_finalize(kring);
+	}
+
+put_out:
+	nm_kr_put(kring);
+	return error;
+}
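+
+/*
+ * Summary of the data path implemented above (editor's note): a NIC
+ * interrupt (or a host-stack transmit) lands in the overwritten
+ * nm_notify, the hwna rx kring is synced to fetch the new packets,
+ * the shared ring is pushed through the VALE switch by a txsync on
+ * the corresponding bwrap tx kring, and a final sync releases the
+ * consumed buffers back to the hwna.
+ */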
+
+
+/* nm_register callback for bwrap */
+static int
+netmap_bwrap_register(struct netmap_adapter *na, int onoff)
+{
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	struct netmap_vp_adapter *hostna = &bna->host;
+	int error;
+
+	ND("%s %s", na->name, onoff ? "on" : "off");
+
+	if (onoff) {
+		int i;
+
+		/* netmap_do_regif has been called on the bwrap na.
+		 * We need to pass the information about the
+		 * memory allocator down to the hwna before
+		 * putting it in netmap mode
+		 */
+		hwna->na_lut = na->na_lut;
+		hwna->na_lut_objtotal = na->na_lut_objtotal;
+		hwna->na_lut_objsize = na->na_lut_objsize;
+
+		if (hostna->na_bdg) {
+			/* if the host rings have been attached to switch,
+			 * we need to copy the memory allocator information
+			 * in the hostna also
+			 */
+			hostna->up.na_lut = na->na_lut;
+			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
+			hostna->up.na_lut_objsize = na->na_lut_objsize;
+		}
+
+		/* cross-link the netmap rings
+		 * The original number of rings comes from hwna,
+		 * and the rx rings on one side are the tx rings on the other.
+		 * We need to do this now, after the initialization
+		 * of the kring->ring pointers
+		 */
+		for (i = 0; i < na->num_rx_rings + 1; i++) {
+			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
+			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
+		}
+		for (i = 0; i < na->num_tx_rings + 1; i++) {
+			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
+			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
+		}
+	}
+
+	/* forward the request to the hwna */
+	error = hwna->nm_register(hwna, onoff);
+	if (error)
+		return error;
+
+	/* impersonate a netmap_vp_adapter */
+	netmap_vp_reg(na, onoff);
+	if (hostna->na_bdg)
+		netmap_vp_reg(&hostna->up, onoff);
+
+	if (onoff) {
+		/* intercept the hwna nm_notify callback */
+		bna->save_notify = hwna->nm_notify;
+		hwna->nm_notify = netmap_bwrap_intr_notify;
+	} else {
+		hwna->nm_notify = bna->save_notify;
+		hwna->na_lut = NULL;
+		hwna->na_lut_objtotal = 0;
+		hwna->na_lut_objsize = 0;
+	}
+
+	return 0;
+}
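+
+/*
+ * The ring cross-linking performed above can be pictured as follows
+ * (editor's sketch):
+ *
+ *	bwrap rx_rings[i].ring  ==  hwna tx_rings[i].ring
+ *	bwrap tx_rings[i].ring  ==  hwna rx_rings[i].ring
+ *
+ * so packets received by the hwna appear on a bwrap tx ring, ready to
+ * be forwarded into the switch, while packets coming from the switch
+ * are placed on a bwrap rx ring and transmitted by a txsync on the
+ * hwna.
+ */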
+
+/* nm_config callback for bwrap */
+static int
+netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
+				    u_int *rxr, u_int *rxd)
+{
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+
+	/* forward the request */
+	netmap_update_config(hwna);
+	/* swap the results */
+	*txr = hwna->num_rx_rings;
+	*txd = hwna->num_rx_desc;
+	*rxr = hwna->num_tx_rings;
+	*rxd = hwna->num_tx_desc;
+
+	return 0;
+}
+
+
+/* nm_krings_create callback for bwrap */
+static int
+netmap_bwrap_krings_create(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	struct netmap_adapter *hostna = &bna->host.up;
+	int error;
+
+	ND("%s", na->name);
+
+	/* impersonate a netmap_vp_adapter */
+	error = netmap_vp_krings_create(na);
+	if (error)
+		return error;
+
+	/* also create the hwna krings */
+	error = hwna->nm_krings_create(hwna);
+	if (error) {
+		netmap_vp_krings_delete(na);
+		return error;
+	}
+	/* the connection between the bwrap krings and the hwna krings
+	 * will be performed later, in the nm_register callback, since
+	 * now the kring->ring pointers have not been initialized yet
+	 */
+
+	if (na->na_flags & NAF_HOST_RINGS) {
+		/* the hostna rings are the host rings of the bwrap.
+		 * The corresponding krings must point back to the
+		 * hostna
+		 */
+		hostna->tx_rings = na->tx_rings + na->num_tx_rings;
+		hostna->tx_rings[0].na = hostna;
+		hostna->rx_rings = na->rx_rings + na->num_rx_rings;
+		hostna->rx_rings[0].na = hostna;
+	}
+
+	return 0;
+}
+
+
+static void
+netmap_bwrap_krings_delete(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+
+	ND("%s", na->name);
+
+	hwna->nm_krings_delete(hwna);
+	netmap_vp_krings_delete(na);
+}
+
+
+/* notify method for the bridge-->hwna direction */
+static int
+netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
+{
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	struct netmap_kring *kring, *hw_kring;
+	struct netmap_ring *ring;
+	u_int lim;
+	int error = 0;
+
+	if (tx == NR_TX)
+	        return EINVAL;
+
+	kring = &na->rx_rings[ring_n];
+	hw_kring = &hwna->tx_rings[ring_n];
+	ring = kring->ring;
+	lim = kring->nkr_num_slots - 1;
+
+	if (!nm_netmap_on(hwna))
+		return 0;
+	mtx_lock(&kring->q_lock);
+	/* first step: simulate a user wakeup on the rx ring */
+	netmap_vp_rxsync_locked(kring, flags);
+	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
+		na->name, ring_n,
+		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+		ring->head, ring->cur, ring->tail,
+		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
+	/* second step: the simulated user consumes all new packets */
+	ring->head = ring->cur = ring->tail;
+
+	/* third step: the new packets are sent on the tx ring
+	 * (which is actually the same ring)
+	 */
+	/* set tail to what the hw expects */
+	ring->tail = hw_kring->rtail;
+	nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
+	error = hw_kring->nm_sync(hw_kring, flags);
+
+	/* fourth step: now we are back on the rx ring */
+	/* claim ownership on all hw owned bufs */
+	ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
+	ring->tail = kring->rtail; /* restore saved value of tail, for safety */
+
+	/* fifth step: the user goes to sleep again, causing another rxsync */
+	netmap_vp_rxsync_locked(kring, flags);
+	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
+		na->name, ring_n,
+		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+		ring->head, ring->cur, ring->tail,
+		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
+	mtx_unlock(&kring->q_lock);
+	return error;
+}
+
+
+/* notify method for the bridge-->host-rings path */
+static int
+netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
+{
+	struct netmap_bwrap_adapter *bna = na->na_private;
+	struct netmap_adapter *port_na = &bna->up.up;
+	if (tx == NR_TX || ring_n != 0)
+		return EINVAL;
+	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
+}
+
+
+/* nm_bdg_ctl callback for the bwrap.
+ * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
+ * On attach, it needs to provide a fake netmap_priv_d structure and
+ * perform a netmap_do_regif() on the bwrap. This will put both the
+ * bwrap and the hwna in netmap mode, with the netmap rings shared
+ * and cross linked. Moreover, it will start intercepting interrupts
+ * directed to hwna.
+ */
+static int
+netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
+{
+	struct netmap_priv_d *npriv;
+	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
+	struct netmap_if *nifp;
+	int error = 0;
+
+	if (attach) {
+		if (NETMAP_OWNED_BY_ANY(na)) {
+			return EBUSY;
+		}
+		if (bna->na_kpriv) {
+			/* nothing to do */
+			return 0;
+		}
+		npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
+		if (npriv == NULL)
+			return ENOMEM;
+		nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error);
+		if (!nifp) {
+			bzero(npriv, sizeof(*npriv));
+			free(npriv, M_DEVBUF);
+			return error;
+		}
+		bna->na_kpriv = npriv;
+		na->na_flags |= NAF_BUSY;
+	} else {
+		int last_instance;
+
+		if (na->active_fds == 0) /* not registered */
+			return EINVAL;
+		last_instance = netmap_dtor_locked(bna->na_kpriv);
+		if (!last_instance) {
+			D("--- error, trying to detach an entry with active mmaps");
+			error = EINVAL;
+		} else {
+			struct nm_bridge *b = bna->up.na_bdg,
+				*bh = bna->host.na_bdg;
+			npriv = bna->na_kpriv;
+			bna->na_kpriv = NULL;
+			D("deleting priv");
+
+			bzero(npriv, sizeof(*npriv));
+			free(npriv, M_DEVBUF);
+			if (b) {
+				/* XXX the bwrap dtor should take care
+				 * of this (2014-06-16)
+				 */
+				netmap_bdg_detach_common(b, bna->up.bdg_port,
+				    (bh ? bna->host.bdg_port : -1));
+			}
+			na->na_flags &= ~NAF_BUSY;
+		}
+	}
+	return error;
+
+}
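+
+/*
+ * Editor's note on the attach/detach flow above: the -a/-d options of
+ * vale-ctl mentioned in the comment end up here, for example something
+ * like "vale-ctl -a vale0:em0" (the exact command syntax is an
+ * assumption, see vale-ctl's own documentation) reaches this callback
+ * with attach != 0 and puts both the bwrap and the hwna in netmap
+ * mode, while a detach takes the other branch and releases the fake
+ * priv structure.
+ */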
+
+/* attach a bridge wrapper to the 'real' device */
+int
+netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
+{
+	struct netmap_bwrap_adapter *bna;
+	struct netmap_adapter *na = NULL;
+	struct netmap_adapter *hostna = NULL;
+	int error = 0;
+
+	/* make sure the NIC is not already in use */
+	if (NETMAP_OWNED_BY_ANY(hwna)) {
+		D("NIC %s busy, cannot attach to bridge", hwna->name);
+		return EBUSY;
+	}
+
+	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (bna == NULL) {
+		return ENOMEM;
+	}
+
+	na = &bna->up.up;
+	strncpy(na->name, nr_name, sizeof(na->name));
+	/* fill the ring data for the bwrap adapter with rx/tx meanings
+	 * swapped. The real cross-linking will be done during register,
+	 * when all the krings will have been created.
+	 */
+	na->num_rx_rings = hwna->num_tx_rings;
+	na->num_tx_rings = hwna->num_rx_rings;
+	na->num_tx_desc = hwna->num_rx_desc;
+	na->num_rx_desc = hwna->num_tx_desc;
+	na->nm_dtor = netmap_bwrap_dtor;
+	na->nm_register = netmap_bwrap_register;
+	// na->nm_txsync = netmap_bwrap_txsync;
+	// na->nm_rxsync = netmap_bwrap_rxsync;
+	na->nm_config = netmap_bwrap_config;
+	na->nm_krings_create = netmap_bwrap_krings_create;
+	na->nm_krings_delete = netmap_bwrap_krings_delete;
+	na->nm_notify = netmap_bwrap_notify;
+	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
+	na->pdev = hwna->pdev;
+	na->nm_mem = netmap_mem_private_new(na->name,
+			na->num_tx_rings, na->num_tx_desc,
+			na->num_rx_rings, na->num_rx_desc,
+			0, 0, &error);
+	na->na_flags |= NAF_MEM_OWNER;
+	if (na->nm_mem == NULL)
+		goto err_put;
+	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
+
+	bna->hwna = hwna;
+	netmap_adapter_get(hwna);
+	hwna->na_private = bna; /* weak reference */
+	hwna->na_vp = &bna->up;
+
+	if (hwna->na_flags & NAF_HOST_RINGS) {
+		if (hwna->na_flags & NAF_SW_ONLY)
+			na->na_flags |= NAF_SW_ONLY;
+		na->na_flags |= NAF_HOST_RINGS;
+		hostna = &bna->host.up;
+		snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
+		hostna->ifp = hwna->ifp;
+		hostna->num_tx_rings = 1;
+		hostna->num_tx_desc = hwna->num_rx_desc;
+		hostna->num_rx_rings = 1;
+		hostna->num_rx_desc = hwna->num_tx_desc;
+		// hostna->nm_txsync = netmap_bwrap_host_txsync;
+		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
+		hostna->nm_notify = netmap_bwrap_host_notify;
+		hostna->nm_mem = na->nm_mem;
+		hostna->na_private = bna;
+		hostna->na_vp = &bna->up;
+		na->na_hostvp = hwna->na_hostvp =
+			hostna->na_hostvp = &bna->host;
+		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
+	}
+
+	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
+		na->name, ifp->if_xname,
+		na->num_tx_rings, na->num_tx_desc,
+		na->num_rx_rings, na->num_rx_desc);
+
+	error = netmap_attach_common(na);
+	if (error) {
+		goto err_free;
+	}
+	/* make bwrap ifp point to the real ifp
+	 * NOTE: netmap_attach_common() interprets a non-NULL na->ifp
+	 * as a request to make the ifp point to the na. Since we
+	 * do not want to change the na already pointed to by hwna->ifp,
+	 * the following assignment has to be delayed until now
+	 */
+	na->ifp = hwna->ifp;
+	hwna->na_flags |= NAF_BUSY;
+	/* make hwna point to the allocator we are actually using,
+	 * so that monitors will be able to find it
+	 */
+	bna->save_nmd = hwna->nm_mem;
+	hwna->nm_mem = na->nm_mem;
+	return 0;
+
+err_free:
+	netmap_mem_private_delete(na->nm_mem);
+err_put:
+	hwna->na_vp = hwna->na_hostvp = NULL;
+	netmap_adapter_put(hwna);
+	free(bna, M_DEVBUF);
+	return error;
+
+}
+
+
+void
+netmap_init_bridges(void)
+{
+	int i;
+	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
+	for (i = 0; i < NM_BRIDGES; i++)
+		BDG_RWINIT(&nm_bridges[i]);
+}
+#endif /* WITH_VALE */


Property changes on: trunk/sys/dev/netmap/netmap_vale.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/dev/null/null.c
===================================================================
--- trunk/sys/dev/null/null.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/null/null.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -2,7 +2,6 @@
 /*-
  * Copyright (c) 2000 Mark R. V. Murray & Jeroen C. van Gelderen
  * Copyright (c) 2001-2004 Mark R. V. Murray
- * Copyright (c) 2014 Eitan Adler
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -29,7 +28,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/null/null.c 291215 2015-11-23 18:00:55Z smh $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -38,7 +37,6 @@
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
-#include <sys/priv.h>
 #include <sys/disk.h>
 #include <sys/bus.h>
 #include <sys/filio.h>
@@ -49,9 +47,7 @@
 /* For use with destroy_dev(9). */
 static struct cdev *null_dev;
 static struct cdev *zero_dev;
-static struct cdev *full_dev;
 
-static d_write_t full_write;
 static d_write_t null_write;
 static d_ioctl_t null_ioctl;
 static d_ioctl_t zero_ioctl;
@@ -74,25 +70,8 @@
 	.d_flags =	D_MMAP_ANON,
 };
 
-static struct cdevsw full_cdevsw = {
-	.d_version =	D_VERSION,
-	.d_read =	zero_read,
-	.d_write =	full_write,
-	.d_ioctl =	zero_ioctl,
-	.d_name =	"full",
-};
-
-
 /* ARGSUSED */
 static int
-full_write(struct cdev *dev __unused, struct uio *uio, int flags __unused)
-{
-
-	return (ENOSPC);
-}
-
-/* ARGSUSED */
-static int
 null_write(struct cdev *dev __unused, struct uio *uio, int flags __unused)
 {
 	uio->uio_resid = 0;
@@ -110,9 +89,7 @@
 
 	switch (cmd) {
 	case DIOCSKERNELDUMP:
-		error = priv_check(td, PRIV_SETDUMPER);
-		if (error == 0)
-			error = set_dumper(NULL);
+		error = set_dumper(NULL, NULL, td);
 		break;
 	case FIONBIO:
 		break;
@@ -176,9 +153,7 @@
 	switch(type) {
 	case MOD_LOAD:
 		if (bootverbose)
-			printf("null: <full device, null device, zero device>\n");
-		full_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &full_cdevsw, 0,
-		    NULL, UID_ROOT, GID_WHEEL, 0666, "full");
+			printf("null: <null device, zero device>\n");
 		null_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &null_cdevsw, 0,
 		    NULL, UID_ROOT, GID_WHEEL, 0666, "null");
 		zero_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &zero_cdevsw, 0,
@@ -186,7 +161,6 @@
 		break;
 
 	case MOD_UNLOAD:
-		destroy_dev(full_dev);
 		destroy_dev(null_dev);
 		destroy_dev(zero_dev);
 		break;

Modified: trunk/sys/dev/nvram2env/nvram2env.c
===================================================================
--- trunk/sys/dev/nvram2env/nvram2env.c	2018-05-27 23:30:53 UTC (rev 10091)
+++ trunk/sys/dev/nvram2env/nvram2env.c	2018-05-27 23:32:51 UTC (rev 10092)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2010 Aleksandr Rybalko.
  * All rights reserved.
@@ -33,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/nvram2env/nvram2env.c 246128 2013-01-30 18:01:20Z sbz $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -303,7 +304,8 @@
 	DEVMETHOD(device_identify, 	nvram2env_identify),
 	DEVMETHOD(device_probe,		nvram2env_probe),
 	DEVMETHOD(device_attach,	nvram2env_attach),
-	{0, 0},
+
+	DEVMETHOD_END
 };
 
 static driver_t nvram2env_driver = {


