[Midnightbsd-cvs] src [9570] trunk/sys/dev: sync with 9 stable

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Mon Sep 18 21:58:49 EDT 2017


Revision: 9570
          http://svnweb.midnightbsd.org/src/?rev=9570
Author:   laffer1
Date:     2017-09-18 21:58:48 -0400 (Mon, 18 Sep 2017)
Log Message:
-----------
sync with 9 stable

Modified Paths:
--------------
    trunk/sys/dev/nvd/nvd.c
    trunk/sys/dev/nvme/nvme.c
    trunk/sys/dev/nvme/nvme.h
    trunk/sys/dev/nvme/nvme_ctrlr.c
    trunk/sys/dev/nvme/nvme_ctrlr_cmd.c
    trunk/sys/dev/nvme/nvme_ns.c
    trunk/sys/dev/nvme/nvme_ns_cmd.c
    trunk/sys/dev/nvme/nvme_private.h
    trunk/sys/dev/nvme/nvme_qpair.c
    trunk/sys/dev/nvme/nvme_sysctl.c
    trunk/sys/dev/nvme/nvme_test.c
    trunk/sys/dev/nvme/nvme_util.c

Modified: trunk/sys/dev/nvd/nvd.c
===================================================================
--- trunk/sys/dev/nvd/nvd.c	2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvd/nvd.c	2017-09-19 01:58:48 UTC (rev 9570)
@@ -187,17 +187,6 @@
 
 	atomic_add_int(&ndisk->cur_depth, -1);
 
-	/*
-	 * TODO: add more extensive translation of NVMe status codes
-	 *  to different bio error codes (i.e. EIO, EINVAL, etc.)
-	 */
-	if (nvme_completion_is_error(cpl)) {
-		bp->bio_error = EIO;
-		bp->bio_flags |= BIO_ERROR;
-		bp->bio_resid = bp->bio_bcount;
-	} else
-		bp->bio_resid = 0;
-
 	biodone(bp);
 }
 

Modified: trunk/sys/dev/nvme/nvme.c
===================================================================
--- trunk/sys/dev/nvme/nvme.c	2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme.c	2017-09-19 01:58:48 UTC (rev 9570)
@@ -1,6 +1,6 @@
 /* $MidnightBSD$ */
 /*-
- * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2012-2014 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme.c 253631 2013-07-24 22:48:29Z jimharris $");
+__FBSDID("$FreeBSD: stable/9/sys/dev/nvme/nvme.c 265565 2014-05-07 16:47:58Z jimharris $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -158,8 +158,6 @@
 {
 	device_t		*devlist;
 	struct nvme_controller	*ctrlr;
-	union cc_register	cc;
-	union csts_register	csts;
 	int			dev, devcount;
 
 	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
@@ -166,22 +164,8 @@
 		return;
 
 	for (dev = 0; dev < devcount; dev++) {
-		/*
-		 * Only notify controller of shutdown when a real shutdown is
-		 *  in process, not when a module unload occurs.  It seems at
-		 *  least some controllers (Chatham at least) don't let you
-		 *  re-enable the controller after shutdown notification has
-		 *  been received.
-		 */
 		ctrlr = DEVICE2SOFTC(devlist[dev]);
-		cc.raw = nvme_mmio_read_4(ctrlr, cc);
-		cc.bits.shn = NVME_SHN_NORMAL;
-		nvme_mmio_write_4(ctrlr, cc, cc.raw);
-		csts.raw = nvme_mmio_read_4(ctrlr, csts);
-		while (csts.bits.shst != NVME_SHST_COMPLETE) {
-			DELAY(5);
-			csts.raw = nvme_mmio_read_4(ctrlr, csts);
-		}
+		nvme_ctrlr_shutdown(ctrlr);
 	}
 
 	free(devlist, M_TEMP);
@@ -238,8 +222,10 @@
 
 	status = nvme_ctrlr_construct(ctrlr, dev);
 
-	if (status != 0)
+	if (status != 0) {
+		nvme_ctrlr_destruct(ctrlr, dev);
 		return (status);
+	}
 
 	/*
 	 * Reset controller twice to ensure we do a transition from cc.en==1
@@ -247,12 +233,16 @@
 	 *  the controller was left in when boot handed off to OS.
 	 */
 	status = nvme_ctrlr_hw_reset(ctrlr);
-	if (status != 0)
+	if (status != 0) {
+		nvme_ctrlr_destruct(ctrlr, dev);
 		return (status);
+	}
 
 	status = nvme_ctrlr_hw_reset(ctrlr);
-	if (status != 0)
+	if (status != 0) {
+		nvme_ctrlr_destruct(ctrlr, dev);
 		return (status);
+	}
 
 	nvme_sysctl_initialize_ctrlr(ctrlr);
 
@@ -277,30 +267,75 @@
 }
 
 static void
-nvme_notify_consumer(struct nvme_consumer *cons)
+nvme_notify(struct nvme_consumer *cons,
+	    struct nvme_controller *ctrlr)
 {
-	device_t		*devlist;
-	struct nvme_controller	*ctrlr;
 	struct nvme_namespace	*ns;
 	void			*ctrlr_cookie;
-	int			dev_idx, ns_idx, devcount;
+	int			cmpset, ns_idx;
 
+	/*
+	 * The consumer may register itself after the nvme devices
+	 *  have registered with the kernel, but before the
+	 *  driver has completed initialization.  In that case,
+	 *  return here, and when initialization completes, the
+	 *  controller will make sure the consumer gets notified.
+	 */
+	if (!ctrlr->is_initialized)
+		return;
+
+	cmpset = atomic_cmpset_32(&ctrlr->notification_sent, 0, 1);
+
+	if (cmpset == 0)
+		return;
+
+	if (cons->ctrlr_fn != NULL)
+		ctrlr_cookie = (*cons->ctrlr_fn)(ctrlr);
+	else
+		ctrlr_cookie = NULL;
+	ctrlr->cons_cookie[cons->id] = ctrlr_cookie;
+	if (ctrlr->is_failed) {
+		if (cons->fail_fn != NULL)
+			(*cons->fail_fn)(ctrlr_cookie);
+		/*
+		 * Do not notify consumers about the namespaces of a
+		 *  failed controller.
+		 */
+		return;
+	}
+	for (ns_idx = 0; ns_idx < ctrlr->cdata.nn; ns_idx++) {
+		ns = &ctrlr->ns[ns_idx];
+		if (cons->ns_fn != NULL)
+			ns->cons_cookie[cons->id] =
+			    (*cons->ns_fn)(ns, ctrlr_cookie);
+	}
+}
+
+void
+nvme_notify_new_controller(struct nvme_controller *ctrlr)
+{
+	int i;
+
+	for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
+		if (nvme_consumer[i].id != INVALID_CONSUMER_ID) {
+			nvme_notify(&nvme_consumer[i], ctrlr);
+		}
+	}
+}
+
+static void
+nvme_notify_new_consumer(struct nvme_consumer *cons)
+{
+	device_t		*devlist;
+	struct nvme_controller	*ctrlr;
+	int			dev_idx, devcount;
+
 	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
 		return;
 
 	for (dev_idx = 0; dev_idx < devcount; dev_idx++) {
 		ctrlr = DEVICE2SOFTC(devlist[dev_idx]);
-		if (cons->ctrlr_fn != NULL)
-			ctrlr_cookie = (*cons->ctrlr_fn)(ctrlr);
-		else
-			ctrlr_cookie = NULL;
-		ctrlr->cons_cookie[cons->id] = ctrlr_cookie;
-		for (ns_idx = 0; ns_idx < ctrlr->cdata.nn; ns_idx++) {
-			ns = &ctrlr->ns[ns_idx];
-			if (cons->ns_fn != NULL)
-				ns->cons_cookie[cons->id] =
-				    (*cons->ns_fn)(ns, ctrlr_cookie);
-		}
+		nvme_notify(cons, ctrlr);
 	}
 
 	free(devlist, M_TEMP);
@@ -355,7 +390,7 @@
 			nvme_consumer[i].async_fn = async_fn;
 			nvme_consumer[i].fail_fn = fail_fn;
 
-			nvme_notify_consumer(&nvme_consumer[i]);
+			nvme_notify_new_consumer(&nvme_consumer[i]);
 			return (&nvme_consumer[i]);
 		}
 

Modified: trunk/sys/dev/nvme/nvme.h
===================================================================
--- trunk/sys/dev/nvme/nvme.h	2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme.h	2017-09-19 01:58:48 UTC (rev 9570)
@@ -24,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: release/9.2.0/sys/dev/nvme/nvme.h 253631 2013-07-24 22:48:29Z jimharris $
+ * $FreeBSD: stable/9/sys/dev/nvme/nvme.h 263273 2014-03-17 21:42:31Z jimharris $
  */
 
 #ifndef __NVME_H__
@@ -171,27 +171,30 @@
 	union cap_lo_register	cap_lo;
 	union cap_hi_register	cap_hi;
 
-	uint32_t	vs;		/* version */
-	uint32_t	intms;		/* interrupt mask set */
-	uint32_t	intmc;		/* interrupt mask clear */
+	uint32_t		vs;	/* version */
+	uint32_t		intms;	/* interrupt mask set */
+	uint32_t		intmc;	/* interrupt mask clear */
 
 	/** controller configuration */
 	union cc_register	cc;
 
-	uint32_t	reserved1;
-	uint32_t	csts;		/* controller status */
-	uint32_t	reserved2;
+	uint32_t		reserved1;
 
+	/** controller status */
+	union csts_register	csts;
+
+	uint32_t		reserved2;
+
 	/** admin queue attributes */
 	union aqa_register	aqa;
 
-	uint64_t	asq;		/* admin submission queue base addr */
-	uint64_t	acq;		/* admin completion queue base addr */
-	uint32_t	reserved3[0x3f2];
+	uint64_t		asq;	/* admin submission queue base addr */
+	uint64_t		acq;	/* admin completion queue base addr */
+	uint32_t		reserved3[0x3f2];
 
 	struct {
-	    uint32_t	sq_tdbl;	/* submission queue tail doorbell */
-	    uint32_t	cq_hdbl;	/* completion queue head doorbell */
+	    uint32_t		sq_tdbl; /* submission queue tail doorbell */
+	    uint32_t		cq_hdbl; /* completion queue head doorbell */
 	} doorbell[1] __packed;
 } __packed;
 
@@ -533,7 +536,7 @@
 	uint8_t			reserved6[1024];
 
 	/* bytes 3072-4095: vendor specific */
-	uint8_t			reserved7[1024];
+	uint8_t			vs[1024];
 } __packed __aligned(4);
 
 struct nvme_namespace_data {
@@ -718,7 +721,7 @@
 	uint32_t		time;	/* in seconds */
 	uint32_t		num_threads;
 	uint32_t		flags;
-	uint32_t		io_completed[NVME_TEST_MAX_THREADS];
+	uint64_t		io_completed[NVME_TEST_MAX_THREADS];
 };
 
 enum nvme_io_test_flags {

Modified: trunk/sys/dev/nvme/nvme_ctrlr.c
===================================================================
--- trunk/sys/dev/nvme/nvme_ctrlr.c	2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_ctrlr.c	2017-09-19 01:58:48 UTC (rev 9570)
@@ -1,6 +1,6 @@
 /* $MidnightBSD$ */
 /*-
- * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2012-2014 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_ctrlr.c 253627 2013-07-24 22:42:00Z jimharris $");
+__FBSDID("$FreeBSD: stable/9/sys/dev/nvme/nvme_ctrlr.c 265566 2014-05-07 16:48:43Z jimharris $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -182,8 +182,8 @@
 	cdata->lpa.ns_smart = 1;
 	cdata->sqes.min = 6;
 	cdata->sqes.max = 6;
-	cdata->sqes.min = 4;
-	cdata->sqes.max = 4;
+	cdata->cqes.min = 4;
+	cdata->cqes.max = 4;
 	cdata->nn = 1;
 
 	/* Chatham2 doesn't support DSM command */
@@ -618,9 +618,35 @@
 }
 
 static void
+nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
+    union nvme_critical_warning_state state)
+{
+
+	if (state.bits.available_spare == 1)
+		nvme_printf(ctrlr, "available spare space below threshold\n");
+
+	if (state.bits.temperature == 1)
+		nvme_printf(ctrlr, "temperature above threshold\n");
+
+	if (state.bits.device_reliability == 1)
+		nvme_printf(ctrlr, "device reliability degraded\n");
+
+	if (state.bits.read_only == 1)
+		nvme_printf(ctrlr, "media placed in read only mode\n");
+
+	if (state.bits.volatile_memory_backup == 1)
+		nvme_printf(ctrlr, "volatile memory backup device failed\n");
+
+	if (state.bits.reserved != 0)
+		nvme_printf(ctrlr,
+		    "unknown critical warning(s): state = 0x%02x\n", state.raw);
+}
+
+static void
 nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
 {
-	struct nvme_async_event_request	*aer = arg;
+	struct nvme_async_event_request		*aer = arg;
+	struct nvme_health_information_page	*health_info;
 
 	/*
 	 * If the log page fetch for some reason completed with an error,
@@ -630,7 +656,26 @@
 	if (nvme_completion_is_error(cpl))
 		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
 		    aer->log_page_id, NULL, 0);
-	else
+	else {
+		if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
+			health_info = (struct nvme_health_information_page *)
+			    aer->log_page_buffer;
+			nvme_ctrlr_log_critical_warnings(aer->ctrlr,
+			    health_info->critical_warning);
+			/*
+			 * Critical warnings reported through the
+			 *  SMART/health log page are persistent, so
+			 *  clear the associated bits in the async event
+			 *  config so that we do not receive repeated
+			 *  notifications for the same event.
+			 */
+			aer->ctrlr->async_event_config.raw &=
+			    ~health_info->critical_warning.raw;
+			nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
+			    aer->ctrlr->async_event_config, NULL, NULL);
+		}
+
+
 		/*
 		 * Pass the cpl data from the original async event completion,
 		 *  not the log page fetch.
@@ -637,6 +682,7 @@
 		 */
 		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
 		    aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
+	}
 
 	/*
 	 * Repost another asynchronous event request to replace the one
@@ -709,14 +755,28 @@
 static void
 nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
 {
-	union nvme_critical_warning_state	state;
+	struct nvme_completion_poll_status	status;
 	struct nvme_async_event_request		*aer;
 	uint32_t				i;
 
-	state.raw = 0xFF;
-	state.bits.reserved = 0;
-	nvme_ctrlr_cmd_set_async_event_config(ctrlr, state, NULL, NULL);
+	ctrlr->async_event_config.raw = 0xFF;
+	ctrlr->async_event_config.bits.reserved = 0;
 
+	status.done = FALSE;
+	nvme_ctrlr_cmd_get_feature(ctrlr, NVME_FEAT_TEMPERATURE_THRESHOLD,
+	    0, NULL, 0, nvme_completion_poll_cb, &status);
+	while (status.done == FALSE)
+		pause("nvme", 1);
+	if (nvme_completion_is_error(&status.cpl) ||
+	    (status.cpl.cdw0 & 0xFFFF) == 0xFFFF ||
+	    (status.cpl.cdw0 & 0xFFFF) == 0x0000) {
+		nvme_printf(ctrlr, "temperature threshold not supported\n");
+		ctrlr->async_event_config.bits.temperature = 0;
+	}
+
+	nvme_ctrlr_cmd_set_async_event_config(ctrlr,
+	    ctrlr->async_event_config, NULL, NULL);
+
 	/* aerl is a zero-based value, so we need to add 1 here. */
 	ctrlr->num_aers = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1));
 
@@ -783,16 +843,6 @@
 
 	for (i = 0; i < ctrlr->num_io_queues; i++)
 		nvme_io_qpair_enable(&ctrlr->ioq[i]);
-
-	/*
-	 * Clear software progress marker to 0, to indicate to pre-boot
-	 *  software that OS driver load was successful.
-	 *
-	 * Chatham does not support this feature.
-	 */
-	if (pci_get_devid(ctrlr->dev) != CHATHAM_PCI_ID)
-		nvme_ctrlr_cmd_set_feature(ctrlr,
-		    NVME_FEAT_SOFTWARE_PROGRESS_MARKER, 0, NULL, 0, NULL, NULL);
 }
 
 void
@@ -802,6 +852,9 @@
 
 	nvme_ctrlr_start(ctrlr);
 	config_intrhook_disestablish(&ctrlr->config_hook);
+
+	ctrlr->is_initialized = 1;
+	nvme_notify_new_controller(ctrlr);
 }
 
 static void
@@ -982,6 +1035,27 @@
 		break;
 	case NVME_PASSTHROUGH_CMD:
 		pt = (struct nvme_pt_command *)arg;
+#ifdef CHATHAM2
+		/*
+		 * Chatham IDENTIFY data is spoofed, so copy the spoofed data
+		 *  rather than issuing the command to the Chatham controller.
+		 */
+		if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID &&
+                    pt->cmd.opc == NVME_OPC_IDENTIFY) {
+			if (pt->cmd.cdw10 == 1) {
+                        	if (pt->len != sizeof(ctrlr->cdata))
+                                	return (EINVAL);
+                        	return (copyout(&ctrlr->cdata, pt->buf,
+				    pt->len));
+			} else {
+				if (pt->len != sizeof(ctrlr->ns[0].data) ||
+				    pt->cmd.nsid != 1)
+					return (EINVAL);
+				return (copyout(&ctrlr->ns[0].data, pt->buf,
+				    pt->len));
+			}
+		}
+#endif
 		return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, pt->cmd.nsid,
 		    1 /* is_user_buffer */, 1 /* is_admin_cmd */));
 	default:
@@ -1002,8 +1076,8 @@
 {
 	union cap_lo_register	cap_lo;
 	union cap_hi_register	cap_hi;
-	int			num_vectors, per_cpu_io_queues, status = 0;
-	int			timeout_period;
+	int			i, num_vectors, per_cpu_io_queues, rid;
+	int			status, timeout_period;
 
 	ctrlr->dev = dev;
 
@@ -1076,9 +1150,46 @@
 		goto intx;
 	}
 
-	if (pci_alloc_msix(dev, &num_vectors) != 0)
+	if (pci_alloc_msix(dev, &num_vectors) != 0) {
 		ctrlr->msix_enabled = 0;
+		goto intx;
+	}
 
+	/*
+	 * On earlier FreeBSD releases, there are reports that
+	 *  pci_alloc_msix() can return successfully with all vectors
+	 *  requested, but a subsequent bus_alloc_resource_any()
+	 *  for one of those vectors fails.  This issue occurs more
+	 *  readily with multiple devices using per-CPU vectors.
+	 * To work around this issue, try to allocate the resources now,
+	 *  and fall back to INTx if we cannot allocate all of them.
+	 *  This issue cannot be reproduced on more recent versions of
+	 *  FreeBSD which have increased the maximum number of MSI-X
+	 *  vectors, but adding the workaround makes it easier for
+	 *  vendors wishing to import this driver into kernels based on
+	 *  older versions of FreeBSD.
+	 */
+	for (i = 0; i < num_vectors; i++) {
+		rid = i + 1;
+		ctrlr->msi_res[i] = bus_alloc_resource_any(ctrlr->dev,
+		    SYS_RES_IRQ, &rid, RF_ACTIVE);
+
+		if (ctrlr->msi_res[i] == NULL) {
+			ctrlr->msix_enabled = 0;
+			while (i > 0) {
+				i--;
+				bus_release_resource(ctrlr->dev,
+				    SYS_RES_IRQ,
+				    rman_get_rid(ctrlr->msi_res[i]),
+				    ctrlr->msi_res[i]);
+			}
+			pci_release_msi(dev);
+			nvme_printf(ctrlr, "could not obtain all MSI-X "
+			    "resources, reverting to intx\n");
+			break;
+		}
+	}
+
 intx:
 
 	if (!ctrlr->msix_enabled)
@@ -1091,8 +1202,8 @@
 	if (status != 0)
 		return (status);
 
-	ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
-	    "nvme%d", device_get_unit(dev));
+	ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, device_get_unit(dev),
+	    UID_ROOT, GID_WHEEL, 0600, "nvme%d", device_get_unit(dev));
 
 	if (ctrlr->cdev == NULL)
 		return (ENXIO);
@@ -1104,6 +1215,8 @@
 	taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");
 
 	ctrlr->is_resetting = 0;
+	ctrlr->is_initialized = 0;
+	ctrlr->notification_sent = 0;
 	TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
 
 	TASK_INIT(&ctrlr->fail_req_task, 0, nvme_ctrlr_fail_req_task, ctrlr);
@@ -1118,6 +1231,21 @@
 {
 	int				i;
 
+	/*
+	 *  Notify the controller of a shutdown, even though this is due to
+	 *   a driver unload, not a system shutdown (this path is not invoked
+	 *   during shutdown).  This ensures the controller receives a
+	 *   shutdown notification in case the system is shut down before
+	 *   reloading the driver.
+	 *
+	 *  Chatham does not let you re-enable the controller after shutdown
+	 *   notification has been received, so do not send it in this case.
+	 *   This is OK because Chatham does not depend on the shutdown
+	 *   notification anyway.
+	 */
+	if (pci_get_devid(ctrlr->dev) != CHATHAM_PCI_ID)
+		nvme_ctrlr_shutdown(ctrlr);
+
 	nvme_ctrlr_disable(ctrlr);
 	taskqueue_free(ctrlr->taskqueue);
 
@@ -1164,6 +1292,26 @@
 }
 
 void
+nvme_ctrlr_shutdown(struct nvme_controller *ctrlr)
+{
+	union cc_register	cc;
+	union csts_register	csts;
+	int			ticks = 0;
+
+	cc.raw = nvme_mmio_read_4(ctrlr, cc);
+	cc.bits.shn = NVME_SHN_NORMAL;
+	nvme_mmio_write_4(ctrlr, cc, cc.raw);
+	csts.raw = nvme_mmio_read_4(ctrlr, csts);
+	while ((csts.bits.shst != NVME_SHST_COMPLETE) && (ticks++ < 5*hz)) {
+		pause("nvme shn", 1);
+		csts.raw = nvme_mmio_read_4(ctrlr, csts);
+	}
+	if (csts.bits.shst != NVME_SHST_COMPLETE)
+		nvme_printf(ctrlr, "did not complete shutdown within 5 seconds "
+		    "of notification\n");
+}
+
+void
 nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
     struct nvme_request *req)
 {

Modified: trunk/sys/dev/nvme/nvme_ctrlr_cmd.c
===================================================================
--- trunk/sys/dev/nvme/nvme_ctrlr_cmd.c	2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_ctrlr_cmd.c	2017-09-19 01:58:48 UTC (rev 9570)
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_ctrlr_cmd.c 253296 2013-07-12 22:07:33Z jimharris $");
+__FBSDID("$FreeBSD: stable/9/sys/dev/nvme/nvme_ctrlr_cmd.c 267619 2014-06-18 19:28:55Z jimharris $");
 
 #include "nvme_private.h"
 
@@ -206,7 +206,7 @@
 {
 	uint32_t cdw11;
 
-	cdw11 = ((num_queues - 1) << 16) || (num_queues - 1);
+	cdw11 = ((num_queues - 1) << 16) | (num_queues - 1);
 	nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_NUMBER_OF_QUEUES, cdw11,
 	    NULL, 0, cb_fn, cb_arg);
 }

Modified: trunk/sys/dev/nvme/nvme_ns.c
===================================================================
--- trunk/sys/dev/nvme/nvme_ns.c	2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_ns.c	2017-09-19 01:58:48 UTC (rev 9570)
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_ns.c 253630 2013-07-24 22:46:27Z jimharris $");
+__FBSDID("$FreeBSD: stable/9/sys/dev/nvme/nvme_ns.c 257721 2013-11-05 22:33:45Z pluknet $");
 
 #include <sys/param.h>
 #include <sys/bio.h>
@@ -35,13 +35,32 @@
 #include <sys/disk.h>
 #include <sys/fcntl.h>
 #include <sys/ioccom.h>
+#include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/proc.h>
+#include <sys/systm.h>
 
 #include <dev/pci/pcivar.h>
 
+#include <geom/geom.h>
+
 #include "nvme_private.h"
 
+static void		nvme_bio_child_inbed(struct bio *parent, int bio_error);
+static void		nvme_bio_child_done(void *arg,
+					    const struct nvme_completion *cpl);
+static uint32_t		nvme_get_num_segments(uint64_t addr, uint64_t size,
+					      uint32_t alignment);
+static void		nvme_free_child_bios(int num_bios,
+					     struct bio **child_bios);
+static struct bio **	nvme_allocate_child_bios(int num_bios);
+static struct bio **	nvme_construct_child_bios(struct bio *bp,
+						  uint32_t alignment,
+						  int *num_bios);
+static int		nvme_ns_split_bio(struct nvme_namespace *ns,
+					  struct bio *bp,
+					  uint32_t alignment);
+
 static int
 nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
     struct thread *td)
@@ -134,11 +153,7 @@
 
 static struct cdevsw nvme_ns_cdevsw = {
 	.d_version =	D_VERSION,
-#ifdef NVME_UNMAPPED_BIO_SUPPORT
-	.d_flags =	D_DISK | D_UNMAPPED_IO,
-#else
 	.d_flags =	D_DISK,
-#endif
 	.d_read =	physread,
 	.d_write =	physwrite,
 	.d_open =	nvme_ns_open,
@@ -207,18 +222,218 @@
 	if (bp->bio_driver2)
 		free(bp->bio_driver2, M_NVME);
 
+	if (nvme_completion_is_error(status)) {
+		bp->bio_flags |= BIO_ERROR;
+		if (bp->bio_error == 0)
+			bp->bio_error = EIO;
+	}
+
+	if ((bp->bio_flags & BIO_ERROR) == 0)
+		bp->bio_resid = 0;
+	else
+		bp->bio_resid = bp->bio_bcount;
+
 	bp_cb_fn(bp, status);
 }
 
+static void
+nvme_bio_child_inbed(struct bio *parent, int bio_error)
+{
+	struct nvme_completion	parent_cpl;
+	int			inbed;
+
+	if (bio_error != 0) {
+		parent->bio_flags |= BIO_ERROR;
+		parent->bio_error = bio_error;
+	}
+
+	/*
+	 * atomic_fetchadd returns the value before adding 1, so we still
+	 *  must add 1 to get the updated inbed number.
+	 */
+	inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1;
+	if (inbed == parent->bio_children) {
+		bzero(&parent_cpl, sizeof(parent_cpl));
+		if (parent->bio_flags & BIO_ERROR)
+			parent_cpl.status.sc = NVME_SC_DATA_TRANSFER_ERROR;
+		nvme_ns_bio_done(parent, &parent_cpl);
+	}
+}
+
+static void
+nvme_bio_child_done(void *arg, const struct nvme_completion *cpl)
+{
+	struct bio		*child = arg;
+	struct bio		*parent;
+	int			bio_error;
+
+	parent = child->bio_parent;
+	g_destroy_bio(child);
+	bio_error = nvme_completion_is_error(cpl) ? EIO : 0;
+	nvme_bio_child_inbed(parent, bio_error);
+}
+
+static uint32_t
+nvme_get_num_segments(uint64_t addr, uint64_t size, uint32_t align)
+{
+	uint32_t	num_segs, offset, remainder;
+
+	if (align == 0)
+		return (1);
+
+	KASSERT((align & (align - 1)) == 0, ("alignment not power of 2\n"));
+
+	num_segs = size / align;
+	remainder = size & (align - 1);
+	offset = addr & (align - 1);
+	if (remainder > 0 || offset > 0)
+		num_segs += 1 + (remainder + offset - 1) / align;
+	return (num_segs);
+}
+
+static void
+nvme_free_child_bios(int num_bios, struct bio **child_bios)
+{
+	int i;
+
+	for (i = 0; i < num_bios; i++) {
+		if (child_bios[i] != NULL)
+			g_destroy_bio(child_bios[i]);
+	}
+
+	free(child_bios, M_NVME);
+}
+
+static struct bio **
+nvme_allocate_child_bios(int num_bios)
+{
+	struct bio **child_bios;
+	int err = 0, i;
+
+	child_bios = malloc(num_bios * sizeof(struct bio *), M_NVME, M_NOWAIT);
+	if (child_bios == NULL)
+		return (NULL);
+
+	for (i = 0; i < num_bios; i++) {
+		child_bios[i] = g_new_bio();
+		if (child_bios[i] == NULL)
+			err = ENOMEM;
+	}
+
+	if (err == ENOMEM) {
+		nvme_free_child_bios(num_bios, child_bios);
+		return (NULL);
+	}
+
+	return (child_bios);
+}
+
+static struct bio **
+nvme_construct_child_bios(struct bio *bp, uint32_t alignment, int *num_bios)
+{
+	struct bio	**child_bios;
+	struct bio	*child;
+	uint64_t	cur_offset;
+	caddr_t		data;
+	uint32_t	rem_bcount;
+	int		i;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+	struct vm_page	**ma;
+	uint32_t	ma_offset;
+#endif
+
+	*num_bios = nvme_get_num_segments(bp->bio_offset, bp->bio_bcount,
+	    alignment);
+	child_bios = nvme_allocate_child_bios(*num_bios);
+	if (child_bios == NULL)
+		return (NULL);
+
+	bp->bio_children = *num_bios;
+	bp->bio_inbed = 0;
+	cur_offset = bp->bio_offset;
+	rem_bcount = bp->bio_bcount;
+	data = bp->bio_data;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+	ma_offset = bp->bio_ma_offset;
+	ma = bp->bio_ma;
+#endif
+
+	for (i = 0; i < *num_bios; i++) {
+		child = child_bios[i];
+		child->bio_parent = bp;
+		child->bio_cmd = bp->bio_cmd;
+		child->bio_offset = cur_offset;
+		child->bio_bcount = min(rem_bcount,
+		    alignment - (cur_offset & (alignment - 1)));
+		child->bio_flags = bp->bio_flags;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+		if (bp->bio_flags & BIO_UNMAPPED) {
+			child->bio_ma_offset = ma_offset;
+			child->bio_ma = ma;
+			child->bio_ma_n =
+			    nvme_get_num_segments(child->bio_ma_offset,
+				child->bio_bcount, PAGE_SIZE);
+			ma_offset = (ma_offset + child->bio_bcount) &
+			    PAGE_MASK;
+			ma += child->bio_ma_n;
+			if (ma_offset != 0)
+				ma -= 1;
+		} else
+#endif
+		{
+			child->bio_data = data;
+			data += child->bio_bcount;
+		}
+		cur_offset += child->bio_bcount;
+		rem_bcount -= child->bio_bcount;
+	}
+
+	return (child_bios);
+}
+
+static int
+nvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp,
+    uint32_t alignment)
+{
+	struct bio	*child;
+	struct bio	**child_bios;
+	int		err, i, num_bios;
+
+	child_bios = nvme_construct_child_bios(bp, alignment, &num_bios);
+	if (child_bios == NULL)
+		return (ENOMEM);
+
+	for (i = 0; i < num_bios; i++) {
+		child = child_bios[i];
+		err = nvme_ns_bio_process(ns, child, nvme_bio_child_done);
+		if (err != 0) {
+			nvme_bio_child_inbed(bp, err);
+			g_destroy_bio(child);
+		}
+	}
+
+	free(child_bios, M_NVME);
+	return (0);
+}
+
 int
 nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
 	nvme_cb_fn_t cb_fn)
 {
 	struct nvme_dsm_range	*dsm_range;
+	uint32_t		num_bios;
 	int			err;
 
 	bp->bio_driver1 = cb_fn;
 
+	if (ns->stripesize > 0 &&
+	    (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
+		num_bios = nvme_get_num_segments(bp->bio_offset,
+		    bp->bio_bcount, ns->stripesize);
+		if (num_bios > 1)
+			return (nvme_ns_split_bio(ns, bp, ns->stripesize));
+	}
+
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 		err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp);
@@ -278,10 +493,16 @@
     struct nvme_controller *ctrlr)
 {
 	struct nvme_completion_poll_status	status;
+	int					unit;
 
 	ns->ctrlr = ctrlr;
 	ns->id = id;
+	ns->stripesize = 0;
 
+	if (pci_get_devid(ctrlr->dev) == 0x09538086 && ctrlr->cdata.vs[3] != 0)
+		ns->stripesize =
+		    (1 << ctrlr->cdata.vs[3]) * ctrlr->min_page_size;
+
 	/*
 	 * Namespaces are reconstructed after a controller reset, so check
 	 *  to make sure we only call mtx_init once on each mtx.
@@ -334,6 +555,12 @@
 	if (ns->cdev != NULL)
 		return (0);
 
+	/*
+	 * Namespace IDs start at 1, so we need to subtract 1 to create a
+	 *  correct unit number.
+	 */
+	unit = device_get_unit(ctrlr->dev) * NVME_MAX_NAMESPACES + ns->id - 1;
+
 /*
  * MAKEDEV_ETERNAL was added in r210923, for cdevs that will never
  *  be destroyed.  This avoids refcounting on the cdev object.
@@ -341,14 +568,17 @@
  *  surprise removal nor namespace deletion.
  */
 #ifdef MAKEDEV_ETERNAL_KLD
-	ns->cdev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &nvme_ns_cdevsw, 0,
+	ns->cdev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &nvme_ns_cdevsw, unit,
 	    NULL, UID_ROOT, GID_WHEEL, 0600, "nvme%dns%d",
 	    device_get_unit(ctrlr->dev), ns->id);
 #else
-	ns->cdev = make_dev_credf(0, &nvme_ns_cdevsw, 0,
+	ns->cdev = make_dev_credf(0, &nvme_ns_cdevsw, unit,
 	    NULL, UID_ROOT, GID_WHEEL, 0600, "nvme%dns%d",
 	    device_get_unit(ctrlr->dev), ns->id);
 #endif
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+	ns->cdev->si_flags |= SI_UNMAPPED;
+#endif
 
 	if (ns->cdev != NULL)
 		ns->cdev->si_drv1 = ns;

Modified: trunk/sys/dev/nvme/nvme_ns_cmd.c
===================================================================
--- trunk/sys/dev/nvme/nvme_ns_cmd.c	2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_ns_cmd.c	2017-09-19 01:58:48 UTC (rev 9570)
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_ns_cmd.c 253630 2013-07-24 22:46:27Z jimharris $");
+__FBSDID("$FreeBSD$");
 
 #include "nvme_private.h"
 

Modified: trunk/sys/dev/nvme/nvme_private.h
===================================================================
--- trunk/sys/dev/nvme/nvme_private.h	2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_private.h	2017-09-19 01:58:48 UTC (rev 9570)
@@ -1,6 +1,6 @@
 /* $MidnightBSD$ */
 /*-
- * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2012-2014 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -24,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: release/9.2.0/sys/dev/nvme/nvme_private.h 253297 2013-07-12 22:08:24Z jimharris $
+ * $FreeBSD: stable/9/sys/dev/nvme/nvme_private.h 265566 2014-05-07 16:48:43Z jimharris $
  */
 
 #ifndef __NVME_PRIVATE_H__
@@ -239,6 +239,7 @@
 	uint16_t			flags;
 	struct cdev			*cdev;
 	void				*cons_cookie[NVME_MAX_CONSUMERS];
+	uint32_t			stripesize;
 	struct mtx			lock;
 };
 
@@ -289,6 +290,8 @@
 	struct task		fail_req_task;
 	struct taskqueue	*taskqueue;
 
+	struct resource		*msi_res[MAXCPU + 1];
+
 	/* For shared legacy interrupt. */
 	int			rid;
 	struct resource		*res;
@@ -322,12 +325,17 @@
 
 	struct cdev			*cdev;
 
+	/** bit mask of warning types currently enabled for async events */
+	union nvme_critical_warning_state	async_event_config;
+
 	uint32_t			num_aers;
 	struct nvme_async_event_request	aer[NVME_MAX_ASYNC_EVENTS];
 
 	void				*cons_cookie[NVME_MAX_CONSUMERS];
 
-	uint32_t		is_resetting;
+	uint32_t			is_resetting;
+	uint32_t			is_initialized;
+	uint32_t			notification_sent;
 
 	boolean_t			is_failed;
 	STAILQ_HEAD(, nvme_request)	fail_req;
@@ -434,6 +442,7 @@
 
 int	nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev);
 void	nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev);
+void	nvme_ctrlr_shutdown(struct nvme_controller *ctrlr);
 int	nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr);
 void	nvme_ctrlr_reset(struct nvme_controller *ctrlr);
 /* ctrlr defined as void * to allow use with config_intrhook. */
@@ -552,5 +561,6 @@
 				    uint32_t log_page_id, void *log_page_buffer,
 				    uint32_t log_page_size);
 void	nvme_notify_fail_consumers(struct nvme_controller *ctrlr);
+void	nvme_notify_new_controller(struct nvme_controller *ctrlr);
 
 #endif /* __NVME_PRIVATE_H__ */

Modified: trunk/sys/dev/nvme/nvme_qpair.c
===================================================================
--- trunk/sys/dev/nvme/nvme_qpair.c	2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_qpair.c	2017-09-19 01:58:48 UTC (rev 9570)
@@ -1,6 +1,6 @@
 /* $MidnightBSD$ */
 /*-
- * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2012-2014 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_qpair.c 253296 2013-07-12 22:07:33Z jimharris $");
+__FBSDID("$FreeBSD: stable/9/sys/dev/nvme/nvme_qpair.c 265566 2014-05-07 16:48:43Z jimharris $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -488,10 +488,8 @@
 		 *  the queue's vector to get the corresponding rid to use.
 		 */
 		qpair->rid = vector + 1;
+		qpair->res = ctrlr->msi_res[vector];
 
-		qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
-		    &qpair->rid, RF_ACTIVE);
-
 		bus_setup_intr(ctrlr->dev, qpair->res,
 		    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
 		    nvme_qpair_msix_handler, qpair, &qpair->tag);
@@ -499,8 +497,9 @@
 
 	mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);
 
+	/* Note: NVMe PRP format is restricted to 4-byte alignment. */
 	bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
-	    sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
+	    4, PAGE_SIZE, BUS_SPACE_MAXADDR,
 	    BUS_SPACE_MAXADDR, NULL, NULL, NVME_MAX_XFER_SIZE,
 	    (NVME_MAX_XFER_SIZE/PAGE_SIZE)+1, PAGE_SIZE, 0,
 	    NULL, NULL, &qpair->dma_tag);

Modified: trunk/sys/dev/nvme/nvme_sysctl.c
===================================================================
--- trunk/sys/dev/nvme/nvme_sysctl.c	2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_sysctl.c	2017-09-19 01:58:48 UTC (rev 9570)
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_sysctl.c 253296 2013-07-12 22:07:33Z jimharris $");
+__FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bus.h>

Modified: trunk/sys/dev/nvme/nvme_test.c
===================================================================
--- trunk/sys/dev/nvme/nvme_test.c	2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_test.c	2017-09-19 01:58:48 UTC (rev 9570)
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_test.c 253296 2013-07-12 22:07:33Z jimharris $");
+__FBSDID("$FreeBSD: stable/9/sys/dev/nvme/nvme_test.c 257588 2013-11-03 20:52:13Z jimharris $");
 
 #include <sys/param.h>
 #include <sys/bio.h>
@@ -54,7 +54,7 @@
 	void			*buf;
 	uint32_t		size;
 	uint32_t		time;
-	uint32_t		io_completed;
+	uint64_t		io_completed;
 };
 
 struct nvme_io_test_internal {
@@ -67,7 +67,7 @@
 	uint32_t		td_active;
 	uint32_t		td_idx;
 	uint32_t		flags;
-	uint32_t		io_completed[NVME_TEST_MAX_THREADS];
+	uint64_t		io_completed[NVME_TEST_MAX_THREADS];
 };
 
 static void
@@ -91,8 +91,8 @@
 	struct cdev			*dev;
 	void				*buf;
 	struct timeval			t;
-	uint64_t			offset;
-	uint32_t			idx, io_completed = 0;
+	uint64_t			io_completed = 0, offset;
+	uint32_t			idx;
 #if __FreeBSD_version >= 900017
 	int				ref;
 #endif

Modified: trunk/sys/dev/nvme/nvme_util.c
===================================================================
--- trunk/sys/dev/nvme/nvme_util.c	2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_util.c	2017-09-19 01:58:48 UTC (rev 9570)
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_util.c 253631 2013-07-24 22:48:29Z jimharris $");
+__FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <dev/nvme/nvme.h>



More information about the Midnightbsd-cvs mailing list