[Midnightbsd-cvs] src [9570] trunk/sys/dev: sync with 9 stable
laffer1 at midnightbsd.org
Mon Sep 18 21:58:49 EDT 2017
Revision: 9570
http://svnweb.midnightbsd.org/src/?rev=9570
Author: laffer1
Date: 2017-09-18 21:58:48 -0400 (Mon, 18 Sep 2017)
Log Message:
-----------
sync with 9 stable
Modified Paths:
--------------
trunk/sys/dev/nvd/nvd.c
trunk/sys/dev/nvme/nvme.c
trunk/sys/dev/nvme/nvme.h
trunk/sys/dev/nvme/nvme_ctrlr.c
trunk/sys/dev/nvme/nvme_ctrlr_cmd.c
trunk/sys/dev/nvme/nvme_ns.c
trunk/sys/dev/nvme/nvme_ns_cmd.c
trunk/sys/dev/nvme/nvme_private.h
trunk/sys/dev/nvme/nvme_qpair.c
trunk/sys/dev/nvme/nvme_sysctl.c
trunk/sys/dev/nvme/nvme_test.c
trunk/sys/dev/nvme/nvme_util.c
Modified: trunk/sys/dev/nvd/nvd.c
===================================================================
--- trunk/sys/dev/nvd/nvd.c 2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvd/nvd.c 2017-09-19 01:58:48 UTC (rev 9570)
@@ -187,17 +187,6 @@
atomic_add_int(&ndisk->cur_depth, -1);
- /*
- * TODO: add more extensive translation of NVMe status codes
- * to different bio error codes (i.e. EIO, EINVAL, etc.)
- */
- if (nvme_completion_is_error(cpl)) {
- bp->bio_error = EIO;
- bp->bio_flags |= BIO_ERROR;
- bp->bio_resid = bp->bio_bcount;
- } else
- bp->bio_resid = 0;
-
biodone(bp);
}
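
The bio error translation removed from nvd here is not dropped; it reappears below in nvme_ns_bio_done() in nvme_ns.c, so the disk layer no longer duplicates it. A minimal userland sketch of the same mapping, with fake_bio and bio_finish as illustrative stand-ins rather than driver symbols:

    #include <errno.h>
    #include <stdio.h>

    /* Illustrative stand-in for a struct bio; not a driver type. */
    struct fake_bio {
            int     error;
            long    resid;
            long    bcount;
    };

    /*
     * Same shape as the handling that moved into nvme_ns_bio_done():
     * on an NVMe error, report EIO and leave the whole transfer as the
     * residual; on success, report a zero residual.
     */
    static void
    bio_finish(struct fake_bio *bp, int completion_is_error)
    {
            if (completion_is_error) {
                    if (bp->error == 0)
                            bp->error = EIO;
                    bp->resid = bp->bcount;
            } else
                    bp->resid = 0;
    }

    int
    main(void)
    {
            struct fake_bio bp = { 0, 0, 4096 };

            bio_finish(&bp, 1);
            printf("error=%d resid=%ld\n", bp.error, bp.resid);
            return (0);
    }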
Modified: trunk/sys/dev/nvme/nvme.c
===================================================================
--- trunk/sys/dev/nvme/nvme.c 2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme.c 2017-09-19 01:58:48 UTC (rev 9570)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
/*-
- * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2012-2014 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme.c 253631 2013-07-24 22:48:29Z jimharris $");
+__FBSDID("$FreeBSD: stable/9/sys/dev/nvme/nvme.c 265565 2014-05-07 16:47:58Z jimharris $");
#include <sys/param.h>
#include <sys/bus.h>
@@ -158,8 +158,6 @@
{
device_t *devlist;
struct nvme_controller *ctrlr;
- union cc_register cc;
- union csts_register csts;
int dev, devcount;
if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
@@ -166,22 +164,8 @@
return;
for (dev = 0; dev < devcount; dev++) {
- /*
- * Only notify controller of shutdown when a real shutdown is
- * in process, not when a module unload occurs. It seems at
- * least some controllers (Chatham at least) don't let you
- * re-enable the controller after shutdown notification has
- * been received.
- */
ctrlr = DEVICE2SOFTC(devlist[dev]);
- cc.raw = nvme_mmio_read_4(ctrlr, cc);
- cc.bits.shn = NVME_SHN_NORMAL;
- nvme_mmio_write_4(ctrlr, cc, cc.raw);
- csts.raw = nvme_mmio_read_4(ctrlr, csts);
- while (csts.bits.shst != NVME_SHST_COMPLETE) {
- DELAY(5);
- csts.raw = nvme_mmio_read_4(ctrlr, csts);
- }
+ nvme_ctrlr_shutdown(ctrlr);
}
free(devlist, M_TEMP);
@@ -238,8 +222,10 @@
status = nvme_ctrlr_construct(ctrlr, dev);
- if (status != 0)
+ if (status != 0) {
+ nvme_ctrlr_destruct(ctrlr, dev);
return (status);
+ }
/*
* Reset controller twice to ensure we do a transition from cc.en==1
@@ -247,12 +233,16 @@
* the controller was left in when boot handed off to OS.
*/
status = nvme_ctrlr_hw_reset(ctrlr);
- if (status != 0)
+ if (status != 0) {
+ nvme_ctrlr_destruct(ctrlr, dev);
return (status);
+ }
status = nvme_ctrlr_hw_reset(ctrlr);
- if (status != 0)
+ if (status != 0) {
+ nvme_ctrlr_destruct(ctrlr, dev);
return (status);
+ }
nvme_sysctl_initialize_ctrlr(ctrlr);
@@ -277,30 +267,75 @@
}
static void
-nvme_notify_consumer(struct nvme_consumer *cons)
+nvme_notify(struct nvme_consumer *cons,
+ struct nvme_controller *ctrlr)
{
- device_t *devlist;
- struct nvme_controller *ctrlr;
struct nvme_namespace *ns;
void *ctrlr_cookie;
- int dev_idx, ns_idx, devcount;
+ int cmpset, ns_idx;
+ /*
+ * The consumer may register itself after the nvme devices
+ * have registered with the kernel, but before the
+ * driver has completed initialization. In that case,
+ * return here, and when initialization completes, the
+ * controller will make sure the consumer gets notified.
+ */
+ if (!ctrlr->is_initialized)
+ return;
+
+ cmpset = atomic_cmpset_32(&ctrlr->notification_sent, 0, 1);
+
+ if (cmpset == 0)
+ return;
+
+ if (cons->ctrlr_fn != NULL)
+ ctrlr_cookie = (*cons->ctrlr_fn)(ctrlr);
+ else
+ ctrlr_cookie = NULL;
+ ctrlr->cons_cookie[cons->id] = ctrlr_cookie;
+ if (ctrlr->is_failed) {
+ if (cons->fail_fn != NULL)
+ (*cons->fail_fn)(ctrlr_cookie);
+ /*
+ * Do not notify consumers about the namespaces of a
+ * failed controller.
+ */
+ return;
+ }
+ for (ns_idx = 0; ns_idx < ctrlr->cdata.nn; ns_idx++) {
+ ns = &ctrlr->ns[ns_idx];
+ if (cons->ns_fn != NULL)
+ ns->cons_cookie[cons->id] =
+ (*cons->ns_fn)(ns, ctrlr_cookie);
+ }
+}
+
+void
+nvme_notify_new_controller(struct nvme_controller *ctrlr)
+{
+ int i;
+
+ for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
+ if (nvme_consumer[i].id != INVALID_CONSUMER_ID) {
+ nvme_notify(&nvme_consumer[i], ctrlr);
+ }
+ }
+}
+
+static void
+nvme_notify_new_consumer(struct nvme_consumer *cons)
+{
+ device_t *devlist;
+ struct nvme_controller *ctrlr;
+ int dev_idx, devcount;
+
if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
return;
for (dev_idx = 0; dev_idx < devcount; dev_idx++) {
ctrlr = DEVICE2SOFTC(devlist[dev_idx]);
- if (cons->ctrlr_fn != NULL)
- ctrlr_cookie = (*cons->ctrlr_fn)(ctrlr);
- else
- ctrlr_cookie = NULL;
- ctrlr->cons_cookie[cons->id] = ctrlr_cookie;
- for (ns_idx = 0; ns_idx < ctrlr->cdata.nn; ns_idx++) {
- ns = &ctrlr->ns[ns_idx];
- if (cons->ns_fn != NULL)
- ns->cons_cookie[cons->id] =
- (*cons->ns_fn)(ns, ctrlr_cookie);
- }
+ nvme_notify(cons, ctrlr);
}
free(devlist, M_TEMP);
@@ -355,7 +390,7 @@
nvme_consumer[i].async_fn = async_fn;
nvme_consumer[i].fail_fn = fail_fn;
- nvme_notify_consumer(&nvme_consumer[i]);
+ nvme_notify_new_consumer(&nvme_consumer[i]);
return (&nvme_consumer[i]);
}
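
The new nvme_notify() pairs an is_initialized check with an atomic compare-and-set on notification_sent, so each controller's consumers are notified exactly once whether the consumer registers before or after initialization completes. A standalone sketch of that once-only guard using C11 atomics (notify_once() and the printf are illustrative; the driver itself uses atomic_cmpset_32):

    #include <stdatomic.h>
    #include <stdio.h>

    /* 0 until the first notification goes out; mirrors notification_sent. */
    static atomic_uint notification_sent;

    static void
    notify_once(void)
    {
            unsigned int expected = 0;

            /* Only the caller that wins the 0 -> 1 transition notifies. */
            if (!atomic_compare_exchange_strong(&notification_sent, &expected, 1))
                    return;
            printf("notifying consumers\n");
    }

    int
    main(void)
    {
            notify_once();  /* notifies */
            notify_once();  /* returns quietly, like the duplicate path above */
            return (0);
    }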
Modified: trunk/sys/dev/nvme/nvme.h
===================================================================
--- trunk/sys/dev/nvme/nvme.h 2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme.h 2017-09-19 01:58:48 UTC (rev 9570)
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: release/9.2.0/sys/dev/nvme/nvme.h 253631 2013-07-24 22:48:29Z jimharris $
+ * $FreeBSD: stable/9/sys/dev/nvme/nvme.h 263273 2014-03-17 21:42:31Z jimharris $
*/
#ifndef __NVME_H__
@@ -171,27 +171,30 @@
union cap_lo_register cap_lo;
union cap_hi_register cap_hi;
- uint32_t vs; /* version */
- uint32_t intms; /* interrupt mask set */
- uint32_t intmc; /* interrupt mask clear */
+ uint32_t vs; /* version */
+ uint32_t intms; /* interrupt mask set */
+ uint32_t intmc; /* interrupt mask clear */
/** controller configuration */
union cc_register cc;
- uint32_t reserved1;
- uint32_t csts; /* controller status */
- uint32_t reserved2;
+ uint32_t reserved1;
+ /** controller status */
+ union csts_register csts;
+
+ uint32_t reserved2;
+
/** admin queue attributes */
union aqa_register aqa;
- uint64_t asq; /* admin submission queue base addr */
- uint64_t acq; /* admin completion queue base addr */
- uint32_t reserved3[0x3f2];
+ uint64_t asq; /* admin submission queue base addr */
+ uint64_t acq; /* admin completion queue base addr */
+ uint32_t reserved3[0x3f2];
struct {
- uint32_t sq_tdbl; /* submission queue tail doorbell */
- uint32_t cq_hdbl; /* completion queue head doorbell */
+ uint32_t sq_tdbl; /* submission queue tail doorbell */
+ uint32_t cq_hdbl; /* completion queue head doorbell */
} doorbell[1] __packed;
} __packed;
@@ -533,7 +536,7 @@
uint8_t reserved6[1024];
/* bytes 3072-4095: vendor specific */
- uint8_t reserved7[1024];
+ uint8_t vs[1024];
} __packed __aligned(4);
struct nvme_namespace_data {
@@ -718,7 +721,7 @@
uint32_t time; /* in seconds */
uint32_t num_threads;
uint32_t flags;
- uint32_t io_completed[NVME_TEST_MAX_THREADS];
+ uint64_t io_completed[NVME_TEST_MAX_THREADS];
};
enum nvme_io_test_flags {
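
Switching csts from a bare uint32_t to union csts_register is what lets nvme_ctrlr_shutdown() (added in nvme_ctrlr.c below) poll csts.bits.shst directly. A sketch of what such a union plausibly looks like, assuming the usual CSTS bit layout from the NVMe spec; the driver's real definition lives elsewhere in nvme.h:

    #include <stdint.h>

    /*
     * Assumed layout, following the NVMe spec's CSTS register; exposing
     * both a raw dword and named bitfields is what allows code to read
     * csts.raw from MMIO and then test csts.bits.shst.
     */
    union csts_register {
            uint32_t        raw;
            struct {
                    uint32_t rdy            : 1;    /* controller ready */
                    uint32_t cfs            : 1;    /* controller fatal status */
                    uint32_t shst           : 2;    /* shutdown status */
                    uint32_t reserved       : 28;
            } bits;
    };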
Modified: trunk/sys/dev/nvme/nvme_ctrlr.c
===================================================================
--- trunk/sys/dev/nvme/nvme_ctrlr.c 2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_ctrlr.c 2017-09-19 01:58:48 UTC (rev 9570)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
/*-
- * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2012-2014 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_ctrlr.c 253627 2013-07-24 22:42:00Z jimharris $");
+__FBSDID("$FreeBSD: stable/9/sys/dev/nvme/nvme_ctrlr.c 265566 2014-05-07 16:48:43Z jimharris $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -182,8 +182,8 @@
cdata->lpa.ns_smart = 1;
cdata->sqes.min = 6;
cdata->sqes.max = 6;
- cdata->sqes.min = 4;
- cdata->sqes.max = 4;
+ cdata->cqes.min = 4;
+ cdata->cqes.max = 4;
cdata->nn = 1;
/* Chatham2 doesn't support DSM command */
@@ -618,9 +618,35 @@
}
static void
+nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
+ union nvme_critical_warning_state state)
+{
+
+ if (state.bits.available_spare == 1)
+ nvme_printf(ctrlr, "available spare space below threshold\n");
+
+ if (state.bits.temperature == 1)
+ nvme_printf(ctrlr, "temperature above threshold\n");
+
+ if (state.bits.device_reliability == 1)
+ nvme_printf(ctrlr, "device reliability degraded\n");
+
+ if (state.bits.read_only == 1)
+ nvme_printf(ctrlr, "media placed in read only mode\n");
+
+ if (state.bits.volatile_memory_backup == 1)
+ nvme_printf(ctrlr, "volatile memory backup device failed\n");
+
+ if (state.bits.reserved != 0)
+ nvme_printf(ctrlr,
+ "unknown critical warning(s): state = 0x%02x\n", state.raw);
+}
+
+static void
nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
{
- struct nvme_async_event_request *aer = arg;
+ struct nvme_async_event_request *aer = arg;
+ struct nvme_health_information_page *health_info;
/*
* If the log page fetch for some reason completed with an error,
@@ -630,7 +656,26 @@
if (nvme_completion_is_error(cpl))
nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
aer->log_page_id, NULL, 0);
- else
+ else {
+ if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
+ health_info = (struct nvme_health_information_page *)
+ aer->log_page_buffer;
+ nvme_ctrlr_log_critical_warnings(aer->ctrlr,
+ health_info->critical_warning);
+ /*
+ * Critical warnings reported through the
+ * SMART/health log page are persistent, so
+ * clear the associated bits in the async event
+ * config so that we do not receive repeated
+ * notifications for the same event.
+ */
+ aer->ctrlr->async_event_config.raw &=
+ ~health_info->critical_warning.raw;
+ nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
+ aer->ctrlr->async_event_config, NULL, NULL);
+ }
+
+
/*
* Pass the cpl data from the original async event completion,
* not the log page fetch.
@@ -637,6 +682,7 @@
*/
nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
+ }
/*
* Repost another asynchronous event request to replace the one
@@ -709,14 +755,28 @@
static void
nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
{
- union nvme_critical_warning_state state;
+ struct nvme_completion_poll_status status;
struct nvme_async_event_request *aer;
uint32_t i;
- state.raw = 0xFF;
- state.bits.reserved = 0;
- nvme_ctrlr_cmd_set_async_event_config(ctrlr, state, NULL, NULL);
+ ctrlr->async_event_config.raw = 0xFF;
+ ctrlr->async_event_config.bits.reserved = 0;
+ status.done = FALSE;
+ nvme_ctrlr_cmd_get_feature(ctrlr, NVME_FEAT_TEMPERATURE_THRESHOLD,
+ 0, NULL, 0, nvme_completion_poll_cb, &status);
+ while (status.done == FALSE)
+ pause("nvme", 1);
+ if (nvme_completion_is_error(&status.cpl) ||
+ (status.cpl.cdw0 & 0xFFFF) == 0xFFFF ||
+ (status.cpl.cdw0 & 0xFFFF) == 0x0000) {
+ nvme_printf(ctrlr, "temperature threshold not supported\n");
+ ctrlr->async_event_config.bits.temperature = 0;
+ }
+
+ nvme_ctrlr_cmd_set_async_event_config(ctrlr,
+ ctrlr->async_event_config, NULL, NULL);
+
/* aerl is a zero-based value, so we need to add 1 here. */
ctrlr->num_aers = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1));
@@ -783,16 +843,6 @@
for (i = 0; i < ctrlr->num_io_queues; i++)
nvme_io_qpair_enable(&ctrlr->ioq[i]);
-
- /*
- * Clear software progress marker to 0, to indicate to pre-boot
- * software that OS driver load was successful.
- *
- * Chatham does not support this feature.
- */
- if (pci_get_devid(ctrlr->dev) != CHATHAM_PCI_ID)
- nvme_ctrlr_cmd_set_feature(ctrlr,
- NVME_FEAT_SOFTWARE_PROGRESS_MARKER, 0, NULL, 0, NULL, NULL);
}
void
@@ -802,6 +852,9 @@
nvme_ctrlr_start(ctrlr);
config_intrhook_disestablish(&ctrlr->config_hook);
+
+ ctrlr->is_initialized = 1;
+ nvme_notify_new_controller(ctrlr);
}
static void
@@ -982,6 +1035,27 @@
break;
case NVME_PASSTHROUGH_CMD:
pt = (struct nvme_pt_command *)arg;
+#ifdef CHATHAM2
+ /*
+ * Chatham IDENTIFY data is spoofed, so copy the spoofed data
+ * rather than issuing the command to the Chatham controller.
+ */
+ if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID &&
+ pt->cmd.opc == NVME_OPC_IDENTIFY) {
+ if (pt->cmd.cdw10 == 1) {
+ if (pt->len != sizeof(ctrlr->cdata))
+ return (EINVAL);
+ return (copyout(&ctrlr->cdata, pt->buf,
+ pt->len));
+ } else {
+ if (pt->len != sizeof(ctrlr->ns[0].data) ||
+ pt->cmd.nsid != 1)
+ return (EINVAL);
+ return (copyout(&ctrlr->ns[0].data, pt->buf,
+ pt->len));
+ }
+ }
+#endif
return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, pt->cmd.nsid,
1 /* is_user_buffer */, 1 /* is_admin_cmd */));
default:
@@ -1002,8 +1076,8 @@
{
union cap_lo_register cap_lo;
union cap_hi_register cap_hi;
- int num_vectors, per_cpu_io_queues, status = 0;
- int timeout_period;
+ int i, num_vectors, per_cpu_io_queues, rid;
+ int status, timeout_period;
ctrlr->dev = dev;
@@ -1076,9 +1150,46 @@
goto intx;
}
- if (pci_alloc_msix(dev, &num_vectors) != 0)
+ if (pci_alloc_msix(dev, &num_vectors) != 0) {
ctrlr->msix_enabled = 0;
+ goto intx;
+ }
+ /*
+ * On earlier FreeBSD releases, there are reports that
+ * pci_alloc_msix() can return successfully with all vectors
+ * requested, but a subsequent bus_alloc_resource_any()
+ * for one of those vectors fails. This issue occurs more
+ * readily with multiple devices using per-CPU vectors.
+ * To workaround this issue, try to allocate the resources now,
+ * and fall back to INTx if we cannot allocate all of them.
+ * This issue cannot be reproduced on more recent versions of
+ * FreeBSD which have increased the maximum number of MSI-X
+ * vectors, but adding the workaround makes it easier for
+ * vendors wishing to import this driver into kernels based on
+ * older versions of FreeBSD.
+ */
+ for (i = 0; i < num_vectors; i++) {
+ rid = i + 1;
+ ctrlr->msi_res[i] = bus_alloc_resource_any(ctrlr->dev,
+ SYS_RES_IRQ, &rid, RF_ACTIVE);
+
+ if (ctrlr->msi_res[i] == NULL) {
+ ctrlr->msix_enabled = 0;
+ while (i > 0) {
+ i--;
+ bus_release_resource(ctrlr->dev,
+ SYS_RES_IRQ,
+ rman_get_rid(ctrlr->msi_res[i]),
+ ctrlr->msi_res[i]);
+ }
+ pci_release_msi(dev);
+ nvme_printf(ctrlr, "could not obtain all MSI-X "
+ "resources, reverting to intx\n");
+ break;
+ }
+ }
+
intx:
if (!ctrlr->msix_enabled)
@@ -1091,8 +1202,8 @@
if (status != 0)
return (status);
- ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
- "nvme%d", device_get_unit(dev));
+ ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, device_get_unit(dev),
+ UID_ROOT, GID_WHEEL, 0600, "nvme%d", device_get_unit(dev));
if (ctrlr->cdev == NULL)
return (ENXIO);
@@ -1104,6 +1215,8 @@
taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");
ctrlr->is_resetting = 0;
+ ctrlr->is_initialized = 0;
+ ctrlr->notification_sent = 0;
TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
TASK_INIT(&ctrlr->fail_req_task, 0, nvme_ctrlr_fail_req_task, ctrlr);
@@ -1118,6 +1231,21 @@
{
int i;
+ /*
+ * Notify the controller of a shutdown, even though this is due to
+ * a driver unload, not a system shutdown (this path is not invoked
+ * during shutdown). This ensures the controller receives a
+ * shutdown notification in case the system is shutdown before
+ * reloading the driver.
+ *
+ * Chatham does not let you re-enable the controller after shutdown
+ * notification has been received, so do not send it in this case.
+ * This is OK because Chatham does not depend on the shutdown
+ * notification anyways.
+ */
+ if (pci_get_devid(ctrlr->dev) != CHATHAM_PCI_ID)
+ nvme_ctrlr_shutdown(ctrlr);
+
nvme_ctrlr_disable(ctrlr);
taskqueue_free(ctrlr->taskqueue);
@@ -1164,6 +1292,26 @@
}
void
+nvme_ctrlr_shutdown(struct nvme_controller *ctrlr)
+{
+ union cc_register cc;
+ union csts_register csts;
+ int ticks = 0;
+
+ cc.raw = nvme_mmio_read_4(ctrlr, cc);
+ cc.bits.shn = NVME_SHN_NORMAL;
+ nvme_mmio_write_4(ctrlr, cc, cc.raw);
+ csts.raw = nvme_mmio_read_4(ctrlr, csts);
+ while ((csts.bits.shst != NVME_SHST_COMPLETE) && (ticks++ < 5*hz)) {
+ pause("nvme shn", 1);
+ csts.raw = nvme_mmio_read_4(ctrlr, csts);
+ }
+ if (csts.bits.shst != NVME_SHST_COMPLETE)
+ nvme_printf(ctrlr, "did not complete shutdown within 5 seconds "
+ "of notification\n");
+}
+
+void
nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
struct nvme_request *req)
{
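
Among the changes above, nvme_ctrlr_async_event_log_page_cb() now clears each reported critical-warning bit out of async_event_config, since SMART/health warnings are persistent and would otherwise retrigger the same async event indefinitely. A small standalone illustration of that masking (the hex values are made up for the example):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint8_t async_event_config = 0x1f;      /* all five warning types enabled */
            uint8_t critical_warning = 0x02;        /* temperature above threshold */

            /*
             * Same masking as the callback above: a warning already reported
             * through the health log is persistent, so stop asking the
             * controller to raise an async event for it again.
             */
            async_event_config &= ~critical_warning;
            printf("async event config now 0x%02x\n", async_event_config); /* 0x1d */
            return (0);
    }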
Modified: trunk/sys/dev/nvme/nvme_ctrlr_cmd.c
===================================================================
--- trunk/sys/dev/nvme/nvme_ctrlr_cmd.c 2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_ctrlr_cmd.c 2017-09-19 01:58:48 UTC (rev 9570)
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_ctrlr_cmd.c 253296 2013-07-12 22:07:33Z jimharris $");
+__FBSDID("$FreeBSD: stable/9/sys/dev/nvme/nvme_ctrlr_cmd.c 267619 2014-06-18 19:28:55Z jimharris $");
#include "nvme_private.h"
@@ -206,7 +206,7 @@
{
uint32_t cdw11;
- cdw11 = ((num_queues - 1) << 16) || (num_queues - 1);
+ cdw11 = ((num_queues - 1) << 16) | (num_queues - 1);
nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_NUMBER_OF_QUEUES, cdw11,
NULL, 0, cb_fn, cb_arg);
}
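
The one-character fix above replaces a logical OR with a bitwise OR when building cdw11 for the Number of Queues feature, where the low 16 bits carry the requested submission queue count and the high 16 bits the completion queue count, both zero-based. A standalone comparison of the two expressions:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint32_t num_queues = 8;

            /*
             * Number of Queues feature, cdw11: low 16 bits request
             * submission queues, high 16 bits completion queues, zero-based.
             */
            uint32_t fixed = ((num_queues - 1) << 16) | (num_queues - 1);   /* 0x00070007 */
            uint32_t old   = ((num_queues - 1) << 16) || (num_queues - 1);  /* logical OR: 1 */

            printf("fixed cdw11 = 0x%08x, old cdw11 = 0x%08x\n", fixed, old);
            return (0);
    }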
Modified: trunk/sys/dev/nvme/nvme_ns.c
===================================================================
--- trunk/sys/dev/nvme/nvme_ns.c 2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_ns.c 2017-09-19 01:58:48 UTC (rev 9570)
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_ns.c 253630 2013-07-24 22:46:27Z jimharris $");
+__FBSDID("$FreeBSD: stable/9/sys/dev/nvme/nvme_ns.c 257721 2013-11-05 22:33:45Z pluknet $");
#include <sys/param.h>
#include <sys/bio.h>
@@ -35,13 +35,32 @@
#include <sys/disk.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
+#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/proc.h>
+#include <sys/systm.h>
#include <dev/pci/pcivar.h>
+#include <geom/geom.h>
+
#include "nvme_private.h"
+static void nvme_bio_child_inbed(struct bio *parent, int bio_error);
+static void nvme_bio_child_done(void *arg,
+ const struct nvme_completion *cpl);
+static uint32_t nvme_get_num_segments(uint64_t addr, uint64_t size,
+ uint32_t alignment);
+static void nvme_free_child_bios(int num_bios,
+ struct bio **child_bios);
+static struct bio ** nvme_allocate_child_bios(int num_bios);
+static struct bio ** nvme_construct_child_bios(struct bio *bp,
+ uint32_t alignment,
+ int *num_bios);
+static int nvme_ns_split_bio(struct nvme_namespace *ns,
+ struct bio *bp,
+ uint32_t alignment);
+
static int
nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
struct thread *td)
@@ -134,11 +153,7 @@
static struct cdevsw nvme_ns_cdevsw = {
.d_version = D_VERSION,
-#ifdef NVME_UNMAPPED_BIO_SUPPORT
- .d_flags = D_DISK | D_UNMAPPED_IO,
-#else
.d_flags = D_DISK,
-#endif
.d_read = physread,
.d_write = physwrite,
.d_open = nvme_ns_open,
@@ -207,18 +222,218 @@
if (bp->bio_driver2)
free(bp->bio_driver2, M_NVME);
+ if (nvme_completion_is_error(status)) {
+ bp->bio_flags |= BIO_ERROR;
+ if (bp->bio_error == 0)
+ bp->bio_error = EIO;
+ }
+
+ if ((bp->bio_flags & BIO_ERROR) == 0)
+ bp->bio_resid = 0;
+ else
+ bp->bio_resid = bp->bio_bcount;
+
bp_cb_fn(bp, status);
}
+static void
+nvme_bio_child_inbed(struct bio *parent, int bio_error)
+{
+ struct nvme_completion parent_cpl;
+ int inbed;
+
+ if (bio_error != 0) {
+ parent->bio_flags |= BIO_ERROR;
+ parent->bio_error = bio_error;
+ }
+
+ /*
+ * atomic_fetchadd will return value before adding 1, so we still
+ * must add 1 to get the updated inbed number.
+ */
+ inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1;
+ if (inbed == parent->bio_children) {
+ bzero(&parent_cpl, sizeof(parent_cpl));
+ if (parent->bio_flags & BIO_ERROR)
+ parent_cpl.status.sc = NVME_SC_DATA_TRANSFER_ERROR;
+ nvme_ns_bio_done(parent, &parent_cpl);
+ }
+}
+
+static void
+nvme_bio_child_done(void *arg, const struct nvme_completion *cpl)
+{
+ struct bio *child = arg;
+ struct bio *parent;
+ int bio_error;
+
+ parent = child->bio_parent;
+ g_destroy_bio(child);
+ bio_error = nvme_completion_is_error(cpl) ? EIO : 0;
+ nvme_bio_child_inbed(parent, bio_error);
+}
+
+static uint32_t
+nvme_get_num_segments(uint64_t addr, uint64_t size, uint32_t align)
+{
+ uint32_t num_segs, offset, remainder;
+
+ if (align == 0)
+ return (1);
+
+ KASSERT((align & (align - 1)) == 0, ("alignment not power of 2\n"));
+
+ num_segs = size / align;
+ remainder = size & (align - 1);
+ offset = addr & (align - 1);
+ if (remainder > 0 || offset > 0)
+ num_segs += 1 + (remainder + offset - 1) / align;
+ return (num_segs);
+}
+
+static void
+nvme_free_child_bios(int num_bios, struct bio **child_bios)
+{
+ int i;
+
+ for (i = 0; i < num_bios; i++) {
+ if (child_bios[i] != NULL)
+ g_destroy_bio(child_bios[i]);
+ }
+
+ free(child_bios, M_NVME);
+}
+
+static struct bio **
+nvme_allocate_child_bios(int num_bios)
+{
+ struct bio **child_bios;
+ int err = 0, i;
+
+ child_bios = malloc(num_bios * sizeof(struct bio *), M_NVME, M_NOWAIT);
+ if (child_bios == NULL)
+ return (NULL);
+
+ for (i = 0; i < num_bios; i++) {
+ child_bios[i] = g_new_bio();
+ if (child_bios[i] == NULL)
+ err = ENOMEM;
+ }
+
+ if (err == ENOMEM) {
+ nvme_free_child_bios(num_bios, child_bios);
+ return (NULL);
+ }
+
+ return (child_bios);
+}
+
+static struct bio **
+nvme_construct_child_bios(struct bio *bp, uint32_t alignment, int *num_bios)
+{
+ struct bio **child_bios;
+ struct bio *child;
+ uint64_t cur_offset;
+ caddr_t data;
+ uint32_t rem_bcount;
+ int i;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+ struct vm_page **ma;
+ uint32_t ma_offset;
+#endif
+
+ *num_bios = nvme_get_num_segments(bp->bio_offset, bp->bio_bcount,
+ alignment);
+ child_bios = nvme_allocate_child_bios(*num_bios);
+ if (child_bios == NULL)
+ return (NULL);
+
+ bp->bio_children = *num_bios;
+ bp->bio_inbed = 0;
+ cur_offset = bp->bio_offset;
+ rem_bcount = bp->bio_bcount;
+ data = bp->bio_data;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+ ma_offset = bp->bio_ma_offset;
+ ma = bp->bio_ma;
+#endif
+
+ for (i = 0; i < *num_bios; i++) {
+ child = child_bios[i];
+ child->bio_parent = bp;
+ child->bio_cmd = bp->bio_cmd;
+ child->bio_offset = cur_offset;
+ child->bio_bcount = min(rem_bcount,
+ alignment - (cur_offset & (alignment - 1)));
+ child->bio_flags = bp->bio_flags;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+ if (bp->bio_flags & BIO_UNMAPPED) {
+ child->bio_ma_offset = ma_offset;
+ child->bio_ma = ma;
+ child->bio_ma_n =
+ nvme_get_num_segments(child->bio_ma_offset,
+ child->bio_bcount, PAGE_SIZE);
+ ma_offset = (ma_offset + child->bio_bcount) &
+ PAGE_MASK;
+ ma += child->bio_ma_n;
+ if (ma_offset != 0)
+ ma -= 1;
+ } else
+#endif
+ {
+ child->bio_data = data;
+ data += child->bio_bcount;
+ }
+ cur_offset += child->bio_bcount;
+ rem_bcount -= child->bio_bcount;
+ }
+
+ return (child_bios);
+}
+
+static int
+nvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp,
+ uint32_t alignment)
+{
+ struct bio *child;
+ struct bio **child_bios;
+ int err, i, num_bios;
+
+ child_bios = nvme_construct_child_bios(bp, alignment, &num_bios);
+ if (child_bios == NULL)
+ return (ENOMEM);
+
+ for (i = 0; i < num_bios; i++) {
+ child = child_bios[i];
+ err = nvme_ns_bio_process(ns, child, nvme_bio_child_done);
+ if (err != 0) {
+ nvme_bio_child_inbed(bp, err);
+ g_destroy_bio(child);
+ }
+ }
+
+ free(child_bios, M_NVME);
+ return (0);
+}
+
int
nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
nvme_cb_fn_t cb_fn)
{
struct nvme_dsm_range *dsm_range;
+ uint32_t num_bios;
int err;
bp->bio_driver1 = cb_fn;
+ if (ns->stripesize > 0 &&
+ (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
+ num_bios = nvme_get_num_segments(bp->bio_offset,
+ bp->bio_bcount, ns->stripesize);
+ if (num_bios > 1)
+ return (nvme_ns_split_bio(ns, bp, ns->stripesize));
+ }
+
switch (bp->bio_cmd) {
case BIO_READ:
err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp);
@@ -278,10 +493,16 @@
struct nvme_controller *ctrlr)
{
struct nvme_completion_poll_status status;
+ int unit;
ns->ctrlr = ctrlr;
ns->id = id;
+ ns->stripesize = 0;
+ if (pci_get_devid(ctrlr->dev) == 0x09538086 && ctrlr->cdata.vs[3] != 0)
+ ns->stripesize =
+ (1 << ctrlr->cdata.vs[3]) * ctrlr->min_page_size;
+
/*
* Namespaces are reconstructed after a controller reset, so check
* to make sure we only call mtx_init once on each mtx.
@@ -334,6 +555,12 @@
if (ns->cdev != NULL)
return (0);
+ /*
+ * Namespace IDs start at 1, so we need to subtract 1 to create a
+ * correct unit number.
+ */
+ unit = device_get_unit(ctrlr->dev) * NVME_MAX_NAMESPACES + ns->id - 1;
+
/*
* MAKEDEV_ETERNAL was added in r210923, for cdevs that will never
* be destroyed. This avoids refcounting on the cdev object.
@@ -341,14 +568,17 @@
* surprise removal nor namespace deletion.
*/
#ifdef MAKEDEV_ETERNAL_KLD
- ns->cdev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &nvme_ns_cdevsw, 0,
+ ns->cdev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &nvme_ns_cdevsw, unit,
NULL, UID_ROOT, GID_WHEEL, 0600, "nvme%dns%d",
device_get_unit(ctrlr->dev), ns->id);
#else
- ns->cdev = make_dev_credf(0, &nvme_ns_cdevsw, 0,
+ ns->cdev = make_dev_credf(0, &nvme_ns_cdevsw, unit,
NULL, UID_ROOT, GID_WHEEL, 0600, "nvme%dns%d",
device_get_unit(ctrlr->dev), ns->id);
#endif
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+ ns->cdev->si_flags |= SI_UNMAPPED;
+#endif
if (ns->cdev != NULL)
ns->cdev->si_drv1 = ns;
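
The new splitting path above breaks READ and WRITE bios that straddle a stripe boundary (stripesize is derived from a vendor-specific identify byte on device id 0x0953 controllers) into one child bio per stripe window. A standalone version of the segment-count arithmetic with a worked example:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Same arithmetic as nvme_get_num_segments(): how many alignment-sized
     * windows does the range [addr, addr + size) touch?  align is assumed
     * to be a power of two, as the driver asserts.
     */
    static uint32_t
    num_segments(uint64_t addr, uint64_t size, uint32_t align)
    {
            uint32_t num_segs = size / align;
            uint32_t remainder = size & (align - 1);
            uint32_t offset = addr & (align - 1);

            if (remainder > 0 || offset > 0)
                    num_segs += 1 + (remainder + offset - 1) / align;
            return (num_segs);
    }

    int
    main(void)
    {
            /*
             * A 256 KiB request at offset 64 KiB on a 128 KiB stripe touches
             * three stripe windows, so the driver would issue three child bios.
             */
            printf("%u\n", num_segments(64 * 1024, 256 * 1024, 128 * 1024));
            return (0);
    }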
Modified: trunk/sys/dev/nvme/nvme_ns_cmd.c
===================================================================
--- trunk/sys/dev/nvme/nvme_ns_cmd.c 2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_ns_cmd.c 2017-09-19 01:58:48 UTC (rev 9570)
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_ns_cmd.c 253630 2013-07-24 22:46:27Z jimharris $");
+__FBSDID("$FreeBSD$");
#include "nvme_private.h"
Modified: trunk/sys/dev/nvme/nvme_private.h
===================================================================
--- trunk/sys/dev/nvme/nvme_private.h 2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_private.h 2017-09-19 01:58:48 UTC (rev 9570)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
/*-
- * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2012-2014 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: release/9.2.0/sys/dev/nvme/nvme_private.h 253297 2013-07-12 22:08:24Z jimharris $
+ * $FreeBSD: stable/9/sys/dev/nvme/nvme_private.h 265566 2014-05-07 16:48:43Z jimharris $
*/
#ifndef __NVME_PRIVATE_H__
@@ -239,6 +239,7 @@
uint16_t flags;
struct cdev *cdev;
void *cons_cookie[NVME_MAX_CONSUMERS];
+ uint32_t stripesize;
struct mtx lock;
};
@@ -289,6 +290,8 @@
struct task fail_req_task;
struct taskqueue *taskqueue;
+ struct resource *msi_res[MAXCPU + 1];
+
/* For shared legacy interrupt. */
int rid;
struct resource *res;
@@ -322,12 +325,17 @@
struct cdev *cdev;
+ /** bit mask of warning types currently enabled for async events */
+ union nvme_critical_warning_state async_event_config;
+
uint32_t num_aers;
struct nvme_async_event_request aer[NVME_MAX_ASYNC_EVENTS];
void *cons_cookie[NVME_MAX_CONSUMERS];
- uint32_t is_resetting;
+ uint32_t is_resetting;
+ uint32_t is_initialized;
+ uint32_t notification_sent;
boolean_t is_failed;
STAILQ_HEAD(, nvme_request) fail_req;
@@ -434,6 +442,7 @@
int nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev);
void nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev);
+void nvme_ctrlr_shutdown(struct nvme_controller *ctrlr);
int nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr);
void nvme_ctrlr_reset(struct nvme_controller *ctrlr);
/* ctrlr defined as void * to allow use with config_intrhook. */
@@ -552,5 +561,6 @@
uint32_t log_page_id, void *log_page_buffer,
uint32_t log_page_size);
void nvme_notify_fail_consumers(struct nvme_controller *ctrlr);
+void nvme_notify_new_controller(struct nvme_controller *ctrlr);
#endif /* __NVME_PRIVATE_H__ */
Modified: trunk/sys/dev/nvme/nvme_qpair.c
===================================================================
--- trunk/sys/dev/nvme/nvme_qpair.c 2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_qpair.c 2017-09-19 01:58:48 UTC (rev 9570)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
/*-
- * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2012-2014 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_qpair.c 253296 2013-07-12 22:07:33Z jimharris $");
+__FBSDID("$FreeBSD: stable/9/sys/dev/nvme/nvme_qpair.c 265566 2014-05-07 16:48:43Z jimharris $");
#include <sys/param.h>
#include <sys/bus.h>
@@ -488,10 +488,8 @@
* the queue's vector to get the corresponding rid to use.
*/
qpair->rid = vector + 1;
+ qpair->res = ctrlr->msi_res[vector];
- qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
- &qpair->rid, RF_ACTIVE);
-
bus_setup_intr(ctrlr->dev, qpair->res,
INTR_TYPE_MISC | INTR_MPSAFE, NULL,
nvme_qpair_msix_handler, qpair, &qpair->tag);
@@ -499,8 +497,9 @@
mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);
+ /* Note: NVMe PRP format is restricted to 4-byte alignment. */
bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
- sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
+ 4, PAGE_SIZE, BUS_SPACE_MAXADDR,
BUS_SPACE_MAXADDR, NULL, NULL, NVME_MAX_XFER_SIZE,
(NVME_MAX_XFER_SIZE/PAGE_SIZE)+1, PAGE_SIZE, 0,
NULL, NULL, &qpair->dma_tag);
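
The DMA tag alignment drops from sizeof(uint64_t) to 4 because, as the added comment notes, the PRP format only requires dword alignment: the two low bits of a PRP entry are reserved. An illustrative check (make_prp_entry is not a driver function):

    #include <assert.h>
    #include <stdint.h>

    /*
     * Illustrative only: a PRP entry reserves its two low bits, so the
     * data buffer behind it needs only dword (4-byte) alignment, not
     * 8-byte alignment.
     */
    static uint64_t
    make_prp_entry(uint64_t phys_addr)
    {
            assert((phys_addr & 0x3) == 0);
            return (phys_addr);
    }

    int
    main(void)
    {
            return (make_prp_entry(0x1000) == 0x1000 ? 0 : 1);
    }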
Modified: trunk/sys/dev/nvme/nvme_sysctl.c
===================================================================
--- trunk/sys/dev/nvme/nvme_sysctl.c 2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_sysctl.c 2017-09-19 01:58:48 UTC (rev 9570)
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_sysctl.c 253296 2013-07-12 22:07:33Z jimharris $");
+__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
Modified: trunk/sys/dev/nvme/nvme_test.c
===================================================================
--- trunk/sys/dev/nvme/nvme_test.c 2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_test.c 2017-09-19 01:58:48 UTC (rev 9570)
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_test.c 253296 2013-07-12 22:07:33Z jimharris $");
+__FBSDID("$FreeBSD: stable/9/sys/dev/nvme/nvme_test.c 257588 2013-11-03 20:52:13Z jimharris $");
#include <sys/param.h>
#include <sys/bio.h>
@@ -54,7 +54,7 @@
void *buf;
uint32_t size;
uint32_t time;
- uint32_t io_completed;
+ uint64_t io_completed;
};
struct nvme_io_test_internal {
@@ -67,7 +67,7 @@
uint32_t td_active;
uint32_t td_idx;
uint32_t flags;
- uint32_t io_completed[NVME_TEST_MAX_THREADS];
+ uint64_t io_completed[NVME_TEST_MAX_THREADS];
};
static void
@@ -91,8 +91,8 @@
struct cdev *dev;
void *buf;
struct timeval t;
- uint64_t offset;
- uint32_t idx, io_completed = 0;
+ uint64_t io_completed = 0, offset;
+ uint32_t idx;
#if __FreeBSD_version >= 900017
int ref;
#endif
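
For scale, a uint32_t per-thread counter wraps after 2^32 (about 4.29 billion) completed I/Os. At roughly 400,000 IOPS that is about 10,700 seconds, just under three hours, so a long soak test against a fast device could plausibly overflow the old 32-bit counters; widening io_completed to uint64_t in both nvme.h and nvme_test.c removes that ceiling (back-of-the-envelope figure, not from the commit).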
Modified: trunk/sys/dev/nvme/nvme_util.c
===================================================================
--- trunk/sys/dev/nvme/nvme_util.c 2017-09-19 01:26:26 UTC (rev 9569)
+++ trunk/sys/dev/nvme/nvme_util.c 2017-09-19 01:58:48 UTC (rev 9570)
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/dev/nvme/nvme_util.c 253631 2013-07-24 22:48:29Z jimharris $");
+__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <dev/nvme/nvme.h>