[Midnightbsd-cvs] src [10007] trunk/sys/geom: sync with freebsd 10 stable

laffer1 at midnightbsd.org
Sun May 27 17:22:56 EDT 2018


Revision: 10007
          http://svnweb.midnightbsd.org/src/?rev=10007
Author:   laffer1
Date:     2018-05-27 17:22:55 -0400 (Sun, 27 May 2018)
Log Message:
-----------
sync with freebsd 10 stable

Modified Paths:
--------------
    trunk/sys/geom/geom_fox.c
    trunk/sys/geom/geom_int.h
    trunk/sys/geom/geom_io.c

Added Paths:
-----------
    trunk/sys/geom/geom_flashmap.c

Added: trunk/sys/geom/geom_flashmap.c
===================================================================
--- trunk/sys/geom/geom_flashmap.c	                        (rev 0)
+++ trunk/sys/geom/geom_flashmap.c	2018-05-27 21:22:55 UTC (rev 10007)
@@ -0,0 +1,251 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 Semihalf
+ * Copyright (c) 2009 Jakub Klama <jakub.klama at uj.edu.pl>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/geom/geom_flashmap.c 320168 2017-06-20 19:59:57Z marius $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/slicer.h>
+
+#include <geom/geom.h>
+#include <geom/geom_slice.h>
+#include <geom/geom_disk.h>
+
+#include <dev/nand/nand_dev.h>
+
+#define	FLASHMAP_CLASS_NAME "Flashmap"
+
+struct g_flashmap_slice {
+	off_t		sl_start;
+	off_t		sl_end;
+	const char	*sl_name;
+
+	STAILQ_ENTRY(g_flashmap_slice) sl_link;
+};
+
+STAILQ_HEAD(g_flashmap_head, g_flashmap_slice);
+
+static struct {
+	const char	*type;
+	flash_slicer_t	slicer;
+} g_flashmap_slicers[] = {
+	{ "NAND::device",	NULL },
+	{ "CFI::device",	NULL },
+	{ "SPI::device",	NULL },
+	{ "MMC::device",	NULL }
+};
+
+static g_ioctl_t g_flashmap_ioctl;
+static g_taste_t g_flashmap_taste;
+
+static int g_flashmap_load(device_t dev, struct g_provider *pp,
+    flash_slicer_t slicer, struct g_flashmap_head *head);
+static int g_flashmap_modify(struct g_geom *gp, const char *devname,
+    int secsize, struct g_flashmap_head *slices);
+static void g_flashmap_print(struct g_flashmap_slice *slice);
+
+MALLOC_DECLARE(M_FLASHMAP);
+MALLOC_DEFINE(M_FLASHMAP, "geom_flashmap", "GEOM flash memory slicer class");
+
+static void
+g_flashmap_print(struct g_flashmap_slice *slice)
+{
+
+	printf("%08jx-%08jx: %s (%juKB)\n", (uintmax_t)slice->sl_start,
+	    (uintmax_t)slice->sl_end, slice->sl_name,
+	    (uintmax_t)(slice->sl_end - slice->sl_start) / 1024);
+}
+
+static int
+g_flashmap_modify(struct g_geom *gp, const char *devname, int secsize,
+    struct g_flashmap_head *slices)
+{
+	struct g_flashmap_slice *slice;
+	int i, error;
+
+	g_topology_assert();
+
+	i = 0;
+	STAILQ_FOREACH(slice, slices, sl_link) {
+		if (bootverbose) {
+			printf("%s: slice ", devname);
+			g_flashmap_print(slice);
+		}
+
+		error = g_slice_config(gp, i++, G_SLICE_CONFIG_CHECK,
+		    slice->sl_start,
+		    slice->sl_end - slice->sl_start + 1,
+		    secsize, FLASH_SLICES_FMT, gp->name, slice->sl_name);
+
+		if (error)
+			return (error);
+	}
+
+	i = 0;
+	STAILQ_FOREACH(slice, slices, sl_link) {
+		error = g_slice_config(gp, i++, G_SLICE_CONFIG_SET,
+		    slice->sl_start,
+		    slice->sl_end - slice->sl_start + 1,
+		    secsize, "%ss.%s", gp->name, slice->sl_name);
+
+		if (error)
+			return (error);
+	}
+
+	return (0);
+}
+
+static int
+g_flashmap_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag,
+    struct thread *td)
+{
+	struct g_consumer *cp;
+	struct g_geom *gp;
+
+	if (cmd != NAND_IO_GET_CHIP_PARAM)
+		return (ENOIOCTL);
+
+	cp = LIST_FIRST(&pp->geom->consumer);
+	if (cp == NULL)
+		return (ENOIOCTL);
+	gp = cp->provider->geom;
+	if (gp->ioctl == NULL)
+		return (ENOIOCTL);
+
+	return (gp->ioctl(cp->provider, cmd, data, fflag, td));
+}
+
+static struct g_geom *
+g_flashmap_taste(struct g_class *mp, struct g_provider *pp, int flags)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	struct g_flashmap_head head;
+	struct g_flashmap_slice *slice, *slice_temp;
+	flash_slicer_t slicer;
+	device_t dev;
+	int i, size;
+
+	g_trace(G_T_TOPOLOGY, "flashmap_taste(%s,%s)", mp->name, pp->name);
+	g_topology_assert();
+
+	if (flags == G_TF_NORMAL &&
+	    strcmp(pp->geom->class->name, G_DISK_CLASS_NAME) != 0)
+		return (NULL);
+
+	gp = g_slice_new(mp, FLASH_SLICES_MAX_NUM, pp, &cp, NULL, 0, NULL);
+	if (gp == NULL)
+		return (NULL);
+
+	STAILQ_INIT(&head);
+
+	do {
+		slicer = NULL;
+		for (i = 0; i < nitems(g_flashmap_slicers); i++) {
+			size = sizeof(device_t);
+			if (g_io_getattr(g_flashmap_slicers[i].type, cp,
+			    &size, &dev) == 0) {
+				slicer = g_flashmap_slicers[i].slicer;
+				break;
+			}
+		}
+		if (slicer == NULL)
+			break;
+
+		if (g_flashmap_load(dev, pp, slicer, &head) == 0)
+			break;
+
+		g_flashmap_modify(gp, cp->provider->name,
+		    cp->provider->sectorsize, &head);
+	} while (0);
+
+	g_access(cp, -1, 0, 0);
+
+	STAILQ_FOREACH_SAFE(slice, &head, sl_link, slice_temp)
+		free(slice, M_FLASHMAP);
+
+	if (LIST_EMPTY(&gp->provider)) {
+		g_slice_spoiled(cp);
+		return (NULL);
+	}
+	return (gp);
+}
+
+static int
+g_flashmap_load(device_t dev, struct g_provider *pp, flash_slicer_t slicer,
+    struct g_flashmap_head *head)
+{
+	struct flash_slice *slices;
+	struct g_flashmap_slice *slice;
+	int i, nslices = 0;
+
+	slices = malloc(sizeof(struct flash_slice) * FLASH_SLICES_MAX_NUM,
+	    M_FLASHMAP, M_WAITOK | M_ZERO);
+	if (slicer(dev, pp->name, slices, &nslices) == 0) {
+		for (i = 0; i < nslices; i++) {
+			slice = malloc(sizeof(struct g_flashmap_slice),
+			    M_FLASHMAP, M_WAITOK);
+
+			slice->sl_name = slices[i].label;
+			slice->sl_start = slices[i].base;
+			slice->sl_end = slices[i].base + slices[i].size - 1;
+
+			STAILQ_INSERT_TAIL(head, slice, sl_link);
+		}
+	}
+
+	free(slices, M_FLASHMAP);
+	return (nslices);
+}
+
+void flash_register_slicer(flash_slicer_t slicer, u_int type, bool force)
+{
+
+	DROP_GIANT();
+	g_topology_lock();
+	if (g_flashmap_slicers[type].slicer == NULL || force == TRUE)
+		g_flashmap_slicers[type].slicer = slicer;
+	g_topology_unlock();
+	PICKUP_GIANT();
+}
+
+static struct g_class g_flashmap_class = {
+	.name = FLASHMAP_CLASS_NAME,
+	.version = G_VERSION,
+	.taste = g_flashmap_taste,
+	.ioctl = g_flashmap_ioctl,
+};
+
+DECLARE_GEOM_CLASS(g_flashmap_class, g_flashmap);
+MODULE_VERSION(g_flashmap, 0);


Property changes on: trunk/sys/geom/geom_flashmap.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
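
A note on how the new class gets its slice tables: geom_flashmap itself
only does the slicing; the layout comes from a flash_slicer_t callback
that a flash bus driver registers for its device type. The sketch below
is a hypothetical example of that registration, inferred from the code
above: the callback signature matches the slicer(dev, pp->name, slices,
&nslices) call in g_flashmap_load(), and struct flash_slice's
base/size/label fields are used exactly as g_flashmap_load() reads them.
The FLASH_SLICES_TYPE_CFI constant is assumed to come from
<sys/slicer.h> and to index the "CFI::device" entry of
g_flashmap_slicers[]; all example_* names and labels are invented.

/*
 * Hypothetical CFI slicer: carve a fixed 256 KB "boot" slice off the
 * front of the flash and expose the remainder as "rootfs".  Returning
 * 0 tells g_flashmap_load() that slices[] and *nslices are valid.
 */
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/slicer.h>

static int
example_cfi_slicer(device_t dev, const char *provider,
    struct flash_slice *slices, int *nslices)
{
	off_t mediasize = 8 * 1024 * 1024;	/* would be queried from dev */

	slices[0].base = 0;
	slices[0].size = 256 * 1024;
	slices[0].label = "boot";
	slices[1].base = 256 * 1024;
	slices[1].size = mediasize - 256 * 1024;
	slices[1].label = "rootfs";
	*nslices = 2;
	return (0);
}

static void
example_driver_attach_hook(void)
{

	/* false: do not displace a slicer that is already registered. */
	flash_register_slicer(example_cfi_slicer, FLASH_SLICES_TYPE_CFI,
	    false);
}

Once registered, tasting a provider named cfi0 would create cfi0s.boot
and cfi0s.rootfs, per the "%ss.%s" format used in g_flashmap_modify().
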
Modified: trunk/sys/geom/geom_fox.c
===================================================================
--- trunk/sys/geom/geom_fox.c	2018-05-27 21:21:04 UTC (rev 10006)
+++ trunk/sys/geom/geom_fox.c	2018-05-27 21:22:55 UTC (rev 10007)
@@ -1,4 +1,4 @@
-/* $MidnightBSD: src/sys/geom/geom_fox.c,v 1.3 2008/12/03 00:25:46 laffer1 Exp $ */
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2003 Poul-Henning Kamp
  * All rights reserved.
@@ -27,7 +27,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/geom_fox.c,v 1.11 2005/11/30 22:15:00 sobomax Exp $
+ * $FreeBSD: stable/10/sys/geom/geom_fox.c 219029 2011-02-25 10:24:35Z netchild $
  */
 
 /* This is a GEOM module for handling path selection for multi-path

Modified: trunk/sys/geom/geom_int.h
===================================================================
--- trunk/sys/geom/geom_int.h	2018-05-27 21:21:04 UTC (rev 10006)
+++ trunk/sys/geom/geom_int.h	2018-05-27 21:22:55 UTC (rev 10007)
@@ -33,7 +33,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/geom_int.h,v 1.29 2004/10/25 12:28:28 phk Exp $
+ * $FreeBSD: stable/10/sys/geom/geom_int.h 260479 2014-01-09 11:13:03Z mav $
  */
 
 LIST_HEAD(class_list_head, g_class);
@@ -40,6 +40,9 @@
 TAILQ_HEAD(g_tailq_head, g_geom);
 
 extern int g_collectstats;
+#define G_STATS_PROVIDERS	1	/* Collect I/O stats for providers */
+#define G_STATS_CONSUMERS	2	/* Collect I/O stats for consumers */
+
 extern int g_debugflags;
 /*
  * 1	G_T_TOPOLOGY
@@ -55,6 +58,7 @@
 /* geom_dump.c */
 void g_confxml(void *, int flag);
 void g_conf_specific(struct sbuf *sb, struct g_class *mp, struct g_geom *gp, struct g_provider *pp, struct g_consumer *cp);
+void g_conf_printf_escaped(struct sbuf *sb, const char *fmt, ...);
 void g_confdot(void *, int flag);
 void g_conftxt(void *, int flag);
 
@@ -76,6 +80,7 @@
 /* geom_kern.c / geom_kernsim.c */
 void g_init(void);
 extern int g_shutdown;
+extern int g_notaste;
 
 /* geom_ctl.c */
 void g_ctl_init(void);
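
The two G_STATS_* macros name the bit values that g_collectstats has
always carried; the old geom_io.c code tested "g_collectstats & 1" and
"& 2" directly, as the hunks below show. A value of 1 collects devstat
records for providers only, 2 for consumers only, 3 for both. A minimal
sketch of the gating, mirroring g_io_request() below (the helper name is
invented):

static void
example_start_stats(struct g_provider *pp, struct g_consumer *cp,
    struct bio *bp)
{

	if (g_collectstats & G_STATS_PROVIDERS)	/* bit 0x1 */
		devstat_start_transaction(pp->stat, &bp->bio_t0);
	if (g_collectstats & G_STATS_CONSUMERS)	/* bit 0x2 */
		devstat_start_transaction(cp->stat, &bp->bio_t0);
}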

Modified: trunk/sys/geom/geom_io.c
===================================================================
--- trunk/sys/geom/geom_io.c	2018-05-27 21:21:04 UTC (rev 10006)
+++ trunk/sys/geom/geom_io.c	2018-05-27 21:22:55 UTC (rev 10007)
@@ -2,6 +2,7 @@
 /*-
  * Copyright (c) 2002 Poul-Henning Kamp
  * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * Copyright (c) 2013 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
@@ -9,6 +10,9 @@
  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  * DARPA CHATS research program.
  *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -35,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_io.c,v 1.75.2.5 2010/09/19 19:57:15 mav Exp $");
+__FBSDID("$FreeBSD: stable/10/sys/geom/geom_io.c 292348 2015-12-16 19:01:14Z ken $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -45,6 +49,8 @@
 #include <sys/ktr.h>
 #include <sys/proc.h>
 #include <sys/stack.h>
+#include <sys/sysctl.h>
+#include <sys/vmem.h>
 
 #include <sys/errno.h>
 #include <geom/geom.h>
@@ -52,12 +58,31 @@
 #include <sys/devicestat.h>
 
 #include <vm/uma.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
 
+static int	g_io_transient_map_bio(struct bio *bp);
+
 static struct g_bioq g_bio_run_down;
 static struct g_bioq g_bio_run_up;
 static struct g_bioq g_bio_run_task;
 
-static u_int pace;
+/*
+ * Pace is a hint that we've had some trouble recently allocating
+ * bios, so we should back off trying to send I/O down the stack
+ * a bit to let the problem resolve. When pacing, we also turn
+ * off direct dispatch to also reduce memory pressure from I/Os
+ * there, at the expense of some added latency while the memory
+ * pressures exist. See g_io_schedule_down() for more details
+ * and limitations.
+ */
+static volatile u_int pace;
+
 static uma_zone_t	biozone;
 
 /*
@@ -181,12 +206,18 @@
 		/*
 		 *  BIO_ORDERED flag may be used by disk drivers to enforce
 		 *  ordering restrictions, so this flag needs to be cloned.
+		 *  BIO_UNMAPPED and BIO_VLIST should be inherited, to properly
+		 *  indicate which way the buffer is passed.
 		 *  Other bio flags are not suitable for cloning.
 		 */
-		bp2->bio_flags = bp->bio_flags & BIO_ORDERED;
+		bp2->bio_flags = bp->bio_flags &
+		    (BIO_ORDERED | BIO_UNMAPPED | BIO_VLIST);
 		bp2->bio_length = bp->bio_length;
 		bp2->bio_offset = bp->bio_offset;
 		bp2->bio_data = bp->bio_data;
+		bp2->bio_ma = bp->bio_ma;
+		bp2->bio_ma_n = bp->bio_ma_n;
+		bp2->bio_ma_offset = bp->bio_ma_offset;
 		bp2->bio_attribute = bp->bio_attribute;
 		/* Inherit classification info from the parent */
 		bp2->bio_classifier1 = bp->bio_classifier1;
@@ -211,11 +242,15 @@
 	struct bio *bp2;
 
 	bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO);
+	bp2->bio_flags = bp->bio_flags & (BIO_UNMAPPED | BIO_VLIST);
 	bp2->bio_parent = bp;
 	bp2->bio_cmd = bp->bio_cmd;
 	bp2->bio_length = bp->bio_length;
 	bp2->bio_offset = bp->bio_offset;
 	bp2->bio_data = bp->bio_data;
+	bp2->bio_ma = bp->bio_ma;
+	bp2->bio_ma_n = bp->bio_ma_n;
+	bp2->bio_ma_offset = bp->bio_ma_offset;
 	bp2->bio_attribute = bp->bio_attribute;
 	bp->bio_children++;
 #ifdef KTR
@@ -289,6 +324,8 @@
 {
 	struct g_consumer *cp;
 	struct g_provider *pp;
+	off_t excess;
+	int error;
 
 	cp = bp->bio_from;
 	pp = bp->bio_to;
@@ -333,11 +370,44 @@
 			return (EIO);
 		if (bp->bio_offset > pp->mediasize)
 			return (EIO);
+
+		/* Truncate requests to the end of the provider's media. */
+		excess = bp->bio_offset + bp->bio_length;
+		if (excess > bp->bio_to->mediasize) {
+			KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 ||
+			    round_page(bp->bio_ma_offset +
+			    bp->bio_length) / PAGE_SIZE == bp->bio_ma_n,
+			    ("excess bio %p too short", bp));
+			excess -= bp->bio_to->mediasize;
+			bp->bio_length -= excess;
+			if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+				bp->bio_ma_n = round_page(bp->bio_ma_offset +
+				    bp->bio_length) / PAGE_SIZE;
+			}
+			if (excess > 0)
+				CTR3(KTR_GEOM, "g_down truncated bio "
+				    "%p provider %s by %d", bp,
+				    bp->bio_to->name, excess);
+		}
+
+		/* Deliver zero length transfers right here. */
+		if (bp->bio_length == 0) {
+			CTR2(KTR_GEOM, "g_down terminated 0-length "
+			    "bp %p provider %s", bp, bp->bio_to->name);
+			return (0);
+		}
+
+		if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
+		    (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
+		    (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
+			if ((error = g_io_transient_map_bio(bp)) >= 0)
+				return (error);
+		}
 		break;
 	default:
 		break;
 	}
-	return (0);
+	return (EJUSTRETURN);
 }
 
 /*
@@ -401,7 +471,8 @@
 g_io_request(struct bio *bp, struct g_consumer *cp)
 {
 	struct g_provider *pp;
-	int first;
+	struct mtx *mtxp;
+	int direct, error, first;
 
 	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
 	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
@@ -451,48 +522,82 @@
 
 	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
 	    ("Bio already on queue bp=%p", bp));
-	bp->bio_flags |= BIO_ONQUEUE;
-
-	if (g_collectstats)
+	if ((g_collectstats & G_STATS_CONSUMERS) != 0 ||
+	    ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL))
 		binuptime(&bp->bio_t0);
 	else
 		getbinuptime(&bp->bio_t0);
 
+#ifdef GET_STACK_USAGE
+	direct = (cp->flags & G_CF_DIRECT_SEND) != 0 &&
+	    (pp->flags & G_PF_DIRECT_RECEIVE) != 0 &&
+	    !g_is_geom_thread(curthread) &&
+	    ((pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ||
+	    (bp->bio_flags & BIO_UNMAPPED) == 0 || THREAD_CAN_SLEEP()) &&
+	    pace == 0;
+	if (direct) {
+		/* Block direct execution if less than half of stack left. */
+		size_t	st, su;
+		GET_STACK_USAGE(st, su);
+		if (su * 2 > st)
+			direct = 0;
+	}
+#else
+	direct = 0;
+#endif
+
+	if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) {
+		g_bioq_lock(&g_bio_run_down);
+		g_run_classifiers(bp);
+		g_bioq_unlock(&g_bio_run_down);
+	}
+
 	/*
 	 * The statistics collection is lockless, as such, but we
 	 * can not update one instance of the statistics from more
 	 * than one thread at a time, so grab the lock first.
-	 *
-	 * We also use the lock to protect the list of classifiers.
 	 */
-	g_bioq_lock(&g_bio_run_down);
-
-	if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1)
-		g_run_classifiers(bp);
-
-	if (g_collectstats & 1)
+	mtxp = mtx_pool_find(mtxpool_sleep, pp);
+	mtx_lock(mtxp);
+	if (g_collectstats & G_STATS_PROVIDERS)
 		devstat_start_transaction(pp->stat, &bp->bio_t0);
-	if (g_collectstats & 2)
+	if (g_collectstats & G_STATS_CONSUMERS)
 		devstat_start_transaction(cp->stat, &bp->bio_t0);
-
 	pp->nstart++;
 	cp->nstart++;
-	first = TAILQ_EMPTY(&g_bio_run_down.bio_queue);
-	TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue);
-	g_bio_run_down.bio_queue_length++;
-	g_bioq_unlock(&g_bio_run_down);
+	mtx_unlock(mtxp);
 
-	/* Pass it on down. */
-	if (first)
-		wakeup(&g_wait_down);
+	if (direct) {
+		error = g_io_check(bp);
+		if (error >= 0) {
+			CTR3(KTR_GEOM, "g_io_request g_io_check on bp %p "
+			    "provider %s returned %d", bp, bp->bio_to->name,
+			    error);
+			g_io_deliver(bp, error);
+			return;
+		}
+		bp->bio_to->geom->start(bp);
+	} else {
+		g_bioq_lock(&g_bio_run_down);
+		first = TAILQ_EMPTY(&g_bio_run_down.bio_queue);
+		TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue);
+		bp->bio_flags |= BIO_ONQUEUE;
+		g_bio_run_down.bio_queue_length++;
+		g_bioq_unlock(&g_bio_run_down);
+		/* Pass it on down. */
+		if (first)
+			wakeup(&g_wait_down);
+	}
 }
 
 void
 g_io_deliver(struct bio *bp, int error)
 {
+	struct bintime now;
 	struct g_consumer *cp;
 	struct g_provider *pp;
-	int first;
+	struct mtx *mtxp;
+	int direct, first;
 
 	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
 	pp = bp->bio_to;
@@ -538,31 +643,55 @@
 	bp->bio_bcount = bp->bio_length;
 	bp->bio_resid = bp->bio_bcount - bp->bio_completed;
 
+#ifdef GET_STACK_USAGE
+	direct = (pp->flags & G_PF_DIRECT_SEND) &&
+		 (cp->flags & G_CF_DIRECT_RECEIVE) &&
+		 !g_is_geom_thread(curthread);
+	if (direct) {
+		/* Block direct execution if less than half of stack left. */
+		size_t	st, su;
+		GET_STACK_USAGE(st, su);
+		if (su * 2 > st)
+			direct = 0;
+	}
+#else
+	direct = 0;
+#endif
+
 	/*
 	 * The statistics collection is lockless, as such, but we
 	 * can not update one instance of the statistics from more
 	 * than one thread at a time, so grab the lock first.
 	 */
-	g_bioq_lock(&g_bio_run_up);
-	if (g_collectstats & 1)
-		devstat_end_transaction_bio(pp->stat, bp);
-	if (g_collectstats & 2)
-		devstat_end_transaction_bio(cp->stat, bp);
-
+	if ((g_collectstats & G_STATS_CONSUMERS) != 0 ||
+	    ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL))
+		binuptime(&now);
+	mtxp = mtx_pool_find(mtxpool_sleep, cp);
+	mtx_lock(mtxp);
+	if (g_collectstats & G_STATS_PROVIDERS)
+		devstat_end_transaction_bio_bt(pp->stat, bp, &now);
+	if (g_collectstats & G_STATS_CONSUMERS)
+		devstat_end_transaction_bio_bt(cp->stat, bp, &now);
 	cp->nend++;
 	pp->nend++;
+	mtx_unlock(mtxp);
+
 	if (error != ENOMEM) {
 		bp->bio_error = error;
-		first = TAILQ_EMPTY(&g_bio_run_up.bio_queue);
-		TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue);
-		bp->bio_flags |= BIO_ONQUEUE;
-		g_bio_run_up.bio_queue_length++;
-		g_bioq_unlock(&g_bio_run_up);
-		if (first)
-			wakeup(&g_wait_up);
+		if (direct) {
+			biodone(bp);
+		} else {
+			g_bioq_lock(&g_bio_run_up);
+			first = TAILQ_EMPTY(&g_bio_run_up.bio_queue);
+			TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue);
+			bp->bio_flags |= BIO_ONQUEUE;
+			g_bio_run_up.bio_queue_length++;
+			g_bioq_unlock(&g_bio_run_up);
+			if (first)
+				wakeup(&g_wait_up);
+		}
 		return;
 	}
-	g_bioq_unlock(&g_bio_run_up);
 
 	if (bootverbose)
 		printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
@@ -572,15 +701,82 @@
 	bp->bio_driver2 = NULL;
 	bp->bio_pflags = 0;
 	g_io_request(bp, cp);
-	pace++;
+	pace = 1;
 	return;
 }
 
+SYSCTL_DECL(_kern_geom);
+
+static long transient_maps;
+SYSCTL_LONG(_kern_geom, OID_AUTO, transient_maps, CTLFLAG_RD,
+    &transient_maps, 0,
+    "Total count of the transient mapping requests");
+u_int transient_map_retries = 10;
+SYSCTL_UINT(_kern_geom, OID_AUTO, transient_map_retries, CTLFLAG_RW,
+    &transient_map_retries, 0,
+    "Max count of retries used before giving up on creating transient map");
+int transient_map_hard_failures;
+SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_hard_failures, CTLFLAG_RD,
+    &transient_map_hard_failures, 0,
+    "Failures to establish the transient mapping due to retry attempts "
+    "exhausted");
+int transient_map_soft_failures;
+SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_soft_failures, CTLFLAG_RD,
+    &transient_map_soft_failures, 0,
+    "Count of retried failures to establish the transient mapping");
+int inflight_transient_maps;
+SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD,
+    &inflight_transient_maps, 0,
+    "Current count of the active transient maps");
+
+static int
+g_io_transient_map_bio(struct bio *bp)
+{
+	vm_offset_t addr;
+	long size;
+	u_int retried;
+
+	KASSERT(unmapped_buf_allowed, ("unmapped disabled"));
+
+	size = round_page(bp->bio_ma_offset + bp->bio_length);
+	KASSERT(size / PAGE_SIZE == bp->bio_ma_n, ("Bio too short %p", bp));
+	addr = 0;
+	retried = 0;
+	atomic_add_long(&transient_maps, 1);
+retry:
+	if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) {
+		if (transient_map_retries != 0 &&
+		    retried >= transient_map_retries) {
+			CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s",
+			    bp, bp->bio_to->name);
+			atomic_add_int(&transient_map_hard_failures, 1);
+			return (EDEADLK/* XXXKIB */);
+		} else {
+			/*
+			 * Naive attempt to quiesce the I/O to get more
+			 * in-flight requests completed and defragment
+			 * the transient_arena.
+			 */
+			CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d",
+			    bp, bp->bio_to->name, retried);
+			pause("g_d_tra", hz / 10);
+			retried++;
+			atomic_add_int(&transient_map_soft_failures, 1);
+			goto retry;
+		}
+	}
+	atomic_add_int(&inflight_transient_maps, 1);
+	pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size));
+	bp->bio_data = (caddr_t)addr + bp->bio_ma_offset;
+	bp->bio_flags |= BIO_TRANSIENT_MAPPING;
+	bp->bio_flags &= ~BIO_UNMAPPED;
+	return (EJUSTRETURN);
+}
+
 void
 g_io_schedule_down(struct thread *tp __unused)
 {
 	struct bio *bp;
-	off_t excess;
 	int error;
 
 	for(;;) {
@@ -594,49 +790,43 @@
 		}
 		CTR0(KTR_GEOM, "g_down has work to do");
 		g_bioq_unlock(&g_bio_run_down);
-		if (pace > 0) {
-			CTR1(KTR_GEOM, "g_down pacing self (pace %d)", pace);
-			pause("g_down", hz/10);
-			pace--;
+		if (pace != 0) {
+			/*
+			 * There has been at least one memory allocation
+			 * failure since the last I/O completed. Pause 1ms to
+			 * give the system a chance to free up memory. We only
+			 * do this once because a large number of allocations
+			 * can fail in the direct dispatch case and there's no
+			 * relationship between the number of these failures and
+			 * the length of the outage. If there's still an outage,
+			 * we'll pause again and again until it's
+			 * resolved. Older versions paused longer and once per
+			 * allocation failure. This was OK for a single threaded
+			 * g_down, but with direct dispatch would lead to max of
+			 * 10 IOPs for minutes at a time when transient memory
+			 * issues prevented allocation for a batch of requests
+			 * from the upper layers.
+			 *
+			 * XXX This pacing is really lame. It needs to be solved
+			 * by other methods. This is OK only because the worst
+			 * case scenario is so rare. In the worst case scenario
+			 * all memory is tied up waiting for I/O to complete
+			 * which can never happen since we can't allocate bios
+			 * for that I/O.
+			 */
+			CTR0(KTR_GEOM, "g_down pacing self");
+			pause("g_down", min(hz/1000, 1));
+			pace = 0;
 		}
+		CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp,
+		    bp->bio_to->name);
 		error = g_io_check(bp);
-		if (error) {
+		if (error >= 0) {
 			CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider "
 			    "%s returned %d", bp, bp->bio_to->name, error);
 			g_io_deliver(bp, error);
 			continue;
 		}
-		CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp,
-		    bp->bio_to->name);
-		switch (bp->bio_cmd) {
-		case BIO_READ:
-		case BIO_WRITE:
-		case BIO_DELETE:
-			/* Truncate requests to the end of providers media. */
-			/*
-			 * XXX: What if we truncate because of offset being
-			 * bad, not length?
-			 */
-			excess = bp->bio_offset + bp->bio_length;
-			if (excess > bp->bio_to->mediasize) {
-				excess -= bp->bio_to->mediasize;
-				bp->bio_length -= excess;
-				if (excess > 0)
-					CTR3(KTR_GEOM, "g_down truncated bio "
-					    "%p provider %s by %d", bp,
-					    bp->bio_to->name, excess);
-			}
-			/* Deliver zero length transfers right here. */
-			if (bp->bio_length == 0) {
-				g_io_deliver(bp, 0);
-				CTR2(KTR_GEOM, "g_down terminated 0-length "
-				    "bp %p provider %s", bp, bp->bio_to->name);
-				continue;
-			}
-			break;
-		default:
-			break;
-		}
 		THREAD_NO_SLEEPING();
 		CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld "
 		    "len %ld", bp, bp->bio_to->name, bp->bio_offset,

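The geom_io.c changes bring in two mechanisms from FreeBSD 10: direct
dispatch (when both endpoints advertise support and enough kernel stack
remains, g_io_request() runs g_io_check() and the target geom's start
method inline instead of queueing through the single g_down thread, and
g_io_deliver() can likewise call biodone() directly) and transient
mapping of unmapped bios via g_io_transient_map_bio() for providers
that cannot accept BIO_UNMAPPED. Below is a minimal sketch of how a
class would opt into direct dispatch, using only the flag names tested
by the new code (assumed to be declared in <geom/geom.h>; the function
and provider names are hypothetical):

#include <geom/geom.h>

static void
example_enable_direct(struct g_geom *gp, struct g_provider *parent_pp)
{
	struct g_provider *pp;
	struct g_consumer *cp;

	/*
	 * Provider side: willing to send completions and receive
	 * requests directly.
	 */
	pp = g_new_providerf(gp, "%s.example", parent_pp->name);
	pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;

	/*
	 * Consumer side: willing to send requests and receive
	 * completions directly.
	 */
	cp = g_new_consumer(gp);
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
}

If either endpoint lacks the matching flag, the GET_STACK_USAGE check
finds less than half the stack free, or pacing is active (pace != 0),
the bio falls back to the queued g_down/g_up path unchanged.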

