[Midnightbsd-cvs] src: sys/geom: Sync geom

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Tue Sep 30 11:14:27 EDT 2008


Log Message:
-----------
Sync geom

Modified Files:
--------------
    src/sys/geom:
        geom.h (r1.2 -> r1.3)
        geom_aes.c (r1.1.1.2 -> r1.2)
        geom_bsd.c (r1.2 -> r1.3)
        geom_ccd.c (r1.1.1.1 -> r1.2)
        geom_ctl.c (r1.1.1.1 -> r1.2)
        geom_dev.c (r1.3 -> r1.4)
        geom_disk.c (r1.1.1.1 -> r1.2)
        geom_disk.h (r1.1.1.1 -> r1.2)
        geom_dump.c (r1.1.1.1 -> r1.2)
        geom_event.c (r1.2 -> r1.3)
        geom_fox.c (r1.1.1.2 -> r1.2)
        geom_io.c (r1.2 -> r1.3)
        geom_kern.c (r1.1.1.1 -> r1.2)
        geom_mbr.c (r1.1.1.2 -> r1.2)
        geom_pc98.c (r1.1.1.2 -> r1.2)
        geom_slice.c (r1.2 -> r1.3)
        geom_slice.h (r1.2 -> r1.3)
        geom_subr.c (r1.1.1.1 -> r1.2)
        geom_sunlabel.c (r1.1.1.2 -> r1.2)
        geom_vfs.c (r1.2 -> r1.3)
        geom_vol_ffs.c (r1.1.1.2 -> r1.2)
    src/sys/geom/bde:
        g_bde_crypt.c (r1.1.1.1 -> r1.2)
        g_bde_lock.c (r1.1.1.2 -> r1.2)
        g_bde_work.c (r1.2 -> r1.3)
    src/sys/geom/concat:
        g_concat.c (r1.2 -> r1.3)
        g_concat.h (r1.2 -> r1.3)
    src/sys/geom/eli:
        g_eli.c (r1.2 -> r1.3)
        g_eli.h (r1.2 -> r1.3)
        g_eli_crypto.c (r1.2 -> r1.3)
        g_eli_ctl.c (r1.2 -> r1.3)
        g_eli_key.c (r1.2 -> r1.3)
        pkcs5v2.c (r1.2 -> r1.3)
        pkcs5v2.h (r1.2 -> r1.3)
    src/sys/geom/gate:
        g_gate.c (r1.1.1.1 -> r1.2)
        g_gate.h (r1.1.1.1 -> r1.2)
    src/sys/geom/label:
        g_label.c (r1.2 -> r1.3)
        g_label.h (r1.2 -> r1.3)
        g_label_ext2fs.c (r1.1.1.1 -> r1.2)
        g_label_iso9660.c (r1.2 -> r1.3)
        g_label_msdosfs.c (r1.2 -> r1.3)
        g_label_ntfs.c (r1.1.1.1 -> r1.2)
        g_label_reiserfs.c (r1.1.1.1 -> r1.2)
        g_label_ufs.c (r1.2 -> r1.3)
    src/sys/geom/mirror:
        g_mirror.c (r1.2 -> r1.3)
        g_mirror.h (r1.2 -> r1.3)
        g_mirror_ctl.c (r1.2 -> r1.3)
    src/sys/geom/nop:
        g_nop.c (r1.1.1.2 -> r1.2)
        g_nop.h (r1.1.1.2 -> r1.2)
    src/sys/geom/raid3:
        g_raid3.c (r1.2 -> r1.3)
        g_raid3.h (r1.2 -> r1.3)
        g_raid3_ctl.c (r1.2 -> r1.3)
    src/sys/geom/shsec:
        g_shsec.c (r1.1.1.1 -> r1.2)
        g_shsec.h (r1.1.1.1 -> r1.2)
    src/sys/geom/stripe:
        g_stripe.c (r1.1.1.1 -> r1.2)
        g_stripe.h (r1.1.1.1 -> r1.2)
    src/sys/geom/uzip:
        g_uzip.c (r1.1.1.2 -> r1.2)
    src/sys/geom/vinum:
        geom_vinum.c (r1.1.1.1 -> r1.2)
        geom_vinum.h (r1.1.1.1 -> r1.2)
        geom_vinum_drive.c (r1.1.1.1 -> r1.2)
        geom_vinum_init.c (r1.1.1.1 -> r1.2)
        geom_vinum_move.c (r1.1.1.1 -> r1.2)
        geom_vinum_plex.c (r1.1.1.1 -> r1.2)
        geom_vinum_raid5.h (r1.1.1.1 -> r1.2)
        geom_vinum_rename.c (r1.1.1.1 -> r1.2)
        geom_vinum_rm.c (r1.1.1.1 -> r1.2)
        geom_vinum_share.c (r1.1.1.1 -> r1.2)
        geom_vinum_state.c (r1.1.1.1 -> r1.2)
        geom_vinum_subr.c (r1.1.1.1 -> r1.2)
        geom_vinum_var.h (r1.1.1.1 -> r1.2)
        geom_vinum_volume.c (r1.1.1.1 -> r1.2)
    src/sys/geom/zero:
        g_zero.c (r1.1.1.2 -> r1.2)

Added Files:
-----------
    src/sys/geom/part:
        g_part.c (r1.1)
        g_part.h (r1.1)
        g_part_apm.c (r1.1)
        g_part_gpt.c (r1.1)
        g_part_if.m (r1.1)
        g_part_mbr.c (r1.1)
    src/sys/geom/virstor:
        binstream.c (r1.1)
        binstream.h (r1.1)
        g_virstor.c (r1.1)
        g_virstor.h (r1.1)
        g_virstor_md.c (r1.1)
        g_virstor_md.h (r1.1)

Removed Files:
-------------
    src/sys/geom:
        geom_apple.c
        geom_gpt.c

-------------- next part --------------
Index: g_nop.h
===================================================================
RCS file: /home/cvs/src/sys/geom/nop/g_nop.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/geom/nop/g_nop.h -L sys/geom/nop/g_nop.h -u -r1.1.1.2 -r1.2
--- sys/geom/nop/g_nop.h
+++ sys/geom/nop/g_nop.h
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2004 Pawel Jakub Dawidek <pjd at FreeBSD.org>
+ * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd at FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -10,7 +10,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -23,14 +23,14 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/nop/g_nop.h,v 1.4.8.1 2006/01/17 09:11:30 pjd Exp $
+ * $FreeBSD: src/sys/geom/nop/g_nop.h,v 1.7 2006/09/30 08:16:49 pjd Exp $
  */
 
 #ifndef	_G_NOP_H_
 #define	_G_NOP_H_
 
 #define	G_NOP_CLASS_NAME	"NOP"
-#define	G_NOP_VERSION		3
+#define	G_NOP_VERSION		4
 #define	G_NOP_SUFFIX		".nop"
 
 #ifdef _KERNEL
@@ -55,8 +55,10 @@
 } while (0)
 
 struct g_nop_softc {
+	int		sc_error;
 	off_t		sc_offset;
-	u_int		sc_failprob;
+	u_int		sc_rfailprob;
+	u_int		sc_wfailprob;
 	uintmax_t	sc_reads;
 	uintmax_t	sc_writes;
 	uintmax_t	sc_readbytes;
Index: g_nop.c
===================================================================
RCS file: /home/cvs/src/sys/geom/nop/g_nop.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/geom/nop/g_nop.c -L sys/geom/nop/g_nop.c -u -r1.1.1.2 -r1.2
--- sys/geom/nop/g_nop.c
+++ sys/geom/nop/g_nop.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2004 Pawel Jakub Dawidek <pjd at FreeBSD.org>
+ * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd at FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -10,7 +10,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/nop/g_nop.c,v 1.11.2.1 2006/01/17 09:11:30 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/nop/g_nop.c,v 1.19 2006/09/08 13:46:18 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -77,6 +77,7 @@
 	struct g_geom *gp;
 	struct g_provider *pp;
 	struct bio *cbp;
+	u_int failprob = 0;
 
 	gp = bp->bio_to->geom;
 	sc = gp->softc;
@@ -85,19 +86,21 @@
 	case BIO_READ:
 		sc->sc_reads++;
 		sc->sc_readbytes += bp->bio_length;
+		failprob = sc->sc_rfailprob;
 		break;
 	case BIO_WRITE:
 		sc->sc_writes++;
 		sc->sc_wrotebytes += bp->bio_length;
+		failprob = sc->sc_wfailprob;
 		break;
 	}
-	if (sc->sc_failprob > 0) {
+	if (failprob > 0) {
 		u_int rval;
 
 		rval = arc4random() % 100;
-		if (rval < sc->sc_failprob) {
-			G_NOP_LOGREQ(bp, "Returning EIO.");
-			g_io_deliver(bp, EIO);
+		if (rval < failprob) {
+			G_NOP_LOGREQ(bp, "Returning error=%d.", sc->sc_error);
+			g_io_deliver(bp, sc->sc_error);
 			return;
 		}
 	}
@@ -133,7 +136,8 @@
 
 static int
 g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp,
-    u_int failprob, off_t offset, off_t size, u_int secsize)
+    int ioerror, u_int rfailprob, u_int wfailprob, off_t offset, off_t size,
+    u_int secsize)
 {
 	struct g_nop_softc *sc;
 	struct g_geom *gp;
@@ -186,7 +190,9 @@
 	}
 	sc = g_malloc(sizeof(*sc), M_WAITOK);
 	sc->sc_offset = offset;
-	sc->sc_failprob = failprob;
+	sc->sc_error = ioerror;
+	sc->sc_rfailprob = rfailprob;
+	sc->sc_wfailprob = wfailprob;
 	sc->sc_reads = 0;
 	sc->sc_writes = 0;
 	sc->sc_readbytes = 0;
@@ -276,7 +282,7 @@
 g_nop_ctl_create(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_provider *pp;
-	intmax_t *failprob, *offset, *secsize, *size;
+	intmax_t *error, *rfailprob, *wfailprob, *offset, *secsize, *size;
 	const char *name;
 	char param[16];
 	int i, *nargs;
@@ -292,13 +298,27 @@
 		gctl_error(req, "Missing device(s).");
 		return;
 	}
-	failprob = gctl_get_paraml(req, "failprob", sizeof(*failprob));
-	if (failprob == NULL) {
-		gctl_error(req, "No '%s' argument", "failprob");
+	error = gctl_get_paraml(req, "error", sizeof(*error));
+	if (error == NULL) {
+		gctl_error(req, "No '%s' argument", "error");
+		return;
+	}
+	rfailprob = gctl_get_paraml(req, "rfailprob", sizeof(*rfailprob));
+	if (rfailprob == NULL) {
+		gctl_error(req, "No '%s' argument", "rfailprob");
+		return;
+	}
+	if (*rfailprob < -1 || *rfailprob > 100) {
+		gctl_error(req, "Invalid '%s' argument", "rfailprob");
 		return;
 	}
-	if (*failprob < 0 || *failprob > 100) {
-		gctl_error(req, "Invalid '%s' argument", "failprob");
+	wfailprob = gctl_get_paraml(req, "wfailprob", sizeof(*wfailprob));
+	if (wfailprob == NULL) {
+		gctl_error(req, "No '%s' argument", "wfailprob");
+		return;
+	}
+	if (*wfailprob < -1 || *wfailprob > 100) {
+		gctl_error(req, "Invalid '%s' argument", "wfailprob");
 		return;
 	}
 	offset = gctl_get_paraml(req, "offset", sizeof(*offset));
@@ -331,7 +351,7 @@
 
 	for (i = 0; i < *nargs; i++) {
 		snprintf(param, sizeof(param), "arg%d", i);
-		name = gctl_get_asciiparam(req, param); 
+		name = gctl_get_asciiparam(req, param);
 		if (name == NULL) {
 			gctl_error(req, "No 'arg%d' argument", i);
 			return;
@@ -342,10 +362,13 @@
 		if (pp == NULL) {
 			G_NOP_DEBUG(1, "Provider %s is invalid.", name);
 			gctl_error(req, "Provider %s is invalid.", name);
-			return; 
+			return;
 		}
-		if (g_nop_create(req, mp, pp, (u_int)*failprob, (off_t)*offset,
-		    (off_t)*size, (u_int)*secsize) != 0) {
+		if (g_nop_create(req, mp, pp,
+		    *error == -1 ? EIO : (int)*error,
+		    *rfailprob == -1 ? 0 : (u_int)*rfailprob,
+		    *wfailprob == -1 ? 0 : (u_int)*wfailprob,
+		    (off_t)*offset, (off_t)*size, (u_int)*secsize) != 0) {
 			return;
 		}
 	}
@@ -356,7 +379,7 @@
 {
 	struct g_nop_softc *sc;
 	struct g_provider *pp;
-	intmax_t *failprob;
+	intmax_t *error, *rfailprob, *wfailprob;
 	const char *name;
 	char param[16];
 	int i, *nargs;
@@ -372,19 +395,33 @@
 		gctl_error(req, "Missing device(s).");
 		return;
 	}
-	failprob = gctl_get_paraml(req, "failprob", sizeof(*failprob));
-	if (failprob == NULL) {
-		gctl_error(req, "No '%s' argument", "failprob");
+	error = gctl_get_paraml(req, "error", sizeof(*error));
+	if (error == NULL) {
+		gctl_error(req, "No '%s' argument", "error");
 		return;
 	}
-	if (*failprob < 0 || *failprob > 100) {
-		gctl_error(req, "Invalid '%s' argument", "failprob");
+	rfailprob = gctl_get_paraml(req, "rfailprob", sizeof(*rfailprob));
+	if (rfailprob == NULL) {
+		gctl_error(req, "No '%s' argument", "rfailprob");
+		return;
+	}
+	if (*rfailprob < -1 || *rfailprob > 100) {
+		gctl_error(req, "Invalid '%s' argument", "rfailprob");
+		return;
+	}
+	wfailprob = gctl_get_paraml(req, "wfailprob", sizeof(*wfailprob));
+	if (wfailprob == NULL) {
+		gctl_error(req, "No '%s' argument", "wfailprob");
+		return;
+	}
+	if (*wfailprob < -1 || *wfailprob > 100) {
+		gctl_error(req, "Invalid '%s' argument", "wfailprob");
 		return;
 	}
 
 	for (i = 0; i < *nargs; i++) {
 		snprintf(param, sizeof(param), "arg%d", i);
-		name = gctl_get_asciiparam(req, param); 
+		name = gctl_get_asciiparam(req, param);
 		if (name == NULL) {
 			gctl_error(req, "No 'arg%d' argument", i);
 			return;
@@ -395,10 +432,15 @@
 		if (pp == NULL || pp->geom->class != mp) {
 			G_NOP_DEBUG(1, "Provider %s is invalid.", name);
 			gctl_error(req, "Provider %s is invalid.", name);
-			return; 
+			return;
 		}
 		sc = pp->geom->softc;
-		sc->sc_failprob = (u_int)*failprob;
+		if (*error != -1)
+			sc->sc_error = (int)*error;
+		if (*rfailprob != -1)
+			sc->sc_rfailprob = (u_int)*rfailprob;
+		if (*wfailprob != -1)
+			sc->sc_wfailprob = (u_int)*wfailprob;
 	}
 }
 
@@ -441,7 +483,7 @@
 
 	for (i = 0; i < *nargs; i++) {
 		snprintf(param, sizeof(param), "arg%d", i);
-		name = gctl_get_asciiparam(req, param); 
+		name = gctl_get_asciiparam(req, param);
 		if (name == NULL) {
 			gctl_error(req, "No 'arg%d' argument", i);
 			return;
@@ -452,7 +494,7 @@
 		if (gp == NULL) {
 			G_NOP_DEBUG(1, "Device %s is invalid.", name);
 			gctl_error(req, "Device %s is invalid.", name);
-			return; 
+			return;
 		}
 		error = g_nop_destroy(gp, *force);
 		if (error != 0) {
@@ -486,7 +528,7 @@
 
 	for (i = 0; i < *nargs; i++) {
 		snprintf(param, sizeof(param), "arg%d", i);
-		name = gctl_get_asciiparam(req, param); 
+		name = gctl_get_asciiparam(req, param);
 		if (name == NULL) {
 			gctl_error(req, "No 'arg%d' argument", i);
 			return;
@@ -497,7 +539,7 @@
 		if (pp == NULL || pp->geom->class != mp) {
 			G_NOP_DEBUG(1, "Provider %s is invalid.", name);
 			gctl_error(req, "Provider %s is invalid.", name);
-			return; 
+			return;
 		}
 		sc = pp->geom->softc;
 		sc->sc_reads = 0;
@@ -552,7 +594,11 @@
 	sc = gp->softc;
 	sbuf_printf(sb, "%s<Offset>%jd</Offset>\n", indent,
 	    (intmax_t)sc->sc_offset);
-	sbuf_printf(sb, "%s<FailProb>%u</FailProb>\n", indent, sc->sc_failprob);
+	sbuf_printf(sb, "%s<ReadFailProb>%u</ReadFailProb>\n", indent,
+	    sc->sc_rfailprob);
+	sbuf_printf(sb, "%s<WriteFailProb>%u</WriteFailProb>\n", indent,
+	    sc->sc_wfailprob);
+	sbuf_printf(sb, "%s<Error>%d</Error>\n", indent, sc->sc_error);
 	sbuf_printf(sb, "%s<Reads>%ju</Reads>\n", indent, sc->sc_reads);
 	sbuf_printf(sb, "%s<Writes>%ju</Writes>\n", indent, sc->sc_writes);
 	sbuf_printf(sb, "%s<ReadBytes>%ju</ReadBytes>\n", indent,
--- /dev/null
+++ sys/geom/part/g_part_mbr.c
@@ -0,0 +1,390 @@
+/*-
+ * Copyright (c) 2007 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/geom/part/g_part_mbr.c,v 1.2 2007/06/17 22:19:19 marcel Exp $");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/diskmbr.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/kobj.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+#include <sys/systm.h>
+#include <geom/geom.h>
+#include <geom/part/g_part.h>
+
+#include "g_part_if.h"
+
+#define	MBRSIZE		512
+
+struct g_part_mbr_table {
+	struct g_part_table	base;
+	u_char		mbr[MBRSIZE];
+};
+
+struct g_part_mbr_entry {
+	struct g_part_entry	base;
+	struct dos_partition ent;
+};
+
+static int g_part_mbr_add(struct g_part_table *, struct g_part_entry *,
+    struct g_part_parms *);
+static int g_part_mbr_create(struct g_part_table *, struct g_part_parms *);
+static int g_part_mbr_destroy(struct g_part_table *, struct g_part_parms *);
+static int g_part_mbr_dumpto(struct g_part_table *, struct g_part_entry *);
+static int g_part_mbr_modify(struct g_part_table *, struct g_part_entry *,  
+    struct g_part_parms *);
+static char *g_part_mbr_name(struct g_part_table *, struct g_part_entry *,
+    char *, size_t);
+static int g_part_mbr_probe(struct g_part_table *, struct g_consumer *);
+static int g_part_mbr_read(struct g_part_table *, struct g_consumer *);
+static const char *g_part_mbr_type(struct g_part_table *, struct g_part_entry *,
+    char *, size_t);
+static int g_part_mbr_write(struct g_part_table *, struct g_consumer *);
+
+static kobj_method_t g_part_mbr_methods[] = {
+	KOBJMETHOD(g_part_add,		g_part_mbr_add),
+	KOBJMETHOD(g_part_create,	g_part_mbr_create),
+	KOBJMETHOD(g_part_destroy,	g_part_mbr_destroy),
+	KOBJMETHOD(g_part_dumpto,	g_part_mbr_dumpto),
+	KOBJMETHOD(g_part_modify,	g_part_mbr_modify),
+	KOBJMETHOD(g_part_name,		g_part_mbr_name),
+	KOBJMETHOD(g_part_probe,	g_part_mbr_probe),
+	KOBJMETHOD(g_part_read,		g_part_mbr_read),
+	KOBJMETHOD(g_part_type,		g_part_mbr_type),
+	KOBJMETHOD(g_part_write,	g_part_mbr_write),
+	{ 0, 0 }
+};
+
+static struct g_part_scheme g_part_mbr_scheme = {
+	"MBR",
+	g_part_mbr_methods,
+	sizeof(struct g_part_mbr_table),
+	.gps_entrysz = sizeof(struct g_part_mbr_entry),
+	.gps_minent = NDOSPART,
+	.gps_maxent = NDOSPART,
+};
+G_PART_SCHEME_DECLARE(g_part_mbr_scheme);
+
+static int
+mbr_parse_type(const char *type, u_char *dp_typ)
+{
+	const char *alias;
+	char *endp;
+	long lt;
+
+	if (type[0] == '!') {
+		lt = strtol(type + 1, &endp, 0);
+		if (type[1] == '\0' || *endp != '\0' || lt <= 0 || lt >= 256)
+			return (EINVAL);
+		*dp_typ = (u_char)lt;
+		return (0);
+	}
+	alias = g_part_alias_name(G_PART_ALIAS_FREEBSD);
+	if (!strcasecmp(type, alias)) {
+		*dp_typ = DOSPTYP_386BSD;
+		return (0);
+	}
+	return (EINVAL);
+}
+
+static void
+mbr_set_chs(struct g_part_table *table, uint32_t lba, u_char *cylp, u_char *hdp,
+    u_char *secp)
+{
+	uint32_t cyl, hd, sec;
+
+	sec = lba % table->gpt_sectors + 1;
+	lba /= table->gpt_sectors;
+	hd = lba % table->gpt_heads;
+	lba /= table->gpt_heads;
+	cyl = lba;
+	if (cyl > 1023)
+		sec = hd = cyl = ~0;
+
+	*cylp = cyl & 0xff;
+	*hdp = hd & 0xff;
+	*secp = (sec & 0x3f) | ((cyl >> 2) & 0xc0);
+}
+
+static int
+g_part_mbr_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
+    struct g_part_parms *gpp)
+{
+	struct g_part_mbr_entry *entry;
+	struct g_part_mbr_table *table;
+	uint32_t start, size, sectors;
+
+	if (gpp->gpp_parms & G_PART_PARM_LABEL)
+		return (EINVAL);
+
+	sectors = basetable->gpt_sectors;
+
+	entry = (struct g_part_mbr_entry *)baseentry;
+	table = (struct g_part_mbr_table *)basetable;
+
+	start = gpp->gpp_start;
+	size = gpp->gpp_size;
+	if (size < sectors)
+		return (EINVAL);
+	if (start % sectors) {
+		size = size - sectors + (start % sectors);
+		start = start - (start % sectors) + sectors;
+	}
+	if (size % sectors)
+		size = size - (size % sectors);
+	if (size < sectors)
+		return (EINVAL);
+
+	if (baseentry->gpe_deleted)
+		bzero(&entry->ent, sizeof(entry->ent));
+
+	KASSERT(baseentry->gpe_start <= start, (__func__));
+	KASSERT(baseentry->gpe_end >= start + size - 1, (__func__));
+	baseentry->gpe_start = start;
+	baseentry->gpe_end = start + size - 1;
+	entry->ent.dp_start = start;
+	entry->ent.dp_size = size;
+	mbr_set_chs(basetable, baseentry->gpe_start, &entry->ent.dp_scyl,
+	    &entry->ent.dp_shd, &entry->ent.dp_ssect);
+	mbr_set_chs(basetable, baseentry->gpe_end, &entry->ent.dp_ecyl,
+	    &entry->ent.dp_ehd, &entry->ent.dp_esect);
+	return (mbr_parse_type(gpp->gpp_type, &entry->ent.dp_typ));
+}
+
+static int
+g_part_mbr_create(struct g_part_table *basetable, struct g_part_parms *gpp)
+{
+	struct g_consumer *cp;
+	struct g_provider *pp;
+	struct g_part_mbr_table *table;
+	uint64_t msize;
+
+	pp = gpp->gpp_provider;
+	cp = LIST_FIRST(&pp->consumers);
+
+	if (pp->sectorsize < MBRSIZE)
+		return (ENOSPC);
+
+	msize = pp->mediasize / pp->sectorsize;
+	basetable->gpt_first = basetable->gpt_sectors;
+	basetable->gpt_last = msize - (msize % basetable->gpt_sectors) - 1;
+
+	table = (struct g_part_mbr_table *)basetable;
+	le16enc(table->mbr + DOSMAGICOFFSET, DOSMAGIC);
+	return (0);
+}
+
+static int
+g_part_mbr_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
+{
+
+	/* Wipe the first sector to clear the partitioning. */
+	basetable->gpt_smhead |= 1;
+	return (0);
+}
+
+static int
+g_part_mbr_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)  
+{
+	struct g_part_mbr_entry *entry;
+
+	/* Allow dumping to a FreeBSD partition only. */
+	entry = (struct g_part_mbr_entry *)baseentry;
+	return ((entry->ent.dp_typ == DOSPTYP_386BSD) ? 1 : 0);
+}
+
+static int
+g_part_mbr_modify(struct g_part_table *basetable,
+    struct g_part_entry *baseentry, struct g_part_parms *gpp)
+{
+	struct g_part_mbr_entry *entry;
+
+	if (gpp->gpp_parms & G_PART_PARM_LABEL)
+		return (EINVAL);
+
+	entry = (struct g_part_mbr_entry *)baseentry;
+	if (gpp->gpp_parms & G_PART_PARM_TYPE)
+		return (mbr_parse_type(gpp->gpp_type, &entry->ent.dp_typ));
+	return (0);
+}
+
+static char *
+g_part_mbr_name(struct g_part_table *table, struct g_part_entry *baseentry,
+    char *buf, size_t bufsz)
+{
+
+	snprintf(buf, bufsz, "s%d", baseentry->gpe_index);
+	return (buf);
+}
+
+static int
+g_part_mbr_probe(struct g_part_table *table, struct g_consumer *cp)
+{
+	struct g_provider *pp;
+	u_char *buf;
+	int error, res;
+
+	pp = cp->provider;
+
+	/* Sanity-check the provider. */
+	if (pp->sectorsize < MBRSIZE || pp->mediasize < pp->sectorsize)
+		return (ENOSPC);
+
+	/* Check that there's a MBR. */
+	buf = g_read_data(cp, 0L, pp->sectorsize, &error);
+	if (buf == NULL)
+		return (error);
+	res = le16dec(buf + DOSMAGICOFFSET);
+	g_free(buf);
+	return ((res == DOSMAGIC) ? G_PART_PROBE_PRI_NORM : ENXIO);
+}
+
+static int
+g_part_mbr_read(struct g_part_table *basetable, struct g_consumer *cp)
+{
+	struct dos_partition ent;
+	struct g_provider *pp;
+	struct g_part_mbr_table *table;
+	struct g_part_mbr_entry *entry;
+	u_char *buf, *p;
+	off_t chs, msize;
+	u_int sectors, heads;
+	int error, index;
+
+	pp = cp->provider;
+	table = (struct g_part_mbr_table *)basetable;
+	msize = pp->mediasize / pp->sectorsize;
+
+	buf = g_read_data(cp, 0L, pp->sectorsize, &error);
+	if (buf == NULL)
+		return (error);
+
+	bcopy(buf, table->mbr, sizeof(table->mbr));
+	for (index = NDOSPART - 1; index >= 0; index--) {
+		p = buf + DOSPARTOFF + index * DOSPARTSIZE;
+		ent.dp_flag = p[0];
+		ent.dp_shd = p[1];
+		ent.dp_ssect = p[2];
+		ent.dp_scyl = p[3];
+		ent.dp_typ = p[4];
+		ent.dp_ehd = p[5];
+		ent.dp_esect = p[6];
+		ent.dp_ecyl = p[7];
+		ent.dp_start = le32dec(p + 8);
+		ent.dp_size = le32dec(p + 12);
+		if (ent.dp_typ == 0 || ent.dp_typ == DOSPTYP_PMBR)
+			continue;
+		if (ent.dp_flag != 0 && ent.dp_flag != 0x80)
+			continue;
+		if (ent.dp_start == 0 || ent.dp_size == 0)
+			continue;
+		sectors = ent.dp_esect & 0x3f;
+		if (sectors > basetable->gpt_sectors &&
+		    !basetable->gpt_fixgeom) {
+			g_part_geometry_heads(msize, sectors, &chs, &heads);
+			if (chs != 0) {
+				basetable->gpt_sectors = sectors;
+				basetable->gpt_heads = heads;
+			}
+		}
+		if ((ent.dp_start % basetable->gpt_sectors) != 0)
+			printf("GEOM: %s: partition %d does not start on a "
+			    "track boundary.\n", pp->name, index + 1);
+		if ((ent.dp_size % basetable->gpt_sectors) != 0)
+			printf("GEOM: %s: partition %d does not end on a "
+			    "track boundary.\n", pp->name, index + 1);
+
+		entry = (struct g_part_mbr_entry *)g_part_new_entry(basetable,
+		    index + 1, ent.dp_start, ent.dp_start + ent.dp_size - 1);
+		entry->ent = ent;
+	}
+
+	basetable->gpt_entries = NDOSPART;
+	basetable->gpt_first = basetable->gpt_sectors;
+	basetable->gpt_last = msize - (msize % basetable->gpt_sectors) - 1;
+
+	return (0);
+}
+
+static const char *
+g_part_mbr_type(struct g_part_table *basetable, struct g_part_entry *baseentry, 
+    char *buf, size_t bufsz)
+{
+	struct g_part_mbr_entry *entry;
+	int type;
+
+	entry = (struct g_part_mbr_entry *)baseentry;
+	type = entry->ent.dp_typ;
+	if (type == DOSPTYP_386BSD)
+		return (g_part_alias_name(G_PART_ALIAS_FREEBSD));
+	snprintf(buf, bufsz, "!%d", type);
+	return (buf);
+}
+
+static int
+g_part_mbr_write(struct g_part_table *basetable, struct g_consumer *cp)
+{
+	struct g_part_entry *baseentry;
+	struct g_part_mbr_entry *entry;
+	struct g_part_mbr_table *table;
+	u_char *p;
+	int error, index;
+
+	table = (struct g_part_mbr_table *)basetable;
+	baseentry = LIST_FIRST(&basetable->gpt_entry);
+	for (index = 1; index <= basetable->gpt_entries; index++) {
+		p = table->mbr + DOSPARTOFF + (index - 1) * DOSPARTSIZE;
+		entry = (baseentry != NULL && index == baseentry->gpe_index)
+		    ? (struct g_part_mbr_entry *)baseentry : NULL;
+		if (entry != NULL && !baseentry->gpe_deleted) {
+			p[0] = entry->ent.dp_flag;
+			p[1] = entry->ent.dp_shd;
+			p[2] = entry->ent.dp_ssect;
+			p[3] = entry->ent.dp_scyl;
+			p[4] = entry->ent.dp_typ;
+			p[5] = entry->ent.dp_ehd;
+			p[6] = entry->ent.dp_esect;
+			p[7] = entry->ent.dp_ecyl;
+			le32enc(p + 8, entry->ent.dp_start);
+			le32enc(p + 12, entry->ent.dp_size);
+		} else
+			bzero(p, DOSPARTSIZE);
+
+		if (entry != NULL)
+			baseentry = LIST_NEXT(baseentry, gpe_entry);
+	}
+
+	error = g_write_data(cp, 0, table->mbr, cp->provider->sectorsize);
+	return (error);
+}
--- /dev/null
+++ sys/geom/part/g_part.c
@@ -0,0 +1,1541 @@
+/*-
+ * Copyright (c) 2002, 2005, 2006, 2007 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/geom/part/g_part.c,v 1.9.2.1 2007/10/29 00:11:39 marcel Exp $");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/diskmbr.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/kobj.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+#include <sys/systm.h>
+#include <sys/uuid.h>
+#include <geom/geom.h>
+#include <geom/geom_ctl.h>
+#include <geom/part/g_part.h>
+
+#include "g_part_if.h"
+
+static kobj_method_t g_part_null_methods[] = {
+	{ 0, 0 }
+};
+
+static struct g_part_scheme g_part_null_scheme = {
+	"n/a",
+	g_part_null_methods,
+	sizeof(struct g_part_table),
+};
+G_PART_SCHEME_DECLARE(g_part_null_scheme);
+
+SET_DECLARE(g_part_scheme_set, struct g_part_scheme);
+
+struct g_part_alias_list {
+	const char *lexeme;
+	enum g_part_alias alias;
+} g_part_alias_list[G_PART_ALIAS_COUNT] = {
+	{ "efi", G_PART_ALIAS_EFI },
+	{ "freebsd", G_PART_ALIAS_FREEBSD },
+	{ "freebsd-swap", G_PART_ALIAS_FREEBSD_SWAP },
+	{ "freebsd-ufs", G_PART_ALIAS_FREEBSD_UFS },
+	{ "freebsd-vinum", G_PART_ALIAS_FREEBSD_VINUM },
+	{ "freebsd-zfs", G_PART_ALIAS_FREEBSD_ZFS },
+	{ "mbr", G_PART_ALIAS_MBR }
+};
+
+/*
+ * The GEOM partitioning class.
+ */
+static g_ctl_req_t g_part_ctlreq;
+static g_ctl_destroy_geom_t g_part_destroy_geom;
+static g_taste_t g_part_taste;
+
+static g_access_t g_part_access;
+static g_dumpconf_t g_part_dumpconf;
+static g_orphan_t g_part_orphan;
+static g_spoiled_t g_part_spoiled;
+static g_start_t g_part_start;
+
+static struct g_class g_part_class = {
+	.name = "PART",
+	.version = G_VERSION,
+	/* Class methods. */
+	.ctlreq = g_part_ctlreq,
+	.destroy_geom = g_part_destroy_geom,
+	.taste = g_part_taste,
+	/* Geom methods. */
+	.access = g_part_access,
+	.dumpconf = g_part_dumpconf,
+	.orphan = g_part_orphan,
+	.spoiled = g_part_spoiled,
+	.start = g_part_start,
+};
+
+DECLARE_GEOM_CLASS(g_part_class, g_part);
+
+enum g_part_ctl {
+	G_PART_CTL_NONE,
+	G_PART_CTL_ADD,
+	G_PART_CTL_COMMIT,
+	G_PART_CTL_CREATE,
+	G_PART_CTL_DELETE,
+	G_PART_CTL_DESTROY,
+	G_PART_CTL_MODIFY,
+	G_PART_CTL_MOVE,
+	G_PART_CTL_RECOVER,
+	G_PART_CTL_RESIZE,
+	G_PART_CTL_UNDO
+};
+
+/*
+ * Support functions.
+ */
+
+static void g_part_wither(struct g_geom *, int);
+
+const char *
+g_part_alias_name(enum g_part_alias alias)
+{
+	int i;
+
+	for (i = 0; i < G_PART_ALIAS_COUNT; i++) {
+		if (g_part_alias_list[i].alias != alias)
+			continue;
+		return (g_part_alias_list[i].lexeme);
+	}
+
+	return (NULL);
+}
+
+void
+g_part_geometry_heads(off_t blocks, u_int sectors, off_t *bestchs,
+    u_int *bestheads)
+{
+	static u_int candidate_heads[] = { 1, 2, 16, 32, 64, 128, 255, 0 };
+	off_t chs, cylinders;
+	u_int heads;
+	int idx;
+
+	*bestchs = 0;
+	*bestheads = 0;
+	for (idx = 0; candidate_heads[idx] != 0; idx++) {
+		heads = candidate_heads[idx];
+		cylinders = blocks / heads / sectors;
+		if (cylinders < heads || cylinders < sectors)
+			break;
+		if (cylinders > 1023)
+			continue;
+		chs = cylinders * heads * sectors;
+		if (chs > *bestchs || (chs == *bestchs && *bestheads == 1)) {
+			*bestchs = chs;
+			*bestheads = heads;
+		}
+	}
+}
+
+static void
+g_part_geometry(struct g_part_table *table, struct g_consumer *cp,
+    off_t blocks)
+{
+	static u_int candidate_sectors[] = { 1, 9, 17, 33, 63, 0 };
+	off_t chs, bestchs;
+	u_int heads, sectors;
+	int idx;
+
+	if (g_getattr("GEOM::fwsectors", cp, &sectors) != 0 ||
+	    sectors < 1 || sectors > 63 ||
+	    g_getattr("GEOM::fwheads", cp, &heads) != 0 ||
+	    heads < 1 || heads > 255) {
+		table->gpt_fixgeom = 0;
+		table->gpt_heads = 0;
+		table->gpt_sectors = 0;
+		bestchs = 0;
+		for (idx = 0; candidate_sectors[idx] != 0; idx++) {
+			sectors = candidate_sectors[idx];
+			g_part_geometry_heads(blocks, sectors, &chs, &heads);
+			if (chs == 0)
+				continue;
+			/*
+			 * Prefer a geometry with sectors > 1, but only if
+			 * it doesn't bump down the number of heads to 1.
+			 */
+			if (chs > bestchs || (chs == bestchs && heads > 1 &&
+			    table->gpt_sectors == 1)) {
+				bestchs = chs;
+				table->gpt_heads = heads;
+				table->gpt_sectors = sectors;
+			}
+		}
+		/*
+		 * If we didn't find a geometry at all, then the disk is
+		 * too big. This means we can use the maximum number of
+		 * heads and sectors.
+		 */
+		if (bestchs == 0) {
+			table->gpt_heads = 255;
+			table->gpt_sectors = 63;
+		}
+	} else {
+		table->gpt_fixgeom = 1;
+		table->gpt_heads = heads;
+		table->gpt_sectors = sectors;
+	}
+}
+
+/*
+ * Return the entry with the given index, allocating it if necessary.
+ *
+ * The entry list is walked in order; the walk stops at the first entry
+ * whose index is not smaller than 'index'.  If that entry does not carry
+ * exactly 'index', a zeroed entry of the scheme's entry size is allocated
+ * and linked in behind the last entry with a smaller index (or at the head
+ * of the list).  In both cases the entry's start and end are set to the
+ * values passed in.
+ */
+struct g_part_entry *
+g_part_new_entry(struct g_part_table *table, int index, quad_t start,
+    quad_t end)
+{
+	struct g_part_entry *cur, *found, *prev;
+
+	found = NULL;
+	prev = NULL;
+	LIST_FOREACH(cur, &table->gpt_entry, gpe_entry) {
+		if (cur->gpe_index >= index) {
+			if (cur->gpe_index == index)
+				found = cur;
+			break;
+		}
+		prev = cur;
+	}
+
+	if (found == NULL) {
+		found = g_malloc(table->gpt_scheme->gps_entrysz,
+		    M_WAITOK | M_ZERO);
+		found->gpe_index = index;
+		if (prev != NULL)
+			LIST_INSERT_AFTER(prev, found, gpe_entry);
+		else
+			LIST_INSERT_HEAD(&table->gpt_entry, found, gpe_entry);
+	}
+
+	found->gpe_start = start;
+	found->gpe_end = end;
+	return (found);
+}
+
+/*
+ * Create (or refresh) the provider that represents a partition entry.
+ *
+ * The entry's byte offset is derived from its start sector and the sector
+ * size of the provider our first consumer is attached to.  A provider named
+ * "<geom name><G_PART_NAME()>" is created the first time around; afterwards
+ * only its index, sizes, flags and stripe parameters are (re)computed.
+ */
+static void
+g_part_new_provider(struct g_geom *gp, struct g_part_table *table,
+    struct g_part_entry *entry)
+{
+	char buf[32];
+	struct g_provider *parent, *pp;
+
+	parent = LIST_FIRST(&gp->consumer)->provider;
+
+	entry->gpe_offset = entry->gpe_start * parent->sectorsize;
+
+	pp = entry->gpe_pp;
+	if (pp == NULL) {
+		pp = g_new_providerf(gp, "%s%s", gp->name,
+		    G_PART_NAME(table, entry, buf, sizeof(buf)));
+		pp->private = entry;		/* Close the circle. */
+		entry->gpe_pp = pp;
+	}
+
+	pp->index = entry->gpe_index - 1;	/* index is 1-based. */
+	pp->mediasize = (entry->gpe_end - entry->gpe_start + 1) *
+	    parent->sectorsize;
+	pp->sectorsize = parent->sectorsize;
+	/* Only the delete capability is inherited from the parent. */
+	pp->flags = parent->flags & G_PF_CANDELETE;
+	if (parent->stripesize > 0) {
+		pp->stripesize = parent->stripesize;
+		pp->stripeoffset = (parent->stripeoffset +
+		    entry->gpe_offset) % parent->stripesize;
+	}
+	g_error_provider(pp, 0);
+}
+
+/*
+ * Resolve a geom name to a geom of the g_part class.
+ * Stores the geom in *v and returns 0, or returns EINVAL when no geom of
+ * this class carries the name 'p'.
+ */
+static int
+g_part_parm_geom(const char *p, struct g_geom **v)
+{
+	struct g_geom *gp;
+
+	LIST_FOREACH(gp, &g_part_class.geom, geom) {
+		if (strcmp(p, gp->name) == 0) {
+			*v = gp;
+			return (0);
+		}
+	}
+	return (EINVAL);
+}
+
+/*
+ * Resolve a provider name via g_provider_by_name().
+ * Stores the provider in *v and returns 0, or returns EINVAL when the
+ * lookup yields nothing.
+ */
+static int
+g_part_parm_provider(const char *p, struct g_provider **v)
+{
+	struct g_provider *found;
+
+	found = g_provider_by_name(p);
+	if (found != NULL) {
+		*v = found;
+		return (0);
+	}
+	return (EINVAL);
+}
+
+/*
+ * Parse a non-negative 64-bit number (any base strtoq() auto-detects).
+ * Stores the value in *v and returns 0; returns EINVAL when the string has
+ * trailing characters or the value is negative.
+ */
+static int
+g_part_parm_quad(const char *p, quad_t *v)
+{
+	char *endp;
+	quad_t val;
+
+	val = strtoq(p, &endp, 0);
+	if (val >= 0 && *endp == '\0') {
+		*v = val;
+		return (0);
+	}
+	return (EINVAL);
+}
+
+/*
+ * Look up a partitioning scheme by name (case-insensitive) in the scheme
+ * linker set.  Schemes without a name are skipped.
+ * Stores the scheme in *v and returns 0, or returns EINVAL when no scheme
+ * matches.
+ */
+static int
+g_part_parm_scheme(const char *p, struct g_part_scheme **v)
+{
+	struct g_part_scheme **iter;
+
+	SET_FOREACH(iter, g_part_scheme_set) {
+		if ((*iter)->name != NULL &&
+		    strcasecmp((*iter)->name, p) == 0) {
+			*v = *iter;
+			return (0);
+		}
+	}
+	return (EINVAL);
+}
+
+/*
+ * Accept a non-empty string parameter.
+ * Stores 'p' itself (no copy is made) in *v and returns 0; an empty string
+ * is rejected with EINVAL and *v is left untouched.
+ */
+static int
+g_part_parm_str(const char *p, const char **v)
+{
+
+	if (*p != '\0') {
+		*v = p;
+		return (0);
+	}
+	return (EINVAL);
+}
+
+/*
+ * Parse an unsigned integer in the range 0..INT_MAX (any base strtol()
+ * auto-detects).  Stores the value in *v and returns 0; returns EINVAL on
+ * trailing characters or when the value is out of range.
+ */
+static int
+g_part_parm_uint(const char *p, u_int *v)
+{
+	char *endp;
+	long val;
+
+	val = strtol(p, &endp, 0);
+	if (*endp == '\0' && val >= 0 && val <= INT_MAX) {
+		*v = (unsigned int)val;
+		return (0);
+	}
+	return (EINVAL);
+}
+
+/*
+ * Pick the partitioning scheme that best matches the media behind 'cp'.
+ *
+ * G_PART_PROBE() results are interpreted as follows: 0 is a perfect match
+ * and ends the search, a positive value is an error, and a negative value
+ * is a priority where values closer to zero win.  If the geom already has
+ * a table with a real scheme, that scheme is probed first and serves as the
+ * baseline.  On return gp->softc holds the table of the winning scheme.
+ *
+ * Returns 0 when a scheme other than the null scheme was selected and
+ * ENXIO otherwise.
+ */
+static int
+g_part_probe(struct g_geom *gp, struct g_consumer *cp, int depth)
+{
+	struct g_part_scheme **iter, *scheme;
+	struct g_part_table *table;
+	int pri, probe;
+
+	/* Start from the scheme currently attached to the geom, if any. */
+	table = gp->softc;
+	scheme = (table != NULL) ? table->gpt_scheme : &g_part_null_scheme;
+	pri = (scheme != &g_part_null_scheme) ? G_PART_PROBE(table, cp) :
+	    INT_MIN;
+	if (pri == 0)
+		goto done;
+	if (pri > 0) {	/* error */
+		scheme = &g_part_null_scheme;
+		pri = INT_MIN;
+	}
+
+	SET_FOREACH(iter, g_part_scheme_set) {
+		if ((*iter) == &g_part_null_scheme)
+			continue;
+		/* Each candidate gets a scratch table to probe with. */
+		table = (void *)kobj_create((kobj_class_t)(*iter), M_GEOM,
+		    M_WAITOK);
+		table->gpt_gp = gp;
+		table->gpt_scheme = *iter;
+		table->gpt_depth = depth;
+		probe = G_PART_PROBE(table, cp);
+		if (probe <= 0 && probe > pri) {
+			/* Better match: it replaces the previous softc. */
+			pri = probe;
+			scheme = *iter;
+			if (gp->softc != NULL)
+				kobj_delete((kobj_t)gp->softc, M_GEOM);
+			gp->softc = table;
+			if (pri == 0)
+				goto done;
+		} else
+			kobj_delete((kobj_t)table, M_GEOM);
+	}
+
+done:
+	return ((scheme == &g_part_null_scheme) ? ENXIO : 0);
+}
+
+/*
+ * Control request functions.
+ */
+
+/*
+ * Handle the "add" verb: add a partition entry to the in-core table.
+ *
+ * The new partition spans gpp_start .. gpp_start + gpp_size - 1 and must
+ * lie within gpt_first .. gpt_last without overlapping any live entry.  If
+ * gpp_index is 0 the lowest free index is used; otherwise the requested
+ * index must be free.  A deleted-but-uncommitted entry with the chosen
+ * index is recycled, otherwise a new entry is allocated and linked into
+ * the list so that the list stays sorted by index.
+ *
+ * Returns 0 on success.  On failure the error is also reported through
+ * gctl_error(): EINVAL (bad start, size or index), ENOSPC (overlap),
+ * EEXIST (index in use) or whatever G_PART_ADD() returned.
+ */
+static int
+g_part_ctl_add(struct gctl_req *req, struct g_part_parms *gpp)
+{
+	char buf[32];
+	struct g_geom *gp;
+	struct g_part_entry *delent, *last, *entry;
+	struct g_part_table *table;
+	struct sbuf *sb;
+	quad_t end;
+	unsigned int index;
+	int error;
+
+	gp = gpp->gpp_geom;
+	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name));
+	g_topology_assert();
+
+	table = gp->softc;
+	end = gpp->gpp_start + gpp->gpp_size - 1;
+
+	if (gpp->gpp_start < table->gpt_first ||
+	    gpp->gpp_start > table->gpt_last) {
+		gctl_error(req, "%d start '%jd'", EINVAL,
+		    (intmax_t)gpp->gpp_start);
+		return (EINVAL);
+	}
+	/* end < start also catches a size of zero. */
+	if (end < gpp->gpp_start || end > table->gpt_last) {
+		gctl_error(req, "%d size '%jd'", EINVAL,
+		    (intmax_t)gpp->gpp_size);
+		return (EINVAL);
+	}
+	if (gpp->gpp_index > table->gpt_entries) {
+		gctl_error(req, "%d index '%d'", EINVAL, gpp->gpp_index);
+		return (EINVAL);
+	}
+
+	delent = last = NULL;
+	index = (gpp->gpp_index > 0) ? gpp->gpp_index : 1;
+	LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) {
+		if (entry->gpe_deleted) {
+			/* Remember a deleted entry we could recycle. */
+			if (entry->gpe_index == index)
+				delent = entry;
+			continue;
+		}
+		/* The candidate index is taken; try the next one. */
+		if (entry->gpe_index == index)
+			index = entry->gpe_index + 1;
+		/*
+		 * Track the last live entry that sorts before the new one,
+		 * so that the insertion below keeps the list ordered by
+		 * index even when an explicit index skips over a gap.
+		 */
+		if (entry->gpe_index < index)
+			last = entry;
+		if (gpp->gpp_start >= entry->gpe_start &&
+		    gpp->gpp_start <= entry->gpe_end) {
+			gctl_error(req, "%d start '%jd'", ENOSPC,
+			    (intmax_t)gpp->gpp_start);
+			return (ENOSPC);
+		}
+		if (end >= entry->gpe_start && end <= entry->gpe_end) {
+			gctl_error(req, "%d end '%jd'", ENOSPC, (intmax_t)end);
+			return (ENOSPC);
+		}
+		if (gpp->gpp_start < entry->gpe_start && end > entry->gpe_end) {
+			gctl_error(req, "%d size '%jd'", ENOSPC,
+			    (intmax_t)gpp->gpp_size);
+			return (ENOSPC);
+		}
+	}
+	/* An explicitly requested index that got bumped was in use. */
+	if (gpp->gpp_index > 0 && index != gpp->gpp_index) {
+		gctl_error(req, "%d index '%d'", EEXIST, gpp->gpp_index);
+		return (EEXIST);
+	}
+
+	entry = (delent == NULL) ? g_malloc(table->gpt_scheme->gps_entrysz,
+	    M_WAITOK | M_ZERO) : delent;
+	entry->gpe_index = index;
+	entry->gpe_start = gpp->gpp_start;
+	entry->gpe_end = end;
+	error = G_PART_ADD(table, entry, gpp);
+	if (error) {
+		gctl_error(req, "%d", error);
+		if (delent == NULL)
+			g_free(entry);
+		return (error);
+	}
+	if (delent == NULL) {
+		if (last == NULL)
+			LIST_INSERT_HEAD(&table->gpt_entry, entry, gpe_entry);
+		else
+			LIST_INSERT_AFTER(last, entry, gpe_entry);
+		entry->gpe_created = 1;
+	} else {
+		/* A recycled entry is already linked into the list. */
+		entry->gpe_deleted = 0;
+		entry->gpe_modified = 1;
+	}
+	g_part_new_provider(gp, table, entry);
+
+	/* Provide feedback if so requested. */
+	if (gpp->gpp_parms & G_PART_PARM_OUTPUT) {
+		sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+		sbuf_printf(sb, "%s%s added\n", gp->name,
+		    G_PART_NAME(table, entry, buf, sizeof(buf)));
+		sbuf_finish(sb);
+		gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
+		sbuf_delete(sb);
+	}
+	return (0);
+}
+
+/*
+ * Handle the "commit" verb: write the in-core table to the media and drop
+ * the access that was acquired for the pending changes.
+ *
+ * Fails with EPERM when the table is not opened.  Before the table itself
+ * is written, every sector flagged in gpt_smhead (counted from the start
+ * of the media) and gpt_smtail (counted from the end) is overwritten with
+ * zeroes.  A geom carrying the null scheme is withered afterwards instead
+ * of being written.  Errors are reported through gctl_error() and
+ * returned.
+ */
+static int
+g_part_ctl_commit(struct gctl_req *req, struct g_part_parms *gpp)
+{
+	struct g_consumer *cp;
+	struct g_geom *gp;
+	struct g_provider *pp;
+	struct g_part_entry *entry, *tmp;
+	struct g_part_table *table;
+	char *buf;
+	int error, i;
+
+	gp = gpp->gpp_geom;
+	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name));
+	g_topology_assert();
+
+	table = gp->softc;
+	if (!table->gpt_opened) {
+		gctl_error(req, "%d", EPERM);
+		return (EPERM);
+	}
+
+	cp = LIST_FIRST(&gp->consumer);
+	if ((table->gpt_smhead | table->gpt_smtail) != 0) {
+		pp = cp->provider;
+		/* One zeroed sector is reused for every write below. */
+		buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO);
+		while (table->gpt_smhead != 0) {
+			/* Bit i stands for sector i from the start. */
+			i = ffs(table->gpt_smhead) - 1;
+			error = g_write_data(cp, i * pp->sectorsize, buf,
+			    pp->sectorsize);
+			if (error) {
+				g_free(buf);
+				goto fail;
+			}
+			table->gpt_smhead &= ~(1 << i);
+		}
+		while (table->gpt_smtail != 0) {
+			/* Bit i stands for the (i+1)-th sector from the end. */
+			i = ffs(table->gpt_smtail) - 1;
+			error = g_write_data(cp, pp->mediasize - (i + 1) *
+			    pp->sectorsize, buf, pp->sectorsize);
+			if (error) {
+				g_free(buf);
+				goto fail;
+			}
+			table->gpt_smtail &= ~(1 << i);
+		}
+		g_free(buf);
+	}
+
+	/* Nothing to write for the null scheme; the geom goes away. */
+	if (table->gpt_scheme == &g_part_null_scheme) {
+		g_access(cp, -1, -1, -1);
+		g_part_wither(gp, ENXIO);
+		return (0);
+	}
+
+	error = G_PART_WRITE(table, cp);
+	if (error)
+		goto fail;
+
+	/* The table is on disk: clear change flags, drop deleted entries. */
+	LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, tmp) {
+		if (!entry->gpe_deleted) {
+			entry->gpe_created = 0;
+			entry->gpe_modified = 0;
+			continue;
+		}
+		LIST_REMOVE(entry, gpe_entry);
+		g_free(entry);
+	}
+	table->gpt_created = 0;
+	table->gpt_opened = 0;
+	g_access(cp, -1, -1, -1);
+	return (0);
+
+fail:
+	gctl_error(req, "%d", error);
+	return (error);
+}
+
+/*
+ * Handle the "create" verb: put a new, empty partition table of the
+ * requested scheme on a provider.
+ *
+ * If a g_part geom with the provider's name already exists it must carry
+ * the null scheme (otherwise EEXIST); that geom is then reused and its
+ * null table is replaced.  Otherwise a new geom and consumer are created
+ * and opened r1w1e1.  The number of entries defaults to the scheme's
+ * minimum and must lie within gps_minent .. gps_maxent when given.
+ *
+ * On success gpp_geom is filled in so that an automatic commit can follow.
+ * Errors are reported through gctl_error() and returned.  The topology
+ * lock is dropped while the provider is being examined and is held again
+ * on return.
+ */
+static int
+g_part_ctl_create(struct gctl_req *req, struct g_part_parms *gpp)
+{
+	struct g_consumer *cp;
+	struct g_geom *gp;
+	struct g_provider *pp;
+	struct g_part_scheme *scheme;
+	struct g_part_table *null, *table;
+	struct sbuf *sb;
+	int attr, error;
+
+	pp = gpp->gpp_provider;
+	scheme = gpp->gpp_scheme;
+	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, pp->name));
+	g_topology_assert();
+
+	/* Check that there isn't already a g_part geom on the provider. */
+	error = g_part_parm_geom(pp->name, &gp);
+	if (!error) {
+		null = gp->softc;
+		if (null->gpt_scheme != &g_part_null_scheme) {
+			gctl_error(req, "%d geom '%s'", EEXIST, pp->name);
+			return (EEXIST);
+		}
+	} else
+		null = NULL;
+
+	if ((gpp->gpp_parms & G_PART_PARM_ENTRIES) &&
+	    (gpp->gpp_entries < scheme->gps_minent ||
+	     gpp->gpp_entries > scheme->gps_maxent)) {
+		gctl_error(req, "%d entries '%d'", EINVAL, gpp->gpp_entries);
+		return (EINVAL);
+	}
+
+	/* From here on 'null' tells whether an existing geom is reused. */
+	if (null == NULL)
+		gp = g_new_geomf(&g_part_class, "%s", pp->name);
+	gp->softc = kobj_create((kobj_class_t)gpp->gpp_scheme, M_GEOM,
+	    M_WAITOK);
+	table = gp->softc;
+	table->gpt_gp = gp;
+	table->gpt_scheme = gpp->gpp_scheme;
+	table->gpt_entries = (gpp->gpp_parms & G_PART_PARM_ENTRIES) ?
+	    gpp->gpp_entries : scheme->gps_minent;
+	LIST_INIT(&table->gpt_entry);
+	if (null == NULL) {
+		cp = g_new_consumer(gp);
+		error = g_attach(cp, pp);
+		if (error == 0)
+			error = g_access(cp, 1, 1, 1);
+		if (error != 0) {
+			g_part_wither(gp, error);
+			gctl_error(req, "%d geom '%s'", error, pp->name);
+			return (error);
+		}
+		table->gpt_opened = 1;
+	} else {
+		/* Carry the pending state of the null table over. */
+		cp = LIST_FIRST(&gp->consumer);
+		table->gpt_opened = null->gpt_opened;
+		table->gpt_smhead = null->gpt_smhead;
+		table->gpt_smtail = null->gpt_smtail;
+	}
+
+	g_topology_unlock();
+
+	/* Make sure the provider has media. */
+	if (pp->mediasize == 0 || pp->sectorsize == 0) {
+		error = ENODEV;
+		goto fail;
+	}
+
+	/* Make sure we can nest and if so, determine our depth. */
+	error = g_getattr("PART::isleaf", cp, &attr);
+	if (!error && attr) {
+		error = ENODEV;
+		goto fail;
+	}
+	error = g_getattr("PART::depth", cp, &attr);
+	table->gpt_depth = (!error) ? attr + 1 : 0;
+
+	/*
+	 * Synthesize a disk geometry. Some partitioning schemes
+	 * depend on it and since some file systems need it even
+	 * when the partition scheme doesn't, we do it here in
+	 * scheme-independent code.
+	 */
+	g_part_geometry(table, cp, pp->mediasize / pp->sectorsize);
+
+	error = G_PART_CREATE(table, gpp);
+	if (error)
+		goto fail;
+
+	g_topology_lock();
+
+	table->gpt_created = 1;
+	/* The null table has been superseded for good. */
+	if (null != NULL)
+		kobj_delete((kobj_t)null, M_GEOM);
+
+	/*
+	 * Support automatic commit by filling in the gpp_geom
+	 * parameter.
+	 */
+	gpp->gpp_parms |= G_PART_PARM_GEOM;
+	gpp->gpp_geom = gp;
+
+	/* Provide feedback if so requested. */
+	if (gpp->gpp_parms & G_PART_PARM_OUTPUT) {
+		sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+		sbuf_printf(sb, "%s created\n", gp->name);
+		sbuf_finish(sb);
+		gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
+		sbuf_delete(sb);
+	}
+	return (0);
+
+fail:
+	g_topology_lock();
+	if (null == NULL) {
+		/* We created the geom, so tear it down again. */
+		g_access(cp, -1, -1, -1);
+		g_part_wither(gp, error);
+	} else {
+		/* Put the original null table back on the reused geom. */
+		kobj_delete((kobj_t)gp->softc, M_GEOM);
+		gp->softc = null;
+	}
+	gctl_error(req, "%d provider", error);
+	return (error);
+}
+
+/*
+ * Handle the "delete" verb: remove the partition with index gpp_index.
+ *
+ * The partition's provider must not be open (EBUSY otherwise); it is
+ * detached from the entry and withered.  An entry that was created since
+ * the last commit is freed right away; any other entry is merely marked
+ * deleted so that commit and undo can still act on it.
+ *
+ * Returns 0 on success, ENOENT when no live entry has the requested index
+ * and EBUSY when the provider is in use.  Errors are also reported through
+ * gctl_error().
+ */
+static int
+g_part_ctl_delete(struct gctl_req *req, struct g_part_parms *gpp)
+{
+	char buf[32];
+	struct g_geom *gp;
+	struct g_provider *pp;
+	struct g_part_entry *entry;
+	struct g_part_table *table;
+	struct sbuf *sb;
+
+	gp = gpp->gpp_geom;
+	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name));
+	g_topology_assert();
+
+	table = gp->softc;
+
+	LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) {
+		if (entry->gpe_deleted)
+			continue;
+		if (entry->gpe_index == gpp->gpp_index)
+			break;
+	}
+	if (entry == NULL) {
+		gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index);
+		return (ENOENT);
+	}
+
+	pp = entry->gpe_pp;
+	if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) {
+		gctl_error(req, "%d", EBUSY);
+		return (EBUSY);
+	}
+
+	/* Break the link between provider and entry before withering. */
+	pp->private = NULL;
+	entry->gpe_pp = NULL;
+	g_wither_provider(pp, ENXIO);
+
+	/*
+	 * Provide feedback if so requested.  This has to happen while the
+	 * entry is still valid, because G_PART_NAME() reads it.
+	 */
+	if (gpp->gpp_parms & G_PART_PARM_OUTPUT) {
+		sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+		sbuf_printf(sb, "%s%s deleted\n", gp->name,
+		    G_PART_NAME(table, entry, buf, sizeof(buf)));
+		sbuf_finish(sb);
+		gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
+		sbuf_delete(sb);
+	}
+
+	if (entry->gpe_created) {
+		/* Never made it to disk: forget about it entirely. */
+		LIST_REMOVE(entry, gpe_entry);
+		g_free(entry);
+	} else {
+		entry->gpe_modified = 0;
+		entry->gpe_deleted = 1;
+	}
+	return (0);
+}
+
+/*
+ * Handle the "destroy" verb: replace the geom's table by a null-scheme
+ * table.
+ *
+ * Refused with EBUSY while any entry that is not marked deleted remains.
+ * After G_PART_DESTROY() succeeded, a null table inheriting the depth,
+ * opened state and gpt_smhead/gpt_smtail masks of the old table becomes
+ * the softc; the old table and its remaining entries are freed.
+ * Errors are reported through gctl_error() and returned.
+ */
+static int
+g_part_ctl_destroy(struct gctl_req *req, struct g_part_parms *gpp)
+{
+	struct g_geom *gp;
+	struct g_part_entry *entry;
+	struct g_part_table *null, *table;
+	struct sbuf *sb;
+	int error;
+
+	gp = gpp->gpp_geom;
+	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name));
+	g_topology_assert();
+
+	table = gp->softc;
+	LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) {
+		if (!entry->gpe_deleted) {
+			gctl_error(req, "%d", EBUSY);
+			return (EBUSY);
+		}
+	}
+
+	error = G_PART_DESTROY(table, gpp);
+	if (error != 0) {
+		gctl_error(req, "%d", error);
+		return (error);
+	}
+
+	/* Install the replacement table before tearing down the old one. */
+	null = (struct g_part_table *)kobj_create(
+	    (kobj_class_t)&g_part_null_scheme, M_GEOM, M_WAITOK);
+	gp->softc = null;
+	null->gpt_gp = gp;
+	null->gpt_scheme = &g_part_null_scheme;
+	LIST_INIT(&null->gpt_entry);
+	null->gpt_depth = table->gpt_depth;
+	null->gpt_opened = table->gpt_opened;
+	null->gpt_smhead = table->gpt_smhead;
+	null->gpt_smtail = table->gpt_smtail;
+
+	while (!LIST_EMPTY(&table->gpt_entry)) {
+		entry = LIST_FIRST(&table->gpt_entry);
+		LIST_REMOVE(entry, gpe_entry);
+		g_free(entry);
+	}
+	kobj_delete((kobj_t)table, M_GEOM);
+
+	/* Provide feedback if so requested. */
+	if ((gpp->gpp_parms & G_PART_PARM_OUTPUT) == 0)
+		return (0);
+	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+	sbuf_printf(sb, "%s destroyed\n", gp->name);
+	sbuf_finish(sb);
+	gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
+	sbuf_delete(sb);
+	return (0);
+}
+
+/*
+ * Handle the "modify" verb: let the scheme update the live entry with
+ * index gpp_index via G_PART_MODIFY().
+ *
+ * An entry that was not created since the last commit is flagged as
+ * modified.  Returns 0 on success, ENOENT when no live entry has the
+ * requested index, or the error from G_PART_MODIFY(); errors are also
+ * reported through gctl_error().
+ */
+static int
+g_part_ctl_modify(struct gctl_req *req, struct g_part_parms *gpp)
+{
+	char buf[32];
+	struct g_geom *gp;
+	struct g_part_entry *entry;
+	struct g_part_table *table;
+	struct sbuf *sb;
+	int error;
+
+	gp = gpp->gpp_geom;
+	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name));
+	g_topology_assert();
+
+	table = gp->softc;
+
+	/* Find the live (not deleted) entry with the requested index. */
+	LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) {
+		if (!entry->gpe_deleted &&
+		    entry->gpe_index == gpp->gpp_index)
+			break;
+	}
+	if (entry == NULL) {
+		gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index);
+		return (ENOENT);
+	}
+
+	error = G_PART_MODIFY(table, entry, gpp);
+	if (error != 0) {
+		gctl_error(req, "%d", error);
+		return (error);
+	}
+
+	/* A freshly created entry is written in full anyway. */
+	if (entry->gpe_created == 0)
+		entry->gpe_modified = 1;
+
+	/* Provide feedback if so requested. */
+	if ((gpp->gpp_parms & G_PART_PARM_OUTPUT) == 0)
+		return (0);
+	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+	sbuf_printf(sb, "%s%s modified\n", gp->name,
+	    G_PART_NAME(table, entry, buf, sizeof(buf)));
+	sbuf_finish(sb);
+	gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
+	sbuf_delete(sb);
+	return (0);
+}
+
+/*
+ * Handle the "move" verb.  Not implemented: reports and returns ENOSYS.
+ */
+static int
+g_part_ctl_move(struct gctl_req *req, struct g_part_parms *gpp)
+{
+	int error;
+
+	error = ENOSYS;
+	gctl_error(req, "%d verb 'move'", error);
+	return (error);
+}
+
+/*
+ * Handle the "recover" verb.  Not implemented: reports and returns ENOSYS.
+ */
+static int
+g_part_ctl_recover(struct gctl_req *req, struct g_part_parms *gpp)
+{
+	int error;
+
+	error = ENOSYS;
+	gctl_error(req, "%d verb 'recover'", error);
+	return (error);
+}
+
+/*
+ * Handle the "resize" verb.  Not implemented: reports and returns ENOSYS.
+ */
+static int
+g_part_ctl_resize(struct gctl_req *req, struct g_part_parms *gpp)
+{
+	int error;
+
+	error = ENOSYS;
+	gctl_error(req, "%d verb 'resize'", error);
+	return (error);
+}
+
+/*
+ * Handle the "undo" verb: throw away all uncommitted changes and re-read
+ * the table from the media.
+ *
+ * Fails with EPERM when the table is not opened.  Entries created since
+ * the last commit lose their provider and are freed, as are entries that
+ * are marked deleted; G_PART_READ() is then relied upon to populate the
+ * table again.  When the table is a null table or was itself created but
+ * never committed, the media is probed anew; if that finds no scheme the
+ * geom is withered and 0 is returned.
+ *
+ * The topology lock is dropped while probing and reading and is held again
+ * on return.  Errors are reported through gctl_error() and returned.
+ */
+static int
+g_part_ctl_undo(struct gctl_req *req, struct g_part_parms *gpp)
+{
+	struct g_consumer *cp;
+	struct g_provider *pp;
+	struct g_geom *gp;
+	struct g_part_entry *entry, *tmp;
+	struct g_part_table *table;
+	int error, reprobe;
+
+	gp = gpp->gpp_geom;
+	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name));
+	g_topology_assert();
+
+	table = gp->softc;
+	if (!table->gpt_opened) {
+		gctl_error(req, "%d", EPERM);
+		return (EPERM);
+	}
+
+	cp = LIST_FIRST(&gp->consumer);
+	LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, tmp) {
+		entry->gpe_modified = 0;
+		if (entry->gpe_created) {
+			/* Detach and wither the provider of a new entry. */
+			pp = entry->gpe_pp;
+			pp->private = NULL;
+			entry->gpe_pp = NULL;
+			g_wither_provider(pp, ENXIO);
+			/* Fall into the removal below. */
+			entry->gpe_deleted = 1;
+		}
+		if (entry->gpe_deleted) {
+			LIST_REMOVE(entry, gpe_entry);
+			g_free(entry);
+		}
+	}
+
+	g_topology_unlock();
+
+	/* Without a committed scheme we must find out what is on disk. */
+	reprobe = (table->gpt_scheme == &g_part_null_scheme ||
+	    table->gpt_created) ? 1 : 0;
+
+	if (reprobe) {
+		if (!LIST_EMPTY(&table->gpt_entry)) {
+			error = EBUSY;
+			goto fail;
+		}
+		error = g_part_probe(gp, cp, table->gpt_depth);
+		if (error) {
+			/* No scheme on the media: the geom goes away. */
+			g_topology_lock();
+			g_access(cp, -1, -1, -1);
+			g_part_wither(gp, error);
+			return (0);
+		}
+		/* Probing may have installed a different table. */
+		table = gp->softc;
+	}
+
+	error = G_PART_READ(table, cp);
+	if (error)
+		goto fail;
+
+	g_topology_lock();
+
+	LIST_FOREACH(entry, &table->gpt_entry, gpe_entry)
+		g_part_new_provider(gp, table, entry);
+
+	table->gpt_opened = 0;
+	g_access(cp, -1, -1, -1);
+	return (0);
+
+fail:
+	g_topology_lock();
+	gctl_error(req, "%d", error);
+	return (error);
+}
+
+/*
+ * Tear down a g_part geom: free every entry of its table (if it has one),
+ * delete the table object, clear the softc and hand the geom to
+ * g_wither_geom() with the given error.
+ */
+static void
+g_part_wither(struct g_geom *gp, int error)
+{
+	struct g_part_entry *entry, *next;
+	struct g_part_table *table;
+
+	table = gp->softc;
+	if (table != NULL) {
+		LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, next) {
+			LIST_REMOVE(entry, gpe_entry);
+			g_free(entry);
+		}
+		kobj_delete((kobj_t)table, M_GEOM);
+		gp->softc = NULL;
+	}
+	g_wither_geom(gp, error);
+}
+
+/*
+ * Class methods.
+ */
+
+/*
+ * Control request entry point of the class.
+ *
+ * The request is processed in four steps:
+ *  1. The verb is mapped to a control request together with the set of
+ *     mandatory (mparms) and optional (oparms) parameters it accepts.
+ *  2. Every request argument is matched against those sets and parsed
+ *     into 'gpp'; unknown, unparsable or missing parameters fail the
+ *     request.
+ *  3. For verbs that modify an existing geom, r1w1e1 access is acquired
+ *     on the geom's consumer unless the table is already opened.
+ *  4. The verb handler runs, optionally followed by an automatic commit
+ *     when the flags parameter contains 'C'.
+ *
+ * All errors are reported to the caller through gctl_error().  If access
+ * was acquired in step 3 and the request failed, the access is released
+ * again.
+ */
+static void
+g_part_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb)
+{
+	struct g_part_parms gpp;
+	struct g_part_table *table;
+	struct gctl_req_arg *ap;
+	const char *p;
+	enum g_part_ctl ctlreq;
+	unsigned int i, mparms, oparms, parm;
+	int auto_commit, close_on_error;
+	int error, modifies;
+
+	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, verb));
+	g_topology_assert();
+
+	/* Step 1: identify the verb and the parameters it takes. */
+	ctlreq = G_PART_CTL_NONE;
+	modifies = 1;
+	mparms = 0;
+	oparms = G_PART_PARM_FLAGS | G_PART_PARM_OUTPUT | G_PART_PARM_VERSION;
+	switch (*verb) {
+	case 'a':
+		if (!strcmp(verb, "add")) {
+			ctlreq = G_PART_CTL_ADD;
+			mparms |= G_PART_PARM_GEOM | G_PART_PARM_SIZE |
+			    G_PART_PARM_START | G_PART_PARM_TYPE;
+			oparms |= G_PART_PARM_INDEX | G_PART_PARM_LABEL;
+		}
+		break;
+	case 'c':
+		if (!strcmp(verb, "commit")) {
+			ctlreq = G_PART_CTL_COMMIT;
+			mparms |= G_PART_PARM_GEOM;
+			modifies = 0;
+		} else if (!strcmp(verb, "create")) {
+			ctlreq = G_PART_CTL_CREATE;
+			mparms |= G_PART_PARM_PROVIDER | G_PART_PARM_SCHEME;
+			oparms |= G_PART_PARM_ENTRIES;
+		}
+		break;
+	case 'd':
+		if (!strcmp(verb, "delete")) {
+			ctlreq = G_PART_CTL_DELETE;
+			mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX;
+		} else if (!strcmp(verb, "destroy")) {
+			ctlreq = G_PART_CTL_DESTROY;
+			mparms |= G_PART_PARM_GEOM;
+		}
+		break;
+	case 'm':
+		if (!strcmp(verb, "modify")) {
+			ctlreq = G_PART_CTL_MODIFY;
+			mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX;
+			oparms |= G_PART_PARM_LABEL | G_PART_PARM_TYPE;
+		} else if (!strcmp(verb, "move")) {
+			ctlreq = G_PART_CTL_MOVE;
+			mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX;
+		}
+		break;
+	case 'r':
+		if (!strcmp(verb, "recover")) {
+			ctlreq = G_PART_CTL_RECOVER;
+			mparms |= G_PART_PARM_GEOM;
+		} else if (!strcmp(verb, "resize")) {
+			ctlreq = G_PART_CTL_RESIZE;
+			mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX;
+		}
+		break;
+	case 'u':
+		if (!strcmp(verb, "undo")) {
+			ctlreq = G_PART_CTL_UNDO;
+			mparms |= G_PART_PARM_GEOM;
+			modifies = 0;
+		}
+		break;
+	}
+	if (ctlreq == G_PART_CTL_NONE) {
+		gctl_error(req, "%d verb '%s'", EINVAL, verb);
+		return;
+	}
+
+	/* Step 2: collect and parse the parameters. */
+	bzero(&gpp, sizeof(gpp));
+	for (i = 0; i < req->narg; i++) {
+		ap = &req->arg[i];
+		parm = 0;
+		switch (ap->name[0]) {
+		case 'c':
+			/* "class" is not ours to interpret. */
+			if (!strcmp(ap->name, "class"))
+				continue;
+			break;
+		case 'e':
+			if (!strcmp(ap->name, "entries"))
+				parm = G_PART_PARM_ENTRIES;
+			break;
+		case 'f':
+			if (!strcmp(ap->name, "flags"))
+				parm = G_PART_PARM_FLAGS;
+			break;
+		case 'g':
+			if (!strcmp(ap->name, "geom"))
+				parm = G_PART_PARM_GEOM;
+			break;
+		case 'i':
+			if (!strcmp(ap->name, "index"))
+				parm = G_PART_PARM_INDEX;
+			break;
+		case 'l':
+			if (!strcmp(ap->name, "label"))
+				parm = G_PART_PARM_LABEL;
+			break;
+		case 'o':
+			if (!strcmp(ap->name, "output"))
+				parm = G_PART_PARM_OUTPUT;
+			break;
+		case 'p':
+			if (!strcmp(ap->name, "provider"))
+				parm = G_PART_PARM_PROVIDER;
+			break;
+		case 's':
+			if (!strcmp(ap->name, "scheme"))
+				parm = G_PART_PARM_SCHEME;
+			else if (!strcmp(ap->name, "size"))
+				parm = G_PART_PARM_SIZE;
+			else if (!strcmp(ap->name, "start"))
+				parm = G_PART_PARM_START;
+			break;
+		case 't':
+			if (!strcmp(ap->name, "type"))
+				parm = G_PART_PARM_TYPE;
+			break;
+		case 'v':
+			/* "verb" was already passed to us separately. */
+			if (!strcmp(ap->name, "verb"))
+				continue;
+			else if (!strcmp(ap->name, "version"))
+				parm = G_PART_PARM_VERSION;
+			break;
+		}
+		/* Unknown names (parm == 0) are rejected here as well. */
+		if ((parm & (mparms | oparms)) == 0) {
+			gctl_error(req, "%d param '%s'", EINVAL, ap->name);
+			return;
+		}
+		p = gctl_get_asciiparam(req, ap->name);
+		if (p == NULL) {
+			gctl_error(req, "%d param '%s'", ENOATTR, ap->name);
+			return;
+		}
+		switch (parm) {
+		case G_PART_PARM_ENTRIES:
+			error = g_part_parm_uint(p, &gpp.gpp_entries);
+			break;
+		case G_PART_PARM_FLAGS:
+			/* Empty flags are treated as if not given at all. */
+			if (p[0] == '\0')
+				continue;
+			error = g_part_parm_str(p, &gpp.gpp_flags);
+			break;
+		case G_PART_PARM_GEOM:
+			error = g_part_parm_geom(p, &gpp.gpp_geom);
+			break;
+		case G_PART_PARM_INDEX:
+			error = g_part_parm_uint(p, &gpp.gpp_index);
+			break;
+		case G_PART_PARM_LABEL:
+			/* An empty label is always valid. */
+			gpp.gpp_label = p;
+			error = 0;
+			break;
+		case G_PART_PARM_OUTPUT:
+			error = 0;	/* Write-only parameter */
+			break;
+		case G_PART_PARM_PROVIDER:
+			error = g_part_parm_provider(p, &gpp.gpp_provider);
+			break;
+		case G_PART_PARM_SCHEME:
+			error = g_part_parm_scheme(p, &gpp.gpp_scheme);
+			break;
+		case G_PART_PARM_SIZE:
+			error = g_part_parm_quad(p, &gpp.gpp_size);
+			break;
+		case G_PART_PARM_START:
+			error = g_part_parm_quad(p, &gpp.gpp_start);
+			break;
+		case G_PART_PARM_TYPE:
+			error = g_part_parm_str(p, &gpp.gpp_type);
+			break;
+		case G_PART_PARM_VERSION:
+			error = g_part_parm_uint(p, &gpp.gpp_version);
+			break;
+		default:
+			error = EDOOFUS;
+			break;
+		}
+		if (error) {
+			gctl_error(req, "%d %s '%s'", error, ap->name, p);
+			return;
+		}
+		gpp.gpp_parms |= parm;
+	}
+	if ((gpp.gpp_parms & mparms) != mparms) {
+		/* Report the bitmask of the missing mandatory parameters. */
+		parm = mparms - (gpp.gpp_parms & mparms);
+		gctl_error(req, "%d param '%x'", ENOATTR, parm);
+		return;
+	}
+
+	/* Obtain permissions if possible/necessary. */
+	close_on_error = 0;
+	table = NULL;	/* Suppress uninit. warning. */
+	if (modifies && (gpp.gpp_parms & G_PART_PARM_GEOM)) {
+		table = gpp.gpp_geom->softc;
+		if (table != NULL && !table->gpt_opened) {
+			error = g_access(LIST_FIRST(&gpp.gpp_geom->consumer),
+			    1, 1, 1);
+			if (error) {
+				gctl_error(req, "%d geom '%s'", error,
+				    gpp.gpp_geom->name);
+				return;
+			}
+			table->gpt_opened = 1;
+			close_on_error = 1;
+		}
+	}
+
+	/* Step 4: dispatch to the verb handler. */
+	error = EDOOFUS;	/* Prevent bogus  uninit. warning. */
+	switch (ctlreq) {
+	case G_PART_CTL_NONE:
+		panic("%s", __func__);
+	case G_PART_CTL_ADD:
+		error = g_part_ctl_add(req, &gpp);
+		break;
+	case G_PART_CTL_COMMIT:
+		error = g_part_ctl_commit(req, &gpp);
+		break;
+	case G_PART_CTL_CREATE:
+		error = g_part_ctl_create(req, &gpp);
+		break;
+	case G_PART_CTL_DELETE:
+		error = g_part_ctl_delete(req, &gpp);
+		break;
+	case G_PART_CTL_DESTROY:
+		error = g_part_ctl_destroy(req, &gpp);
+		break;
+	case G_PART_CTL_MODIFY:
+		error = g_part_ctl_modify(req, &gpp);
+		break;
+	case G_PART_CTL_MOVE:
+		error = g_part_ctl_move(req, &gpp);
+		break;
+	case G_PART_CTL_RECOVER:
+		error = g_part_ctl_recover(req, &gpp);
+		break;
+	case G_PART_CTL_RESIZE:
+		error = g_part_ctl_resize(req, &gpp);
+		break;
+	case G_PART_CTL_UNDO:
+		error = g_part_ctl_undo(req, &gpp);
+		break;
+	}
+
+	/* Implement automatic commit. */
+	if (!error) {
+		auto_commit = (modifies &&
+		    (gpp.gpp_parms & G_PART_PARM_FLAGS) &&
+		    strchr(gpp.gpp_flags, 'C') != NULL) ? 1 : 0;
+		if (auto_commit) {
+			KASSERT(gpp.gpp_parms & G_PART_PARM_GEOM, (__func__));
+			error = g_part_ctl_commit(req, &gpp);
+		}
+	}
+
+	if (error && close_on_error) {
+		g_access(LIST_FIRST(&gpp.gpp_geom->consumer), -1, -1, -1);
+		/*
+		 * Look the table up again instead of using the pointer
+		 * cached before dispatching: a successful "destroy" that is
+		 * followed by a failed automatic commit has freed that table
+		 * and installed a new one as the geom's softc.
+		 */
+		table = gpp.gpp_geom->softc;
+		if (table != NULL)
+			table->gpt_opened = 0;
+	}
+}
+
+/*
+ * Class method invoked to destroy a geom of this class.  The geom is
+ * withered with EINVAL; the method itself always returns 0.
+ */
+static int
+g_part_destroy_geom(struct gctl_req *req, struct g_class *mp,
+    struct g_geom *gp)
+{
+	int error;
+
+	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, gp->name));
+	g_topology_assert();
+
+	error = EINVAL;
+	g_part_wither(gp, error);
+	return (0);
+}
+
+/*
+ * Taste method: check whether provider 'pp' carries a partition table that
+ * one of the registered schemes recognizes.
+ *
+ * A geom and consumer are created and attached to the provider with read
+ * access.  If the provider has media, is not a leaf partition, a scheme
+ * probes successfully and the table can be read, a provider is created for
+ * every entry and the geom is returned.  In all other cases the geom is
+ * withered and NULL is returned.
+ *
+ * The topology lock is dropped while the media is being probed and read
+ * and is held again on return.
+ */
+static struct g_geom *
+g_part_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+{
+	struct g_consumer *cp;
+	struct g_geom *gp;
+	struct g_part_entry *entry;
+	struct g_part_table *table;
+	int attr, depth;
+	int error;
+
+	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name));
+	g_topology_assert();
+
+	/*
+	 * Create a GEOM with consumer and hook it up to the provider.
+	 * With that we become part of the topology. Obtain read access
+	 * to the provider.
+	 */
+	gp = g_new_geomf(mp, "%s", pp->name);
+	cp = g_new_consumer(gp);
+	error = g_attach(cp, pp);
+	if (error == 0)
+		error = g_access(cp, 1, 0, 0);
+	if (error != 0) {
+		g_part_wither(gp, error);
+		return (NULL);
+	}
+
+	g_topology_unlock();
+
+	/*
+	 * Short-circuit the whole probing galore when there's no
+	 * media present.
+	 */
+	if (pp->mediasize == 0 || pp->sectorsize == 0) {
+		error = ENODEV;
+		goto fail;
+	}
+
+	/* Make sure we can nest and if so, determine our depth. */
+	error = g_getattr("PART::isleaf", cp, &attr);
+	if (!error && attr) {
+		error = ENODEV;
+		goto fail;
+	}
+	/* No PART::depth below us means we are the outermost table. */
+	error = g_getattr("PART::depth", cp, &attr);
+	depth = (!error) ? attr + 1 : 0;
+
+	error = g_part_probe(gp, cp, depth);
+	if (error)
+		goto fail;
+
+	table = gp->softc;
+	
+	/*
+	 * Synthesize a disk geometry. Some partitioning schemes
+	 * depend on it and since some file systems need it even
+	 * when the partition scheme doesn't, we do it here in
+	 * scheme-independent code.
+	 */
+	g_part_geometry(table, cp, pp->mediasize / pp->sectorsize);
+
+	error = G_PART_READ(table, cp);
+	if (error)
+		goto fail;
+
+	g_topology_lock();
+	LIST_FOREACH(entry, &table->gpt_entry, gpe_entry)
+		g_part_new_provider(gp, table, entry);
+
+	/* Tasting is done; give up the read access taken above. */
+	g_access(cp, -1, 0, 0);
+	return (gp);
+
+ fail:
+	g_topology_lock();
+	g_access(cp, -1, 0, 0);
+	g_part_wither(gp, error);
+	return (NULL);
+}
+
+/*
+ * Geom methods.
+ */
+
+/*
+ * Access method: forward an access change on one of our providers to the
+ * geom's first consumer.  Every write reference is accompanied by an
+ * exclusive reference on the consumer.  Returns the result of g_access().
+ */
+static int
+g_part_access(struct g_provider *pp, int dr, int dw, int de)
+{
+	struct g_consumer *cp;
+	int excl;
+
+	G_PART_TRACE((G_T_ACCESS, "%s(%s,%d,%d,%d)", __func__, pp->name, dr,
+	    dw, de));
+
+	/* We always gain write-exclusive access. */
+	excl = dw + de;
+	cp = LIST_FIRST(&pp->geom->consumer);
+	return (g_access(cp, dr, dw, excl));
+}
+
+/*
+ * Dumpconf method: append configuration information to 'sb'.
+ *
+ * What is emitted depends on which arguments are set:
+ *  - indent == NULL: a single line fragment with index, offset and type of
+ *    the partition behind provider 'pp';
+ *  - cp != NULL:     nothing (there is no consumer configuration);
+ *  - pp != NULL:     XML elements describing the partition behind 'pp',
+ *                    followed by whatever the scheme adds;
+ *  - otherwise:      XML elements describing the table of geom 'gp',
+ *                    followed by whatever the scheme adds.
+ * Providers without an entry attached (pp->private == NULL) are skipped.
+ */
+static void
+g_part_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
+    struct g_consumer *cp, struct g_provider *pp)
+{
+	char buf[64];
+	struct g_part_entry *entry;
+	struct g_part_table *table;
+
+	KASSERT(sb != NULL && gp != NULL, (__func__));
+	table = gp->softc;
+
+	if (indent == NULL) {
+		KASSERT(cp == NULL && pp != NULL, (__func__));
+		entry = pp->private;
+		if (entry == NULL)
+			return;
+		sbuf_printf(sb, " i %u o %ju ty %s", entry->gpe_index,
+		    (uintmax_t)entry->gpe_offset,
+		    G_PART_TYPE(table, entry, buf, sizeof(buf)));
+	} else if (cp != NULL) {	/* Consumer configuration. */
+		KASSERT(pp == NULL, (__func__));
+		/* none */
+	} else if (pp != NULL) {	/* Provider configuration. */
+		entry = pp->private;
+		if (entry == NULL)
+			return;
+		sbuf_printf(sb, "%s<index>%u</index>\n", indent,
+		    entry->gpe_index);
+		sbuf_printf(sb, "%s<type>%s</type>\n", indent,
+		    G_PART_TYPE(table, entry, buf, sizeof(buf)));
+		sbuf_printf(sb, "%s<offset>%ju</offset>\n", indent,
+		    (uintmax_t)entry->gpe_offset);
+		sbuf_printf(sb, "%s<length>%ju</length>\n", indent,
+		    (uintmax_t)pp->mediasize);
+		/* Let the scheme add its entry-specific elements. */
+		G_PART_DUMPCONF(table, entry, sb, indent);
+	} else {			/* Geom configuration. */
+		sbuf_printf(sb, "%s<scheme>%s</scheme>\n", indent,
+		    table->gpt_scheme->name);
+		sbuf_printf(sb, "%s<entries>%u</entries>\n", indent,
+		    table->gpt_entries);
+		sbuf_printf(sb, "%s<first>%ju</first>\n", indent,
+		    (uintmax_t)table->gpt_first);
+		sbuf_printf(sb, "%s<last>%ju</last>\n", indent,
+		    (uintmax_t)table->gpt_last);
+		sbuf_printf(sb, "%s<fwsectors>%u</fwsectors>\n", indent,
+		    table->gpt_sectors);
+		sbuf_printf(sb, "%s<fwheads>%u</fwheads>\n", indent,
+		    table->gpt_heads);
+		/* A NULL entry asks the scheme for table-wide elements. */
+		G_PART_DUMPCONF(table, NULL, sb, indent);
+	}
+}
+
+/*
+ * Orphan method: the provider we are attached to is going away.  The geom
+ * is withered with the error recorded on that provider.
+ */
+static void
+g_part_orphan(struct g_consumer *cp)
+{
+	struct g_geom *gp;
+	struct g_provider *pp;
+
+	gp = cp->geom;
+	pp = cp->provider;
+	KASSERT(pp != NULL, (__func__));
+	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, pp->name));
+	g_topology_assert();
+
+	/* An orphaned provider is expected to carry an error. */
+	KASSERT(pp->error != 0, (__func__));
+	g_part_wither(gp, pp->error);
+}
+
+/*
+ * Spoiled method: withers the consumer's geom with ENXIO.
+ */
+static void
+g_part_spoiled(struct g_consumer *cp)
+{
+	struct g_geom *gp;
+
+	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name));
+	g_topology_assert();
+
+	gp = cp->geom;
+	g_part_wither(gp, ENXIO);
+}
+
+/*
+ * Start method: handle an I/O request directed at a partition provider.
+ *
+ * Requests for a provider without an entry attached fail with ENXIO.
+ * BIO_READ, BIO_WRITE and BIO_DELETE are clipped to the partition, shifted
+ * by the partition's byte offset and passed down.  BIO_GETATTR is answered
+ * here for the geometry and PART:: attributes; GEOM::kerneldump is
+ * adjusted to the partition and, like BIO_FLUSH and every other attribute,
+ * passed down unchanged otherwise.  Any other command is rejected with
+ * EOPNOTSUPP.
+ */
+static void
+g_part_start(struct bio *bp)
+{
+	struct bio *bp2;
+	struct g_consumer *cp;
+	struct g_geom *gp;
+	struct g_part_entry *entry;
+	struct g_part_table *table;
+	struct g_kerneldump *gkd;
+	struct g_provider *pp;
+
+	pp = bp->bio_to;
+	gp = pp->geom;
+	table = gp->softc;
+	cp = LIST_FIRST(&gp->consumer);
+
+	G_PART_TRACE((G_T_BIO, "%s: cmd=%d, provider=%s", __func__, bp->bio_cmd,
+	    pp->name));
+
+	/* The entry is detached from the provider when it is deleted. */
+	entry = pp->private;
+	if (entry == NULL) {
+		g_io_deliver(bp, ENXIO);
+		return;
+	}
+
+	switch(bp->bio_cmd) {
+	case BIO_DELETE:
+	case BIO_READ:
+	case BIO_WRITE:
+		if (bp->bio_offset >= pp->mediasize) {
+			g_io_deliver(bp, EIO);
+			return;
+		}
+		bp2 = g_clone_bio(bp);
+		if (bp2 == NULL) {
+			g_io_deliver(bp, ENOMEM);
+			return;
+		}
+		/* Truncate requests that run past the partition's end. */
+		if (bp2->bio_offset + bp2->bio_length > pp->mediasize)
+			bp2->bio_length = pp->mediasize - bp2->bio_offset;
+		bp2->bio_done = g_std_done;
+		/* Translate to an offset on the underlying provider. */
+		bp2->bio_offset += entry->gpe_offset;
+		g_io_request(bp2, cp);
+		return;
+	case BIO_FLUSH:
+		break;
+	case BIO_GETATTR:
+		if (g_handleattr_int(bp, "GEOM::fwheads", table->gpt_heads))
+			return;
+		if (g_handleattr_int(bp, "GEOM::fwsectors", table->gpt_sectors))
+			return;
+		if (g_handleattr_int(bp, "PART::isleaf", table->gpt_isleaf))
+			return;
+		if (g_handleattr_int(bp, "PART::depth", table->gpt_depth))
+			return;
+		if (!strcmp("GEOM::kerneldump", bp->bio_attribute)) {
+			/*
+			 * Check that the partition is suitable for kernel
+			 * dumps. Typically only swap partitions should be
+			 * used.
+			 */
+			if (!G_PART_DUMPTO(table, entry)) {
+				g_io_deliver(bp, ENXIO);
+				return;
+			}
+			/* Clip and shift the dump area like regular I/O. */
+			gkd = (struct g_kerneldump *)bp->bio_data;
+			if (gkd->offset >= pp->mediasize) {
+				g_io_deliver(bp, EIO);
+				return;
+			}
+			if (gkd->offset + gkd->length > pp->mediasize)
+				gkd->length = pp->mediasize - gkd->offset;
+			gkd->offset += entry->gpe_offset;
+		}
+		break;
+	default:
+		g_io_deliver(bp, EOPNOTSUPP);
+		return;
+	}
+
+	/* Everything that got here is passed down as-is. */
+	bp2 = g_clone_bio(bp);
+	if (bp2 == NULL) {
+		g_io_deliver(bp, ENOMEM);
+		return;
+	}
+	bp2->bio_done = g_std_done;
+	g_io_request(bp2, cp);
+}
--- /dev/null
+++ sys/geom/part/g_part_apm.c
@@ -0,0 +1,436 @@
+/*-
+ * Copyright (c) 2006, 2007 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/geom/part/g_part_apm.c,v 1.3.2.1 2007/10/29 00:11:39 marcel Exp $");
+
+#include <sys/param.h>
+#include <sys/apm.h>
+#include <sys/bio.h>
+#include <sys/diskmbr.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/kobj.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+#include <sys/systm.h>
+#include <geom/geom.h>
+#include <geom/part/g_part.h>
+
+#include "g_part_if.h"
+
+/*
+ * In-memory APM table: the generic g_part table followed by the decoded
+ * Driver Descriptor Record and the partition map entry that describes
+ * the map itself.
+ */
+struct g_part_apm_table {
+	struct g_part_table	base;	/* First: code casts from the base. */
+	struct apm_ddr		ddr;
+	struct apm_ent		self;
+};
+
+/*
+ * In-memory APM partition entry: the generic g_part entry followed by
+ * the decoded partition map entry.
+ */
+struct g_part_apm_entry {
+	struct g_part_entry	base;	/* First: code casts from the base. */
+	struct apm_ent		ent;
+};
+
+static int g_part_apm_add(struct g_part_table *, struct g_part_entry *,
+    struct g_part_parms *);
+static int g_part_apm_create(struct g_part_table *, struct g_part_parms *);
+static int g_part_apm_destroy(struct g_part_table *, struct g_part_parms *);
+static int g_part_apm_dumpto(struct g_part_table *, struct g_part_entry *);
+static int g_part_apm_modify(struct g_part_table *, struct g_part_entry *,
+    struct g_part_parms *);
+static char *g_part_apm_name(struct g_part_table *, struct g_part_entry *,
+    char *, size_t);
+static int g_part_apm_probe(struct g_part_table *, struct g_consumer *);
+static int g_part_apm_read(struct g_part_table *, struct g_consumer *);
+static const char *g_part_apm_type(struct g_part_table *, struct g_part_entry *,
+    char *, size_t);
+static int g_part_apm_write(struct g_part_table *, struct g_consumer *);
+
+static kobj_method_t g_part_apm_methods[] = {
+	KOBJMETHOD(g_part_add,		g_part_apm_add),
+	KOBJMETHOD(g_part_create,	g_part_apm_create),
+	KOBJMETHOD(g_part_destroy,	g_part_apm_destroy),
+	KOBJMETHOD(g_part_dumpto,	g_part_apm_dumpto),
+	KOBJMETHOD(g_part_modify,	g_part_apm_modify),
+	KOBJMETHOD(g_part_name,		g_part_apm_name),
+	KOBJMETHOD(g_part_probe,	g_part_apm_probe),
+	KOBJMETHOD(g_part_read,		g_part_apm_read),
+	KOBJMETHOD(g_part_type,		g_part_apm_type),
+	KOBJMETHOD(g_part_write,	g_part_apm_write),
+	{ 0, 0 }
+};
+
+static struct g_part_scheme g_part_apm_scheme = {
+	"APM",
+	g_part_apm_methods,
+	sizeof(struct g_part_apm_table),
+	.gps_entrysz = sizeof(struct g_part_apm_entry),
+	.gps_minent = 16,
+	.gps_maxent = INT_MAX,
+};
+G_PART_SCHEME_DECLARE(g_part_apm_scheme);
+
+/*
+ * Translate a user-supplied partition type into an APM type string.
+ *
+ * A leading '!' introduces a literal APM type.  It is rejected when it is
+ * longer than bufsz or when it names the partition map itself or an unused
+ * slot; a literal of exactly bufsz characters is accepted and copied
+ * without a terminating NUL.  Any other string must match one of the
+ * recognized aliases, compared without regard to case.
+ *
+ * Returns 0 with buf filled in, or EINVAL with buf untouched.
+ */
+static int
+apm_parse_type(const char *type, char *buf, size_t bufsz)
+{
+	static const struct {
+		enum g_part_alias	alias;
+		const char		*apmtype;
+	} aliases[] = {
+		{ G_PART_ALIAS_FREEBSD,		APM_ENT_TYPE_FREEBSD },
+		{ G_PART_ALIAS_FREEBSD_SWAP,	APM_ENT_TYPE_FREEBSD_SWAP },
+		{ G_PART_ALIAS_FREEBSD_UFS,	APM_ENT_TYPE_FREEBSD_UFS },
+		{ G_PART_ALIAS_FREEBSD_VINUM,	APM_ENT_TYPE_FREEBSD_VINUM },
+		{ G_PART_ALIAS_FREEBSD_ZFS,	APM_ENT_TYPE_FREEBSD_ZFS },
+	};
+	const char *raw;
+	size_t i;
+
+	if (type[0] != '!') {
+		for (i = 0; i < sizeof(aliases) / sizeof(aliases[0]); i++) {
+			if (strcasecmp(type,
+			    g_part_alias_name(aliases[i].alias)) == 0) {
+				strcpy(buf, aliases[i].apmtype);
+				return (0);
+			}
+		}
+		return (EINVAL);
+	}
+
+	/* Literal type: everything after the '!'. */
+	raw = type + 1;
+	if (strlen(raw) > bufsz)
+		return (EINVAL);
+	if (strcmp(raw, APM_ENT_TYPE_SELF) == 0 ||
+	    strcmp(raw, APM_ENT_TYPE_UNUSED) == 0)
+		return (EINVAL);
+	strncpy(buf, raw, bufsz);
+	return (0);
+}
+
+/*
+ * Read sector blk of the consumer's provider and decode it as a partition
+ * map entry into *ent.  Multi-byte fields are stored big-endian on disk.
+ * Returns 0 on success or the error reported by g_read_data().
+ */
+static int
+apm_read_ent(struct g_consumer *cp, uint32_t blk, struct apm_ent *ent)
+{
+	struct g_provider *prov;
+	char *sector;
+	int rderr;
+
+	prov = cp->provider;
+	sector = g_read_data(cp, prov->sectorsize * blk, prov->sectorsize,
+	    &rderr);
+	if (sector == NULL)
+		return (rderr);
+
+	/* Numeric fields first, then the two fixed-width strings. */
+	ent->ent_sig = be16dec(sector);
+	ent->ent_pmblkcnt = be32dec(sector + 4);
+	ent->ent_start = be32dec(sector + 8);
+	ent->ent_size = be32dec(sector + 12);
+	bcopy(sector + 16, ent->ent_name, sizeof(ent->ent_name));
+	bcopy(sector + 48, ent->ent_type, sizeof(ent->ent_type));
+
+	g_free(sector);
+	return (0);
+}
+
+/*
+ * Scheme-specific part of the add verb: fill in the APM entry from the
+ * request parameters.  Returns 0, or EINVAL when the type cannot be
+ * parsed or the label does not fit the entry's name field.
+ */
+static int
+g_part_apm_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
+    struct g_part_parms *gpp)
+{
+	struct g_part_apm_table *apmtbl;
+	struct apm_ent *ent;
+	int rc;
+
+	apmtbl = (struct g_part_apm_table *)basetable;
+	ent = &((struct g_part_apm_entry *)baseentry)->ent;
+
+	ent->ent_sig = APM_ENT_SIG;
+	ent->ent_pmblkcnt = apmtbl->self.ent_pmblkcnt;
+	ent->ent_start = gpp->gpp_start;
+	ent->ent_size = gpp->gpp_size;
+
+	/* An entry marked deleted still holds its old type and name. */
+	if (baseentry->gpe_deleted) {
+		bzero(ent->ent_type, sizeof(ent->ent_type));
+		bzero(ent->ent_name, sizeof(ent->ent_name));
+	}
+
+	rc = apm_parse_type(gpp->gpp_type, ent->ent_type,
+	    sizeof(ent->ent_type));
+	if (rc != 0)
+		return (rc);
+
+	/* The label is optional. */
+	if ((gpp->gpp_parms & G_PART_PARM_LABEL) == 0)
+		return (0);
+	if (strlen(gpp->gpp_label) > sizeof(ent->ent_name))
+		return (EINVAL);
+	strncpy(ent->ent_name, gpp->gpp_label, sizeof(ent->ent_name));
+	return (0);
+}
+
+/*
+ * Scheme-specific part of the create verb: set up an empty partition map
+ * for the provider named in the request.  Only 512-byte sectors are
+ * supported, and the media must hold at least 2 + 2 * gpt_entries
+ * sectors; otherwise ENOSPC is returned.
+ */
+static int
+g_part_apm_create(struct g_part_table *basetable, struct g_part_parms *gpp)
+{
+	struct g_part_apm_table *apmtbl;
+	struct g_provider *prov;
+	struct apm_ent *self;
+
+	apmtbl = (struct g_part_apm_table *)basetable;
+	prov = gpp->gpp_provider;
+
+	if (prov->sectorsize != 512)
+		return (ENOSPC);
+	if (prov->mediasize <
+	    (2 + 2 * basetable->gpt_entries) * prov->sectorsize)
+		return (ENOSPC);
+
+	/* Sector 0 is the DDR, sector 1 the self entry, then the map. */
+	basetable->gpt_first = 2 + basetable->gpt_entries;
+	basetable->gpt_last = (prov->mediasize / prov->sectorsize) - 1;
+
+	apmtbl->ddr.ddr_sig = APM_DDR_SIG;
+	apmtbl->ddr.ddr_blksize = prov->sectorsize;
+	apmtbl->ddr.ddr_blkcount = basetable->gpt_last + 1;
+
+	/* The map's first entry describes the map itself. */
+	self = &apmtbl->self;
+	self->ent_sig = APM_ENT_SIG;
+	self->ent_pmblkcnt = basetable->gpt_entries + 1;
+	self->ent_start = 1;
+	self->ent_size = self->ent_pmblkcnt;
+	strcpy(self->ent_name, "Apple");
+	strcpy(self->ent_type, APM_ENT_TYPE_SELF);
+	return (0);
+}
+
+/*
+ * Scheme-specific part of the destroy verb.  Always returns 0.
+ */
+static int
+g_part_apm_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
+{
+
+	/* Bits 0 and 1: mark the first 2 sectors so they get cleared. */
+	basetable->gpt_smhead |= 0x3;
+	return (0);
+}
+
+/*
+ * Return 1 when the entry may receive kernel dumps, which is the case
+ * only for entries of the FreeBSD swap type, and 0 otherwise.
+ */
+static int
+g_part_apm_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
+{
+	struct g_part_apm_entry *apment;
+
+	apment = (struct g_part_apm_entry *)baseentry;
+	if (strcmp(apment->ent.ent_type, APM_ENT_TYPE_FREEBSD_SWAP) != 0)
+		return (0);
+	return (1);
+}
+
+/*
+ * Scheme-specific part of the modify verb: update the type and/or label
+ * of an existing entry, depending on which parameters were supplied.
+ * Returns 0 or EINVAL; on failure the entry is left unchanged.
+ */
+static int
+g_part_apm_modify(struct g_part_table *basetable,
+    struct g_part_entry *baseentry, struct g_part_parms *gpp)
+{
+	struct apm_ent *ent;
+	int has_label, rc;
+
+	ent = &((struct g_part_apm_entry *)baseentry)->ent;
+	has_label = (gpp->gpp_parms & G_PART_PARM_LABEL) != 0;
+
+	/* Vet the label before touching anything. */
+	if (has_label && strlen(gpp->gpp_label) > sizeof(ent->ent_name))
+		return (EINVAL);
+
+	if ((gpp->gpp_parms & G_PART_PARM_TYPE) != 0) {
+		rc = apm_parse_type(gpp->gpp_type, ent->ent_type,
+		    sizeof(ent->ent_type));
+		if (rc != 0)
+			return (rc);
+	}
+
+	if (has_label)
+		strncpy(ent->ent_name, gpp->gpp_label,
+		    sizeof(ent->ent_name));
+	return (0);
+}
+
+/*
+ * Format the entry's name into buf: the letter 's' followed by the
+ * entry's index plus one.  Returns buf.
+ */
+static char *
+g_part_apm_name(struct g_part_table *table, struct g_part_entry *baseentry,
+    char *buf, size_t bufsz)
+{
+	int unit;
+
+	unit = baseentry->gpe_index + 1;
+	snprintf(buf, bufsz, "s%d", unit);
+	return (buf);
+}
+
+/*
+ * Probe the consumer's provider for an Apple Partition Map.
+ *
+ * A match requires a Driver Descriptor Record in sector 0 whose block
+ * size equals the provider's sector size, and a partition map entry in
+ * sector 1 that describes the map itself.  The decoded DDR and self
+ * entry are stored in the table as a side effect, also when the probe
+ * fails part-way through.
+ *
+ * Returns G_PART_PROBE_PRI_NORM on a match, otherwise an errno value.
+ */
+static int
+g_part_apm_probe(struct g_part_table *basetable, struct g_consumer *cp)
+{
+	struct g_provider *pp;
+	struct g_part_apm_table *table;
+	char *buf;
+	int error;
+
+	/* We don't nest, which means that our depth should be 0. */
+	if (basetable->gpt_depth != 0)
+		return (ENXIO);
+
+	table = (struct g_part_apm_table *)basetable;
+	pp = cp->provider;
+
+	/* Sanity-check the provider. */
+	if (pp->mediasize < 4 * pp->sectorsize)
+		return (ENOSPC);
+
+	/* Check that there's a Driver Descriptor Record (DDR). */
+	buf = g_read_data(cp, 0L, pp->sectorsize, &error);
+	if (buf == NULL)
+		return (error);
+	table->ddr.ddr_sig = be16dec(buf);
+	table->ddr.ddr_blksize = be16dec(buf + 2);
+	table->ddr.ddr_blkcount = be32dec(buf + 4);
+	g_free(buf);
+	if (table->ddr.ddr_sig != APM_DDR_SIG)
+		return (ENXIO);
+	if (table->ddr.ddr_blksize != pp->sectorsize)
+		return (ENXIO);
+
+	/* Check that there's a Partition Map. */
+	error = apm_read_ent(cp, 1, &table->self);
+	if (error)
+		return (error);
+	if (table->self.ent_sig != APM_ENT_SIG)
+		return (ENXIO);
+	if (strcmp(table->self.ent_type, APM_ENT_TYPE_SELF))
+		return (ENXIO);
+	/* The map cannot span the whole disk described by the DDR. */
+	if (table->self.ent_pmblkcnt >= table->ddr.ddr_blkcount)
+		return (ENXIO);
+	return (G_PART_PROBE_PRI_NORM);
+}
+
+/*
+ * Build the in-memory entry list from the on-disk partition map.
+ *
+ * Uses the DDR and self entry already stored in the table (see
+ * g_part_apm_probe()).  The self entry lives in sector 1, so entry
+ * 'index' is read from sector index + 1.  Entries that cannot be read
+ * or that are marked unused are skipped.  Always returns 0.
+ */
+static int
+g_part_apm_read(struct g_part_table *basetable, struct g_consumer *cp)
+{
+	struct apm_ent ent;
+	struct g_part_apm_entry *entry;
+	struct g_part_apm_table *table;
+	int error, index;
+
+	table = (struct g_part_apm_table *)basetable;
+
+	/* The allocatable area starts right after the map's last sector. */
+	basetable->gpt_first = table->self.ent_pmblkcnt + 1;
+	basetable->gpt_last = table->ddr.ddr_blkcount - 1;
+	/* The self entry does not count as a usable slot. */
+	basetable->gpt_entries = table->self.ent_pmblkcnt - 1;
+
+	for (index = table->self.ent_pmblkcnt - 1; index > 0; index--) {
+		error = apm_read_ent(cp, index + 1, &ent);
+		if (error)
+			continue;
+		if (!strcmp(ent.ent_type, APM_ENT_TYPE_UNUSED))
+			continue;
+		/* g_part_new_entry() takes the first and last LBA. */
+		entry = (struct g_part_apm_entry *)g_part_new_entry(basetable,
+		    index, ent.ent_start, ent.ent_start + ent.ent_size - 1);
+		entry->ent = ent;
+	}
+
+	return (0);
+}
+
+/*
+ * Return a printable partition type for the entry.  Types that have an
+ * alias yield the alias name; any other type is copied into buf behind
+ * a '!' prefix, truncated to fit, and buf is returned.
+ */
+static const char *
+g_part_apm_type(struct g_part_table *basetable, struct g_part_entry *baseentry,
+    char *buf, size_t bufsz)
+{
+	static const struct {
+		const char		*apmtype;
+		enum g_part_alias	alias;
+	} known[] = {
+		{ APM_ENT_TYPE_FREEBSD,		G_PART_ALIAS_FREEBSD },
+		{ APM_ENT_TYPE_FREEBSD_SWAP,	G_PART_ALIAS_FREEBSD_SWAP },
+		{ APM_ENT_TYPE_FREEBSD_UFS,	G_PART_ALIAS_FREEBSD_UFS },
+		{ APM_ENT_TYPE_FREEBSD_VINUM,	G_PART_ALIAS_FREEBSD_VINUM },
+		{ APM_ENT_TYPE_FREEBSD_ZFS,	G_PART_ALIAS_FREEBSD_ZFS },
+	};
+	struct g_part_apm_entry *entry;
+	const char *raw;
+	size_t i, n;
+
+	entry = (struct g_part_apm_entry *)baseentry;
+	raw = entry->ent.ent_type;
+
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+		if (strcmp(raw, known[i].apmtype) == 0)
+			return (g_part_alias_name(known[i].alias));
+	}
+
+	/* No alias: leave room for the '!' prefix and the final NUL. */
+	n = MIN(sizeof(entry->ent.ent_type), bufsz - 2);
+	buf[0] = '!';
+	bcopy(raw, buf + 1, n);
+	buf[n + 1] = '\0';
+	return (buf);
+}
+
+/*
+ * Write the in-memory partition table to disk.
+ *
+ * A single 512-byte scratch sector is reused for every write.  The DDR
+ * (sector 0) and the self entry (sector 1) are written only when the
+ * table is newly created.  Every slot 1..gpt_entries is then written to
+ * sector index + 1; slots without a live entry are written as unused.
+ *
+ * Returns 0, or the error of the first g_write_data() that fails.
+ */
+static int
+g_part_apm_write(struct g_part_table *basetable, struct g_consumer *cp)
+{
+	char buf[512];
+	struct g_part_entry *baseentry;
+	struct g_part_apm_entry *entry;
+	struct g_part_apm_table *table;
+	int error, index;
+
+	table = (struct g_part_apm_table *)basetable;
+	bzero(buf, sizeof(buf));
+
+	/* Write the DDR and 'self' entry only when we're newly created. */
+	if (basetable->gpt_created) {
+		be16enc(buf, table->ddr.ddr_sig);
+		be16enc(buf + 2, table->ddr.ddr_blksize);
+		be32enc(buf + 4, table->ddr.ddr_blkcount);
+		error = g_write_data(cp, 0, buf, sizeof(buf));
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * The first 8 bytes (signature, padding, map block count) are the
+	 * same for every map entry.  They are set once here, overwriting
+	 * the DDR fields above, and stay in buf for all writes below.
+	 */
+	be16enc(buf, table->self.ent_sig);
+	be16enc(buf + 2, 0);
+	be32enc(buf + 4, table->self.ent_pmblkcnt);
+
+	if (basetable->gpt_created) {
+		be32enc(buf + 8, table->self.ent_start);
+		be32enc(buf + 12, table->self.ent_size);
+		bcopy(table->self.ent_name, buf + 16,
+		    sizeof(table->self.ent_name));
+		bcopy(table->self.ent_type, buf + 48,
+		    sizeof(table->self.ent_type));
+		error = g_write_data(cp, 512, buf, sizeof(buf));
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Step through the entry list alongside the slot index.  This
+	 * assumes the list is sorted by ascending gpe_index.
+	 */
+	baseentry = LIST_FIRST(&basetable->gpt_entry);
+	for (index = 1; index <= basetable->gpt_entries; index++) {
+		entry = (baseentry != NULL && index == baseentry->gpe_index)
+		    ? (struct g_part_apm_entry *)baseentry : NULL;
+		if (entry != NULL && !baseentry->gpe_deleted) {
+			be32enc(buf + 8, entry->ent.ent_start);
+			be32enc(buf + 12, entry->ent.ent_size);
+			bcopy(entry->ent.ent_name, buf + 16,
+			    sizeof(entry->ent.ent_name));
+			bcopy(entry->ent.ent_type, buf + 48,
+			    sizeof(entry->ent.ent_type));
+		} else {
+			/* Clear start, size, name and type; mark unused. */
+			bzero(buf + 8, 4 + 4 + 32 + 32);
+			strcpy(buf + 48, APM_ENT_TYPE_UNUSED);
+		}
+		error = g_write_data(cp, (index + 1) * 512, buf, sizeof(buf));
+		if (error)
+			return (error);
+		/* A deleted entry still occupies its slot in the list. */
+		if (entry != NULL)
+			baseentry = LIST_NEXT(baseentry, gpe_entry);
+	}
+
+	return (0);
+}
--- /dev/null
+++ sys/geom/part/g_part.h
@@ -0,0 +1,146 @@
+/*-
+ * Copyright (c) 2006, 2007 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/geom/part/g_part.h,v 1.3.2.1 2007/10/29 00:11:39 marcel Exp $
+ */
+
+#ifndef _GEOM_PART_H_
+#define	_GEOM_PART_H_
+
+#define	G_PART_TRACE(args)	g_trace args
+
+#define G_PART_PROBE_PRI_LOW	-10
+#define	G_PART_PROBE_PRI_NORM	-5
+#define	G_PART_PROBE_PRI_HIGH	0
+
+/*
+ * Scheme-independent partition type aliases.  g_part_alias_name() returns
+ * the name string for an alias.
+ */
+enum g_part_alias {
+	G_PART_ALIAS_EFI,		/* An EFI system partition entry. */
+	G_PART_ALIAS_FREEBSD,		/* A BSD labeled partition entry. */
+	G_PART_ALIAS_FREEBSD_SWAP,	/* A swap partition entry. */
+	G_PART_ALIAS_FREEBSD_UFS,	/* A UFS/UFS2 file system entry. */
+	G_PART_ALIAS_FREEBSD_VINUM,	/* A Vinum partition entry. */
+	G_PART_ALIAS_FREEBSD_ZFS,	/* A ZFS file system entry. */
+	G_PART_ALIAS_MBR,		/* An MBR (extended) partition entry. */
+	/* Keep the following last */
+	G_PART_ALIAS_COUNT
+};
+
+const char *g_part_alias_name(enum g_part_alias);
+
+/* G_PART scheme (KOBJ class). */
+struct g_part_scheme {
+	KOBJ_CLASS_FIELDS;
+	size_t		gps_entrysz;
+	int		gps_minent;
+	int		gps_maxent;
+};
+#define	G_PART_SCHEME_DECLARE(s)	DATA_SET(g_part_scheme_set, s)
+
+struct g_part_entry {
+	LIST_ENTRY(g_part_entry) gpe_entry;
+	struct g_provider *gpe_pp;	/* Corresponding provider. */
+	off_t		gpe_offset;	/* Byte offset. */
+	quad_t		gpe_start;	/* First LBA of partition. */
+	quad_t		gpe_end;	/* Last LBA of partition. */
+	int		gpe_index;
+	int		gpe_created:1;	/* Entry is newly created. */
+	int		gpe_deleted:1;	/* Entry has been deleted. */
+	int		gpe_modified:1;	/* Entry has been modified. */
+};
+
+/* G_PART table (KOBJ instance). */
+struct g_part_table {
+	KOBJ_FIELDS;
+	struct g_part_scheme *gpt_scheme;
+	struct g_geom	*gpt_gp;
+	LIST_HEAD(, g_part_entry) gpt_entry;
+	quad_t		gpt_first;	/* First allocatable LBA */
+	quad_t		gpt_last;	/* Last allocatable LBA */
+	int		gpt_entries;
+	/*
+	 * gpt_smhead and gpt_smtail are bitmaps representing the first
+	 * 32 sectors on the disk (gpt_smhead) and the last 32 sectors
+	 * on the disk (gpt_smtail). These maps are used by the commit
+	 * verb to clear sectors previously used by a scheme after the
+	 * partitioning scheme has been destroyed.
+	 */
+	uint32_t	gpt_smhead;
+	uint32_t	gpt_smtail;
+	/*
+	 * gpt_sectors and gpt_heads are the fixed or synthesized number
+	 * of sectors per track and heads (resp) that make up a disk's
+	 * geometry. This is to support partitioning schemes as well as
+	 * file systems that work on a geometry. The MBR scheme and the
+	 * MS-DOS (FAT) file system come to mind.
+	 * We keep track of whether the geometry is fixed or synthesized
+	 * so that a partitioning scheme can correct the synthesized
+	 * geometry, based on the on-disk metadata.
+	 */
+	uint32_t	gpt_sectors;
+	uint32_t	gpt_heads;
+
+	int		gpt_depth;	/* Sub-partitioning level. */
+	int		gpt_isleaf:1;	/* Cannot be sub-partitioned. */
+	int		gpt_created:1;	/* Newly created. */
+	int		gpt_modified:1;	/* Table changes have been made. */
+	int		gpt_opened:1;	/* Permissions obtained. */
+	int		gpt_fixgeom:1;	/* Geometry is fixed. */
+};
+
+struct g_part_entry *g_part_new_entry(struct g_part_table *, int, quad_t,
+    quad_t);
+
+/* G_PART ctlreq parameters. */
+#define	G_PART_PARM_ENTRIES	0x0001
+#define	G_PART_PARM_FLAGS	0x0002
+#define	G_PART_PARM_GEOM	0x0004
+#define	G_PART_PARM_INDEX	0x0008
+#define	G_PART_PARM_LABEL	0x0010
+#define	G_PART_PARM_OUTPUT	0x0020
+#define	G_PART_PARM_PROVIDER	0x0040
+#define	G_PART_PARM_SCHEME	0x0080
+#define	G_PART_PARM_SIZE	0x0100
+#define	G_PART_PARM_START	0x0200
+#define	G_PART_PARM_TYPE	0x0400
+#define	G_PART_PARM_VERSION	0x0800
+
+struct g_part_parms {
+	unsigned int	gpp_parms;
+	unsigned int	gpp_entries;
+	const char	*gpp_flags;
+	struct g_geom	*gpp_geom;
+	unsigned int	gpp_index;
+	const char	*gpp_label;
+	struct g_provider *gpp_provider;
+	struct g_part_scheme *gpp_scheme;
+	quad_t		gpp_size;
+	quad_t		gpp_start;
+	const char	*gpp_type;
+	unsigned int	gpp_version;
+};
+
+void g_part_geometry_heads(off_t, u_int, off_t *, u_int *);
+
+#endif /* !_GEOM_PART_H_ */
--- /dev/null
+++ sys/geom/part/g_part_if.m
@@ -0,0 +1,118 @@
+#-
+# Copyright (c) 2006, 2007 Marcel Moolenaar
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# $FreeBSD: src/sys/geom/part/g_part_if.m,v 1.2 2007/02/08 04:02:56 rodrigc Exp $
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/sbuf.h>
+#include <sys/bus.h>
+#include <machine/bus.h>
+#include <sys/systm.h>
+#include <geom/geom.h>
+#include <geom/part/g_part.h>
+
+# The G_PART scheme interface.
+
+INTERFACE g_part;
+
+# add() - scheme specific processing for the add verb.
+METHOD int add {
+	struct g_part_table *table;
+	struct g_part_entry *entry;
+	struct g_part_parms *gpp;
+};
+
+# create() - scheme specific processing for the create verb.
+METHOD int create {
+	struct g_part_table *table;
+	struct g_part_parms *gpp;
+};
+
+# destroy() - scheme specific processing for the destroy verb.
+METHOD int destroy {
+	struct g_part_table *table;
+	struct g_part_parms *gpp;
+};
+
+# dumpconf()
+METHOD void dumpconf {
+	struct g_part_table *table;
+	struct g_part_entry *entry;
+	struct sbuf *sb;
+	const char *indent;
+};
+
+# dumpto() - return whether the partition can be used for kernel dumps.
+METHOD int dumpto {
+	struct g_part_table *table;
+	struct g_part_entry *entry;
+};
+
+# modify() - scheme specific processing for the modify verb.
+METHOD int modify {
+	struct g_part_table *table;
+	struct g_part_entry *entry;
+	struct g_part_parms *gpp;
+};
+
+# name() - return the name of the given partition entry.
+# Typical names are "p1", "s0" or "c".
+METHOD const char * name {
+	struct g_part_table *table;
+	struct g_part_entry *entry;
+	char *buf;
+	size_t bufsz;
+};
+
+# probe() - probe the provider attached to the given consumer for the
+# existence of the scheme implemented by the G_PART interface handler.
+METHOD int probe {
+	struct g_part_table *table;
+	struct g_consumer *cp;
+};
+
+# read() - read the on-disk partition table into memory.
+METHOD int read {
+	struct g_part_table *table;
+	struct g_consumer *cp;
+};
+
+# type() - return a string representation of the partition type.
+# Preferably, the alias names.
+METHOD const char * type {
+        struct g_part_table *table;
+        struct g_part_entry *entry;
+        char *buf;
+        size_t bufsz;
+};
+
+# write() - write the in-memory partition table to disk.
+METHOD int write {
+	struct g_part_table *table;
+	struct g_consumer *cp;
+};
--- /dev/null
+++ sys/geom/part/g_part_gpt.c
@@ -0,0 +1,766 @@
+/*-
+ * Copyright (c) 2002, 2005, 2006, 2007 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/geom/part/g_part_gpt.c,v 1.3.2.1 2007/10/29 00:11:39 marcel Exp $");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/diskmbr.h>
+#include <sys/endian.h>
+#include <sys/gpt.h>
+#include <sys/kernel.h>
+#include <sys/kobj.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+#include <sys/systm.h>
+#include <sys/uuid.h>
+#include <geom/geom.h>
+#include <geom/part/g_part.h>
+
+#include "g_part_if.h"
+
+CTASSERT(offsetof(struct gpt_hdr, padding) == 92);
+CTASSERT(sizeof(struct gpt_ent) == 128);
+
+#define	EQUUID(a,b)	(memcmp(a, b, sizeof(struct uuid)) == 0)
+
+enum gpt_elt {
+	GPT_ELT_PRIHDR,
+	GPT_ELT_PRITBL,
+	GPT_ELT_SECHDR,
+	GPT_ELT_SECTBL,
+	GPT_ELT_COUNT
+};
+
+enum gpt_state {
+	GPT_STATE_UNKNOWN,	/* Not determined. */
+	GPT_STATE_MISSING,	/* No signature found. */
+	GPT_STATE_CORRUPT,	/* Checksum mismatch. */
+	GPT_STATE_INVALID,	/* Nonconformant/invalid. */
+	GPT_STATE_OK		/* Perfectly fine. */
+};
+
+/*
+ * In-memory GPT table: the generic g_part table followed by a GPT header
+ * and, for each on-disk element (see enum gpt_elt), its LBA and its
+ * validation state.
+ */
+struct g_part_gpt_table {
+	struct g_part_table	base;
+	struct gpt_hdr		hdr;
+	quad_t			lba[GPT_ELT_COUNT];	/* By enum gpt_elt. */
+	enum gpt_state		state[GPT_ELT_COUNT];	/* By enum gpt_elt. */
+};
+
+struct g_part_gpt_entry {
+	struct g_part_entry	base;
+	struct gpt_ent		ent;
+};
+
+static int g_part_gpt_add(struct g_part_table *, struct g_part_entry *,
+    struct g_part_parms *);
+static int g_part_gpt_create(struct g_part_table *, struct g_part_parms *);
+static int g_part_gpt_destroy(struct g_part_table *, struct g_part_parms *);
+static int g_part_gpt_dumpto(struct g_part_table *, struct g_part_entry *);
+static int g_part_gpt_modify(struct g_part_table *, struct g_part_entry *,  
+    struct g_part_parms *);
+static char *g_part_gpt_name(struct g_part_table *, struct g_part_entry *,
+    char *, size_t);
+static int g_part_gpt_probe(struct g_part_table *, struct g_consumer *);
+static int g_part_gpt_read(struct g_part_table *, struct g_consumer *);
+static const char *g_part_gpt_type(struct g_part_table *, struct g_part_entry *,
+    char *, size_t);
+static int g_part_gpt_write(struct g_part_table *, struct g_consumer *);
+
+static kobj_method_t g_part_gpt_methods[] = {
+	KOBJMETHOD(g_part_add,		g_part_gpt_add),
+	KOBJMETHOD(g_part_create,	g_part_gpt_create),
+	KOBJMETHOD(g_part_destroy,	g_part_gpt_destroy),
+	KOBJMETHOD(g_part_dumpto,	g_part_gpt_dumpto),
+	KOBJMETHOD(g_part_modify,	g_part_gpt_modify),
+	KOBJMETHOD(g_part_name,		g_part_gpt_name),
+	KOBJMETHOD(g_part_probe,	g_part_gpt_probe),
+	KOBJMETHOD(g_part_read,		g_part_gpt_read),
+	KOBJMETHOD(g_part_type,		g_part_gpt_type),
+	KOBJMETHOD(g_part_write,	g_part_gpt_write),
+	{ 0, 0 }
+};
+
+static struct g_part_scheme g_part_gpt_scheme = {
+	"GPT",
+	g_part_gpt_methods,
+	sizeof(struct g_part_gpt_table),
+	.gps_entrysz = sizeof(struct g_part_gpt_entry),
+	.gps_minent = 128,
+	.gps_maxent = INT_MAX,
+};
+G_PART_SCHEME_DECLARE(g_part_gpt_scheme);
+
+static struct uuid gpt_uuid_efi = GPT_ENT_TYPE_EFI;
+static struct uuid gpt_uuid_freebsd = GPT_ENT_TYPE_FREEBSD;
+static struct uuid gpt_uuid_freebsd_swap = GPT_ENT_TYPE_FREEBSD_SWAP;
+static struct uuid gpt_uuid_freebsd_ufs = GPT_ENT_TYPE_FREEBSD_UFS;
+static struct uuid gpt_uuid_freebsd_vinum = GPT_ENT_TYPE_FREEBSD_VINUM;
+static struct uuid gpt_uuid_freebsd_zfs = GPT_ENT_TYPE_FREEBSD_ZFS;
+static struct uuid gpt_uuid_linux_swap = GPT_ENT_TYPE_LINUX_SWAP;
+static struct uuid gpt_uuid_mbr = GPT_ENT_TYPE_MBR;
+static struct uuid gpt_uuid_unused = GPT_ENT_TYPE_UNUSED;
+
+/*
+ * Read and validate the primary or the secondary GPT header.
+ *
+ * elt selects the header: GPT_ELT_PRIHDR is read from LBA 1, anything
+ * else from the last sector of the provider.  The raw header is copied
+ * into *hdr and its fields are converted to host byte order as they are
+ * validated.  The outcome is recorded in table->state[elt]:
+ *
+ *   GPT_STATE_MISSING	read failed or no GPT signature present
+ *   GPT_STATE_CORRUPT	implausible header size or checksum mismatch
+ *   GPT_STATE_INVALID	revision, self LBA, managed area or table
+ *			location failed a sanity check
+ *   GPT_STATE_OK	every check passed
+ *
+ * When validation stops early, *hdr is left only partially converted.
+ * The sector buffer obtained from g_read_data() is released on every
+ * path.
+ */
+static void
+gpt_read_hdr(struct g_part_gpt_table *table, struct g_consumer *cp,
+    enum gpt_elt elt, struct gpt_hdr *hdr)
+{
+	struct uuid uuid;
+	struct g_provider *pp;
+	char *buf;
+	quad_t lba, last;
+	int crcok, error;
+	uint32_t crc, sz;
+
+	pp = cp->provider;
+	last = (pp->mediasize / pp->sectorsize) - 1;
+	table->lba[elt] = (elt == GPT_ELT_PRIHDR) ? 1 : last;
+	table->state[elt] = GPT_STATE_MISSING;
+	buf = g_read_data(cp, table->lba[elt] * pp->sectorsize, pp->sectorsize,
+	    &error);
+	if (buf == NULL)
+		return;
+	bcopy(buf, hdr, sizeof(*hdr));
+	if (memcmp(hdr->hdr_sig, GPT_HDR_SIG, sizeof(hdr->hdr_sig)) != 0) {
+		g_free(buf);
+		return;
+	}
+
+	table->state[elt] = GPT_STATE_CORRUPT;
+	sz = le32toh(hdr->hdr_size);
+	if (sz < 92 || sz > pp->sectorsize) {
+		g_free(buf);
+		return;
+	}
+	crc = le32toh(hdr->hdr_crc_self);
+	hdr->hdr_crc_self = 0;
+	/*
+	 * The checksum covers sz bytes with the checksum field zeroed.
+	 * sz may exceed sizeof(*hdr), so compute it over the sector
+	 * buffer rather than over the copy in *hdr.
+	 */
+	bzero(buf + offsetof(struct gpt_hdr, hdr_crc_self),
+	    sizeof(hdr->hdr_crc_self));
+	crcok = (crc32(buf, sz) == crc);
+	g_free(buf);
+	if (!crcok)
+		return;
+	hdr->hdr_size = sz;
+	hdr->hdr_crc_self = crc;
+
+	table->state[elt] = GPT_STATE_INVALID;
+	hdr->hdr_revision = le32toh(hdr->hdr_revision);
+	if (hdr->hdr_revision < 0x00010000)
+		return;
+	hdr->hdr_lba_self = le64toh(hdr->hdr_lba_self);
+	if (hdr->hdr_lba_self != table->lba[elt])
+		return;
+	hdr->hdr_lba_alt = le64toh(hdr->hdr_lba_alt);
+
+	/* Check the managed area. */
+	hdr->hdr_lba_start = le64toh(hdr->hdr_lba_start);
+	if (hdr->hdr_lba_start < 2 || hdr->hdr_lba_start >= last)
+		return;
+	hdr->hdr_lba_end = le64toh(hdr->hdr_lba_end);
+	if (hdr->hdr_lba_end < hdr->hdr_lba_start || hdr->hdr_lba_end >= last)
+		return;
+
+	/* Check the table location and size of the table. */
+	hdr->hdr_entries = le32toh(hdr->hdr_entries);
+	hdr->hdr_entsz = le32toh(hdr->hdr_entsz);
+	if (hdr->hdr_entries == 0 || hdr->hdr_entsz < 128 ||
+	    (hdr->hdr_entsz & 7) != 0)
+		return;
+	hdr->hdr_lba_table = le64toh(hdr->hdr_lba_table);
+	if (hdr->hdr_lba_table < 2 || hdr->hdr_lba_table >= last)
+		return;
+	/* The table may not start inside the managed area... */
+	if (hdr->hdr_lba_table >= hdr->hdr_lba_start &&
+	    hdr->hdr_lba_table <= hdr->hdr_lba_end)
+		return;
+	/* ...nor may its last sector reach the end or fall inside it. */
+	lba = hdr->hdr_lba_table +
+	    (hdr->hdr_entries * hdr->hdr_entsz + pp->sectorsize - 1) /
+	    pp->sectorsize - 1;
+	if (lba >= last)
+		return;
+	if (lba >= hdr->hdr_lba_start && lba <= hdr->hdr_lba_end)
+		return;
+
+	table->state[elt] = GPT_STATE_OK;
+	le_uuid_dec(&hdr->hdr_uuid, &uuid);
+	hdr->hdr_uuid = uuid;
+	hdr->hdr_crc_table = le32toh(hdr->hdr_crc_table);
+}
+
+/*
+ * Read and decode the partition entry array described by hdr.
+ *
+ * The array's LBA is recorded in table->lba[elt] and the outcome in
+ * table->state[elt]: GPT_STATE_MISSING when the read fails,
+ * GPT_STATE_CORRUPT when the checksum does not match hdr->hdr_crc_table,
+ * GPT_STATE_OK otherwise.
+ *
+ * Returns a g_malloc()ed array of hdr->hdr_entries entries in host byte
+ * order, or NULL on failure.  The caller owns the array (presumably to
+ * be released with g_free(); confirm against the callers).
+ */
+static struct gpt_ent *
+gpt_read_tbl(struct g_part_gpt_table *table, struct g_consumer *cp,
+    enum gpt_elt elt, struct gpt_hdr *hdr)
+{
+	struct g_provider *pp;
+	struct gpt_ent *ent, *tbl;
+	char *buf, *p;
+	unsigned int idx, sectors, tblsz;
+	int error;
+	uint16_t ch;
+
+	pp = cp->provider;
+	table->lba[elt] = hdr->hdr_lba_table;
+
+	table->state[elt] = GPT_STATE_MISSING;
+	/* Round the table size up to whole sectors for the read. */
+	tblsz = hdr->hdr_entries * hdr->hdr_entsz;
+	sectors = (tblsz + pp->sectorsize - 1) / pp->sectorsize;
+	buf = g_read_data(cp, table->lba[elt] * pp->sectorsize, 
+	    sectors * pp->sectorsize, &error);
+	if (buf == NULL)
+		return (NULL);
+
+	table->state[elt] = GPT_STATE_CORRUPT;
+	/* The checksum covers the table proper, not the sector padding. */
+	if (crc32(buf, tblsz) != hdr->hdr_crc_table) {
+		g_free(buf);
+		return (NULL);
+	}
+
+	table->state[elt] = GPT_STATE_OK;
+	tbl = g_malloc(hdr->hdr_entries * sizeof(struct gpt_ent),
+	    M_WAITOK | M_ZERO);
+
+	/*
+	 * On-disk entries are hdr_entsz bytes apart, which may be more
+	 * than sizeof(struct gpt_ent); only the known fields are decoded.
+	 */
+	for (idx = 0, ent = tbl, p = buf;
+	     idx < hdr->hdr_entries;
+	     idx++, ent++, p += hdr->hdr_entsz) {
+		le_uuid_dec(p, &ent->ent_type);
+		le_uuid_dec(p + 16, &ent->ent_uuid);
+		ent->ent_lba_start = le64dec(p + 32);
+		ent->ent_lba_end = le64dec(p + 40);
+		ent->ent_attr = le64dec(p + 48);
+		for (ch = 0; ch < sizeof(ent->ent_name)/2; ch++)
+			ent->ent_name[ch] = le16dec(p + 56 + ch * 2);
+	}
+
+	g_free(buf);
+	return (tbl);
+}
+
+/*
+ * Return 1 when the primary and secondary headers describe the same
+ * table, 0 otherwise.  Fields that legitimately differ between the two
+ * copies (self/alternate LBA, table LBA, header checksum) are ignored.
+ */
+static int
+gpt_matched_hdrs(struct gpt_hdr *pri, struct gpt_hdr *sec)
+{
+
+	if (!EQUUID(&pri->hdr_uuid, &sec->hdr_uuid))
+		return (0);
+	if (pri->hdr_revision != sec->hdr_revision ||
+	    pri->hdr_size != sec->hdr_size)
+		return (0);
+	if (pri->hdr_lba_start != sec->hdr_lba_start ||
+	    pri->hdr_lba_end != sec->hdr_lba_end)
+		return (0);
+	if (pri->hdr_entries != sec->hdr_entries ||
+	    pri->hdr_entsz != sec->hdr_entsz)
+		return (0);
+	if (pri->hdr_crc_table != sec->hdr_crc_table)
+		return (0);
+	return (1);
+}
+
+/*
+ * Translate a user-supplied partition type into a GPT type UUID.
+ *
+ * A leading '!' introduces a literal UUID, which is rejected when it
+ * cannot be parsed or when it is the "unused" UUID.  Any other string
+ * must match one of the recognized aliases, compared without regard to
+ * case.  Returns 0 with *uuid set, or an errno value with *uuid
+ * untouched.
+ */
+static int
+gpt_parse_type(const char *type, struct uuid *uuid)
+{
+	static const struct {
+		enum g_part_alias	alias;
+		const struct uuid	*uuid;
+	} aliases[] = {
+		{ G_PART_ALIAS_EFI,		&gpt_uuid_efi },
+		{ G_PART_ALIAS_FREEBSD,		&gpt_uuid_freebsd },
+		{ G_PART_ALIAS_FREEBSD_SWAP,	&gpt_uuid_freebsd_swap },
+		{ G_PART_ALIAS_FREEBSD_UFS,	&gpt_uuid_freebsd_ufs },
+		{ G_PART_ALIAS_FREEBSD_VINUM,	&gpt_uuid_freebsd_vinum },
+		{ G_PART_ALIAS_FREEBSD_ZFS,	&gpt_uuid_freebsd_zfs },
+		{ G_PART_ALIAS_MBR,		&gpt_uuid_mbr },
+	};
+	struct uuid parsed;
+	size_t i;
+	int rc;
+
+	if (type[0] != '!') {
+		for (i = 0; i < sizeof(aliases) / sizeof(aliases[0]); i++) {
+			if (strcasecmp(type,
+			    g_part_alias_name(aliases[i].alias)) == 0) {
+				*uuid = *aliases[i].uuid;
+				return (0);
+			}
+		}
+		return (EINVAL);
+	}
+
+	/* Literal UUID: parse into a temporary so failure changes nothing. */
+	rc = parse_uuid(type + 1, &parsed);
+	if (rc != 0)
+		return (rc);
+	if (EQUUID(&parsed, &gpt_uuid_unused))
+		return (EINVAL);
+	*uuid = parsed;
+	return (0);
+}
+
+/*
+ * Fill in the GPT-specific part of a partition entry that is being added.
+ * The type comes from gpp->gpp_type, a fresh UUID is generated and the LBA
+ * range is copied from the generic entry.  When the slot previously held a
+ * deleted entry its attributes and name are cleared.  Returns 0, or the
+ * error from gpt_parse_type() in which case nothing else is modified.
+ */
+static int
+g_part_gpt_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
+    struct g_part_parms *gpp)
+{
+	struct gpt_ent *ent;
+	int rc;
+
+	ent = &((struct g_part_gpt_entry *)baseentry)->ent;
+
+	rc = gpt_parse_type(gpp->gpp_type, &ent->ent_type);
+	if (rc != 0)
+		return (rc);
+
+	kern_uuidgen(&ent->ent_uuid, 1);
+	ent->ent_lba_start = baseentry->gpe_start;
+	ent->ent_lba_end = baseentry->gpe_end;
+
+	/* Do not inherit leftovers from a deleted entry in this slot. */
+	if (baseentry->gpe_deleted) {
+		ent->ent_attr = 0;
+		bzero(ent->ent_name, sizeof(ent->ent_name));
+	}
+
+	/* XXX label */
+	return (0);
+}
+
+/*
+ * Initialize the in-memory state for a brand new GPT on gpp->gpp_provider.
+ *
+ * Lays out the primary header at LBA 1, the primary table at LBA 2, the
+ * secondary header in the last sector and the secondary table right
+ * before it, then fills in the header fields and the usable LBA range.
+ * Nothing is written to disk here.  Returns 0, or ENOSPC when the sector
+ * size is below 512 bytes or the medium is too small.
+ */
+static int
+g_part_gpt_create(struct g_part_table *basetable, struct g_part_parms *gpp)
+{
+	struct g_provider *pp;
+	struct g_part_gpt_table *table;
+	quad_t last;
+	size_t tblsz;
+
+	table = (struct g_part_gpt_table *)basetable;
+	pp = gpp->gpp_provider;
+	/* Size of one copy of the entry table, in sectors (rounded up). */
+	tblsz = (basetable->gpt_entries * sizeof(struct gpt_ent) +
+	    pp->sectorsize - 1) / pp->sectorsize;
+	/*
+	 * Require room for 3 sectors plus both table copies plus one
+	 * sector per entry.
+	 */
+	if (pp->sectorsize < 512 ||
+	    pp->mediasize < (3 + 2 * tblsz + basetable->gpt_entries) *
+	    pp->sectorsize)
+		return (ENOSPC);
+
+	/* LBA of the last sector on the medium. */
+	last = (pp->mediasize / pp->sectorsize) - 1;
+
+	table->lba[GPT_ELT_PRIHDR] = 1;
+	table->lba[GPT_ELT_PRITBL] = 2;
+	table->lba[GPT_ELT_SECHDR] = last;
+	table->lba[GPT_ELT_SECTBL] = last - tblsz;
+
+	bcopy(GPT_HDR_SIG, table->hdr.hdr_sig, sizeof(table->hdr.hdr_sig));
+	table->hdr.hdr_revision = GPT_HDR_REVISION;
+	/* The header size excludes the structure's trailing padding. */
+	table->hdr.hdr_size = offsetof(struct gpt_hdr, padding);
+	/* Usable space lies between the two table copies. */
+	table->hdr.hdr_lba_start = 2 + tblsz;
+	table->hdr.hdr_lba_end = last - tblsz - 1;
+	kern_uuidgen(&table->hdr.hdr_uuid, 1);
+	table->hdr.hdr_entries = basetable->gpt_entries;
+	table->hdr.hdr_entsz = sizeof(struct gpt_ent);
+
+	basetable->gpt_first = table->hdr.hdr_lba_start;
+	basetable->gpt_last = table->hdr.hdr_lba_end;
+	return (0);
+}
+
+/*
+ * Request destruction of the GPT.  Nothing is written here; the sectors
+ * to be wiped are only flagged in the generic table.  Always returns 0.
+ */
+static int
+g_part_gpt_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
+{
+
+	/*
+	 * Clearing the partitioning takes wiping the last sector as well
+	 * as the first 2 sectors.
+	 */
+	basetable->gpt_smtail |= 1;
+	basetable->gpt_smhead |= 3;
+	return (0);
+}
+
+/*
+ * Report whether a partition may be dumped to: returns 1 for FreeBSD swap
+ * and Linux swap partitions and 0 for everything else.
+ */
+static int
+g_part_gpt_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
+{
+	struct uuid *type;
+
+	type = &((struct g_part_gpt_entry *)baseentry)->ent.ent_type;
+	if (EQUUID(type, &gpt_uuid_freebsd_swap))
+		return (1);
+	if (EQUUID(type, &gpt_uuid_linux_swap))
+		return (1);
+	return (0);
+}
+
+/*
+ * Apply a modification request to an existing entry.  Only the partition
+ * type can be changed, and only when G_PART_PARM_TYPE is set in
+ * gpp->gpp_parms.  Returns 0, or the error from gpt_parse_type().
+ */
+static int
+g_part_gpt_modify(struct g_part_table *basetable,
+    struct g_part_entry *baseentry, struct g_part_parms *gpp)
+{
+	struct g_part_gpt_entry *entry;
+
+	/* XXX label */
+	if ((gpp->gpp_parms & G_PART_PARM_TYPE) == 0)
+		return (0);
+
+	entry = (struct g_part_gpt_entry *)baseentry;
+	return (gpt_parse_type(gpp->gpp_type, &entry->ent.ent_type));
+}
+
+/*
+ * Format the name suffix of a partition into 'buf': 's' for partitions of
+ * the FreeBSD type and 'p' for all others, followed by the entry's index.
+ * Returns 'buf'.
+ */
+static char *
+g_part_gpt_name(struct g_part_table *table, struct g_part_entry *baseentry,
+    char *buf, size_t bufsz)
+{
+	struct g_part_gpt_entry *entry;
+	char prefix;
+
+	entry = (struct g_part_gpt_entry *)baseentry;
+	if (EQUUID(&entry->ent.ent_type, &gpt_uuid_freebsd))
+		prefix = 's';
+	else
+		prefix = 'p';
+	snprintf(buf, bufsz, "%c%d", prefix, baseentry->gpe_index);
+	return (buf);
+}
+
+/*
+ * Probe the provider behind 'cp' for a GPT.
+ *
+ * Returns G_PART_PROBE_PRI_HIGH when sector 0 carries the DOS magic and
+ * either the second or the last sector starts with the GPT header
+ * signature.  Returns ENXIO when nested or when no GPT is found, ENOSPC
+ * when the provider cannot hold a GPT, or the error from g_read_data().
+ */
+static int
+g_part_gpt_probe(struct g_part_table *table, struct g_consumer *cp)
+{
+	struct g_provider *pp;
+	char *buf;
+	int error, res;
+
+	/* We don't nest, which means that our depth should be 0. */
+	if (table->gpt_depth != 0)
+		return (ENXIO);
+
+	pp = cp->provider;
+
+	/*
+	 * Sanity-check the provider. Since the first sector on the provider
+	 * must be a PMBR and a PMBR is 512 bytes large, the sector size
+	 * must be at least 512 bytes.  Also, since the theoretical minimum
+	 * number of sectors needed by GPT is 6, any medium that has less
+	 * than 6 sectors is never going to be able to hold a GPT. The
+	 * number 6 comes from:
+	 *	1 sector for the PMBR
+	 *	2 sectors for the GPT headers (each 1 sector)
+	 *	2 sectors for the GPT tables (each 1 sector)
+	 *	1 sector for an actual partition
+	 * It's better to catch this pathological case early than behaving
+	 * pathologically later on...
+	 */
+	if (pp->sectorsize < 512 || pp->mediasize < 6 * pp->sectorsize)
+		return (ENOSPC);
+
+	/* Check that there's a MBR. */
+	buf = g_read_data(cp, 0L, pp->sectorsize, &error);
+	if (buf == NULL)
+		return (error);
+	res = le16dec(buf + DOSMAGICOFFSET);
+	g_free(buf);
+	if (res != DOSMAGIC) 
+		return (ENXIO);
+
+	/* Check that there's a primary header. */
+	buf = g_read_data(cp, pp->sectorsize, pp->sectorsize, &error);
+	if (buf == NULL)
+		return (error);
+	/* Only the 8-byte signature is checked here, not the CRC. */
+	res = memcmp(buf, GPT_HDR_SIG, 8);
+	g_free(buf);
+	if (res == 0)
+		return (G_PART_PROBE_PRI_HIGH);
+
+	/* No primary? Check that there's a secondary. */
+	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
+	    &error);
+	if (buf == NULL)
+		return (error);
+	res = memcmp(buf, GPT_HDR_SIG, 8); 
+	g_free(buf);
+	return ((res == 0) ? G_PART_PROBE_PRI_HIGH : ENXIO);
+}
+
+/*
+ * Load the GPT from the provider behind 'cp' into 'basetable'.
+ *
+ * Both the primary and the secondary header/table pair are read.  The
+ * primary copy is used when its table is good; otherwise the secondary
+ * copy is used.  The chosen header is stored in table->hdr, the generic
+ * first/last/entries fields are updated and a partition entry is created
+ * for every table slot whose type is not the unused UUID.
+ *
+ * Returns 0 on success or EINVAL when neither table is usable.
+ */
+static int
+g_part_gpt_read(struct g_part_table *basetable, struct g_consumer *cp)
+{
+	struct gpt_hdr prihdr, sechdr;
+	struct gpt_ent *tbl, *pritbl, *sectbl;
+	struct g_provider *pp;
+	struct g_part_gpt_table *table;
+	struct g_part_gpt_entry *entry;
+	int index;
+
+	table = (struct g_part_gpt_table *)basetable;
+	pp = cp->provider;
+
+	/* Read the primary header and table. */
+	gpt_read_hdr(table, cp, GPT_ELT_PRIHDR, &prihdr);
+	if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK) {
+		pritbl = gpt_read_tbl(table, cp, GPT_ELT_PRITBL, &prihdr);
+	} else {
+		table->state[GPT_ELT_PRITBL] = GPT_STATE_MISSING;
+		pritbl = NULL;
+	}
+
+	/* Read the secondary header and table. */
+	gpt_read_hdr(table, cp, GPT_ELT_SECHDR, &sechdr);
+	if (table->state[GPT_ELT_SECHDR] == GPT_STATE_OK) {
+		sectbl = gpt_read_tbl(table, cp, GPT_ELT_SECTBL, &sechdr);
+	} else {
+		table->state[GPT_ELT_SECTBL] = GPT_STATE_MISSING;
+		sectbl = NULL;
+	}
+
+	/* Fail if we haven't got any good tables at all. */
+	if (table->state[GPT_ELT_PRITBL] != GPT_STATE_OK &&
+	    table->state[GPT_ELT_SECTBL] != GPT_STATE_OK) {
+		printf("GEOM: %s: corrupt or invalid GPT detected.\n",
+		    pp->name);
+		printf("GEOM: %s: GPT rejected -- may not be recoverable.\n",
+		    pp->name);
+		return (EINVAL);
+	}
+
+	/*
+	 * If both headers are good but they disagree with each other,
+	 * then invalidate one. We prefer to keep the primary header,
+	 * unless the primary table is corrupt.
+	 */
+	if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK &&
+	    table->state[GPT_ELT_SECHDR] == GPT_STATE_OK &&
+	    !gpt_matched_hdrs(&prihdr, &sechdr)) {
+		if (table->state[GPT_ELT_PRITBL] == GPT_STATE_OK)
+			table->state[GPT_ELT_SECHDR] = GPT_STATE_INVALID;
+		else
+			table->state[GPT_ELT_PRIHDR] = GPT_STATE_INVALID;
+	}
+
+	/*
+	 * Choose the copy to use by the state of the primary *table*, not
+	 * merely the primary header: a good header may sit in front of an
+	 * unreadable or corrupt table, in which case pritbl is NULL and
+	 * the secondary copy (known to be good at this point) must be
+	 * used instead.
+	 */
+	if (table->state[GPT_ELT_PRITBL] != GPT_STATE_OK) {
+		printf("GEOM: %s: the primary GPT table is corrupt or "
+		    "invalid.\n", pp->name);
+		printf("GEOM: %s: using the secondary instead -- recovery "
+		    "strongly advised.\n", pp->name);
+		table->hdr = sechdr;
+		tbl = sectbl;
+		if (pritbl != NULL)
+			g_free(pritbl);
+	} else {
+		if (table->state[GPT_ELT_SECHDR] != GPT_STATE_OK ||
+		    table->state[GPT_ELT_SECTBL] != GPT_STATE_OK) {
+			printf("GEOM: %s: the secondary GPT table is corrupt "
+			    "or invalid.\n", pp->name);
+			printf("GEOM: %s: using the primary only -- recovery "
+			    "suggested.\n", pp->name);
+		}
+		table->hdr = prihdr;
+		tbl = pritbl;
+		if (sectbl != NULL)
+			g_free(sectbl);
+	}
+
+	basetable->gpt_first = table->hdr.hdr_lba_start;
+	basetable->gpt_last = table->hdr.hdr_lba_end;
+	basetable->gpt_entries = table->hdr.hdr_entries;
+
+	/* Walk the table backwards; entry indices are 1-based. */
+	for (index = basetable->gpt_entries - 1; index >= 0; index--) {
+		if (EQUUID(&tbl[index].ent_type, &gpt_uuid_unused))
+			continue;
+		entry = (struct g_part_gpt_entry *)g_part_new_entry(basetable,
+		    index+1, tbl[index].ent_lba_start, tbl[index].ent_lba_end);
+		entry->ent = tbl[index];
+	}
+
+	g_free(tbl);
+	return (0);
+}
+
+/*
+ * Return a textual form of a partition's type.  Types with a known alias
+ * yield the string from g_part_alias_name(); any other type is written
+ * into 'buf' as '!' followed by the snprintf_uuid() rendering of the UUID
+ * and 'buf' is returned.
+ */
+static const char *
+g_part_gpt_type(struct g_part_table *basetable, struct g_part_entry *baseentry,
+    char *buf, size_t bufsz)
+{
+	struct uuid *id;
+
+	id = &((struct g_part_gpt_entry *)baseentry)->ent.ent_type;
+
+	/* Well-known types are reported by their alias. */
+	if (EQUUID(id, &gpt_uuid_efi))
+		return (g_part_alias_name(G_PART_ALIAS_EFI));
+	if (EQUUID(id, &gpt_uuid_freebsd))
+		return (g_part_alias_name(G_PART_ALIAS_FREEBSD));
+	if (EQUUID(id, &gpt_uuid_freebsd_swap))
+		return (g_part_alias_name(G_PART_ALIAS_FREEBSD_SWAP));
+	if (EQUUID(id, &gpt_uuid_freebsd_ufs))
+		return (g_part_alias_name(G_PART_ALIAS_FREEBSD_UFS));
+	if (EQUUID(id, &gpt_uuid_freebsd_vinum))
+		return (g_part_alias_name(G_PART_ALIAS_FREEBSD_VINUM));
+	if (EQUUID(id, &gpt_uuid_freebsd_zfs))
+		return (g_part_alias_name(G_PART_ALIAS_FREEBSD_ZFS));
+	if (EQUUID(id, &gpt_uuid_mbr))
+		return (g_part_alias_name(G_PART_ALIAS_MBR));
+
+	/* No alias: fall back to the raw UUID, marked with a '!'. */
+	buf[0] = '!';
+	snprintf_uuid(buf + 1, bufsz - 1, id);
+	return (buf);
+}
+
+/*
+ * Write the in-memory GPT to the provider behind 'cp'.
+ *
+ * For a freshly created table a protective MBR is written to sector 0
+ * first.  Then the header and the entry table are serialized into one
+ * buffer and written twice: as the primary copy and, after patching the
+ * self/alternate/table LBAs and recomputing the header CRC, as the
+ * secondary copy.  Each table is written before its header.
+ *
+ * Returns 0, or the first error reported by g_write_data().
+ */
+static int
+g_part_gpt_write(struct g_part_table *basetable, struct g_consumer *cp)
+{
+	unsigned char *buf, *bp;
+	struct g_provider *pp;
+	struct g_part_entry *baseentry;
+	struct g_part_gpt_entry *entry;
+	struct g_part_gpt_table *table;
+	size_t tlbsz;
+	uint32_t crc;
+	int error, index;
+
+	pp = cp->provider;
+	table = (struct g_part_gpt_table *)basetable;
+	/* Size of the entry table in sectors, rounded up. */
+	tlbsz = (table->hdr.hdr_entries * table->hdr.hdr_entsz +
+	    pp->sectorsize - 1) / pp->sectorsize;
+
+	if (basetable->gpt_created) {
+		/*
+		 * Build the protective MBR: a single slot of type 0xee
+		 * that starts at sector 1; its size is clamped to what
+		 * fits in 32 bits.
+		 */
+		buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO);
+		le16enc(buf + DOSMAGICOFFSET, DOSMAGIC);
+		buf[DOSPARTOFF + 1] = 0xff;		/* shd */
+		buf[DOSPARTOFF + 2] = 0xff;		/* ssect */
+		buf[DOSPARTOFF + 3] = 0xff;		/* scyl */
+		buf[DOSPARTOFF + 4] = 0xee;		/* typ */
+		buf[DOSPARTOFF + 5] = 0xff;		/* ehd */
+		buf[DOSPARTOFF + 6] = 0xff;		/* esect */
+		buf[DOSPARTOFF + 7] = 0xff;		/* ecyl */
+		le32enc(buf + DOSPARTOFF + 8, 1);	/* start */
+		le32enc(buf + DOSPARTOFF + 12,
+		    MIN(pp->mediasize / pp->sectorsize - 1, 0xffffffffLL));
+		error = g_write_data(cp, 0, buf, pp->sectorsize);
+		g_free(buf);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Allocate space for the header and entries.  The first sector of
+	 * the buffer holds the header, the remaining tlbsz sectors hold
+	 * the entry table.
+	 */
+	buf = g_malloc((tlbsz + 1) * pp->sectorsize, M_WAITOK | M_ZERO);
+
+	/* Header fields that are identical in both copies. */
+	memcpy(buf, table->hdr.hdr_sig, sizeof(table->hdr.hdr_sig));
+	le32enc(buf + 8, table->hdr.hdr_revision);
+	le32enc(buf + 12, table->hdr.hdr_size);
+	le64enc(buf + 40, table->hdr.hdr_lba_start);
+	le64enc(buf + 48, table->hdr.hdr_lba_end);
+	le_uuid_enc(buf + 56, &table->hdr.hdr_uuid);
+	le32enc(buf + 80, table->hdr.hdr_entries);
+	le32enc(buf + 84, table->hdr.hdr_entsz);
+
+	/*
+	 * Encode the live entries into their slots.  Deleted entries are
+	 * skipped, so their slots keep the zeroes from the allocation.
+	 */
+	LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) {
+		if (baseentry->gpe_deleted)
+			continue;
+		entry = (struct g_part_gpt_entry *)baseentry;
+		index = baseentry->gpe_index - 1;
+		bp = buf + pp->sectorsize + table->hdr.hdr_entsz * index;
+		le_uuid_enc(bp, &entry->ent.ent_type);
+		le_uuid_enc(bp + 16, &entry->ent.ent_uuid);
+		le64enc(bp + 32, entry->ent.ent_lba_start);
+		le64enc(bp + 40, entry->ent.ent_lba_end);
+		le64enc(bp + 48, entry->ent.ent_attr);
+		/* The name is copied as stored, without byte swapping. */
+		memcpy(bp + 56, entry->ent.ent_name,
+		    sizeof(entry->ent.ent_name));
+	}
+
+	/* The table CRC goes into the header (hdr_crc_table). */
+	crc = crc32(buf + pp->sectorsize,
+	    table->hdr.hdr_entries * table->hdr.hdr_entsz);
+	le32enc(buf + 88, crc);
+
+	/*
+	 * Write primary meta-data.  The header CRC is computed with the
+	 * CRC field itself set to zero.
+	 */
+	le32enc(buf + 16, 0);	/* hdr_crc_self. */
+	le64enc(buf + 24, table->lba[GPT_ELT_PRIHDR]);	/* hdr_lba_self. */
+	le64enc(buf + 32, table->lba[GPT_ELT_SECHDR]);	/* hdr_lba_alt. */
+	le64enc(buf + 72, table->lba[GPT_ELT_PRITBL]);	/* hdr_lba_table. */
+	crc = crc32(buf, table->hdr.hdr_size);
+	le32enc(buf + 16, crc);
+
+	error = g_write_data(cp, table->lba[GPT_ELT_PRITBL] * pp->sectorsize,
+	    buf + pp->sectorsize, tlbsz * pp->sectorsize);
+	if (error)
+		goto out;
+	error = g_write_data(cp, table->lba[GPT_ELT_PRIHDR] * pp->sectorsize,
+	    buf, pp->sectorsize);
+	if (error)
+		goto out;
+
+	/* Write secondary meta-data: self and alternate LBAs swap roles. */
+	le32enc(buf + 16, 0);	/* hdr_crc_self. */
+	le64enc(buf + 24, table->lba[GPT_ELT_SECHDR]);	/* hdr_lba_self. */
+	le64enc(buf + 32, table->lba[GPT_ELT_PRIHDR]);	/* hdr_lba_alt. */
+	le64enc(buf + 72, table->lba[GPT_ELT_SECTBL]);	/* hdr_lba_table. */
+	crc = crc32(buf, table->hdr.hdr_size);
+	le32enc(buf + 16, crc);
+
+	error = g_write_data(cp, table->lba[GPT_ELT_SECTBL] * pp->sectorsize,
+	    buf + pp->sectorsize, tlbsz * pp->sectorsize);
+	if (error)
+		goto out;
+	error = g_write_data(cp, table->lba[GPT_ELT_SECHDR] * pp->sectorsize,
+	    buf, pp->sectorsize);
+
+ out:
+	g_free(buf);
+	return (error);
+}
+
+#if 0
+/*
+ * Append the UTF-16 string 'str' to 'sb' as UTF-8.  (Currently compiled
+ * out by the surrounding #if 0.)
+ *
+ * At most 'len' 16-bit units are consumed and conversion stops at the
+ * first zero unit.  Units are taken in host byte order until a swapped
+ * byte order mark flips the order; byte order marks produce no output.
+ * A surrogate that is not part of a valid high/low pair is replaced by
+ * U+FFFD.
+ */
+static void
+g_gpt_to_utf8(struct sbuf *sb, uint16_t *str, size_t len)
+{
+	u_int bo;
+	uint32_t ch;
+	uint16_t c;
+
+	bo = BYTE_ORDER;
+	while (len > 0 && *str != 0) {
+		ch = (bo == BIG_ENDIAN) ? be16toh(*str) : le16toh(*str);
+		str++, len--;
+		/* 0xd800-0xdfff: surrogate, needs a second unit. */
+		if ((ch & 0xf800) == 0xd800) {
+			if (len > 0) {
+				c = (bo == BIG_ENDIAN) ? be16toh(*str)
+				    : le16toh(*str);
+				str++, len--;
+			} else
+				c = 0xfffd;
+			/* Accept only a high surrogate followed by a low. */
+			if ((ch & 0x400) == 0 && (c & 0xfc00) == 0xdc00) {
+				ch = ((ch & 0x3ff) << 10) + (c & 0x3ff);
+				ch += 0x10000;
+			} else
+				ch = 0xfffd;
+		} else if (ch == 0xfffe) { /* BOM (U+FEFF) swapped. */
+			bo = (bo == BIG_ENDIAN) ? LITTLE_ENDIAN : BIG_ENDIAN;
+			continue;
+		} else if (ch == 0xfeff) /* BOM (U+FEFF) unswapped. */
+			continue;
+
+		/* Emit 1 to 4 UTF-8 bytes depending on the code point. */
+		if (ch < 0x80)
+			sbuf_printf(sb, "%c", ch);
+		else if (ch < 0x800)
+			sbuf_printf(sb, "%c%c", 0xc0 | (ch >> 6),
+			    0x80 | (ch & 0x3f));
+		else if (ch < 0x10000)
+			sbuf_printf(sb, "%c%c%c", 0xe0 | (ch >> 12),
+			    0x80 | ((ch >> 6) & 0x3f), 0x80 | (ch & 0x3f));
+		else if (ch < 0x200000)
+			sbuf_printf(sb, "%c%c%c%c", 0xf0 | (ch >> 18),
+			    0x80 | ((ch >> 12) & 0x3f),
+			    0x80 | ((ch >> 6) & 0x3f), 0x80 | (ch & 0x3f));
+	}
+}
+#endif
Index: g_raid3_ctl.c
===================================================================
RCS file: /home/cvs/src/sys/geom/raid3/g_raid3_ctl.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/raid3/g_raid3_ctl.c -L sys/geom/raid3/g_raid3_ctl.c -u -r1.2 -r1.3
--- sys/geom/raid3/g_raid3_ctl.c
+++ sys/geom/raid3/g_raid3_ctl.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/raid3/g_raid3_ctl.c,v 1.11.2.4 2006/09/19 11:42:42 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/raid3/g_raid3_ctl.c,v 1.21 2006/11/01 22:51:49 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -98,8 +98,9 @@
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	const char *name;
-	int *nargs, do_sync = 0;
+	int *nargs, do_sync = 0, dirty = 1;
 	int *autosync, *noautosync;
+	int *failsync, *nofailsync;
 	int *round_robin, *noround_robin;
 	int *verify, *noverify;
 	u_int n;
@@ -128,6 +129,21 @@
 		    "noautosync");
 		return;
 	}
+	failsync = gctl_get_paraml(req, "failsync", sizeof(*failsync));
+	if (failsync == NULL) {
+		gctl_error(req, "No '%s' argument.", "failsync");
+		return;
+	}
+	nofailsync = gctl_get_paraml(req, "nofailsync", sizeof(*nofailsync));
+	if (nofailsync == NULL) {
+		gctl_error(req, "No '%s' argument.", "nofailsync");
+		return;
+	}
+	if (*failsync && *nofailsync) {
+		gctl_error(req, "'%s' and '%s' specified.", "failsync",
+		    "nofailsync");
+		return;
+	}
 	round_robin = gctl_get_paraml(req, "round_robin", sizeof(*round_robin));
 	if (round_robin == NULL) {
 		gctl_error(req, "No '%s' argument.", "round_robin");
@@ -159,8 +175,8 @@
 		    "noverify");
 		return;
 	}
-	if (!*autosync && !*noautosync && !*round_robin && !*noround_robin &&
-	    !*verify && !*noverify) {
+	if (!*autosync && !*noautosync && !*failsync && !*nofailsync &&
+	    !*round_robin && !*noround_robin && !*verify && !*noverify) {
 		gctl_error(req, "Nothing has changed.");
 		return;
 	}
@@ -188,6 +204,15 @@
 		if (*noautosync)
 			sc->sc_flags |= G_RAID3_DEVICE_FLAG_NOAUTOSYNC;
 	}
+	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) {
+		if (*failsync)
+			sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_NOFAILSYNC;
+	} else {
+		if (*nofailsync) {
+			sc->sc_flags |= G_RAID3_DEVICE_FLAG_NOFAILSYNC;
+			dirty = 0;
+		}
+	}
 	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0) {
 		if (*noverify)
 			sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_VERIFY;
@@ -215,6 +240,8 @@
 			if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING)
 				disk->d_flags &= ~G_RAID3_DISK_FLAG_FORCE_SYNC;
 		}
+		if (!dirty)
+			disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 		g_raid3_update_metadata(disk);
 		if (do_sync) {
 			if (disk->d_state == G_RAID3_DISK_STATE_STALE) {
@@ -471,11 +498,11 @@
 	g_raid3_fill_metadata(disk, &md);
 	sx_xunlock(&sc->sc_lock);
 	md.md_syncid = 0;
-        md.md_dflags = 0;
+	md.md_dflags = 0;
 	if (*hardcode)
-                strlcpy(md.md_provider, pp->name, sizeof(md.md_provider));
-        else
-                bzero(md.md_provider, sizeof(md.md_provider));
+		strlcpy(md.md_provider, pp->name, sizeof(md.md_provider));
+	else
+		bzero(md.md_provider, sizeof(md.md_provider));
 	md.md_provsize = pp->mediasize;
 	sector = g_malloc(pp->sectorsize, M_WAITOK);
 	raid3_metadata_encode(&md, sector);
Index: g_raid3.h
===================================================================
RCS file: /home/cvs/src/sys/geom/raid3/g_raid3.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/raid3/g_raid3.h -L sys/geom/raid3/g_raid3.h -u -r1.2 -r1.3
--- sys/geom/raid3/g_raid3.h
+++ sys/geom/raid3/g_raid3.h
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/raid3/g_raid3.h,v 1.11.2.3 2006/05/10 07:15:37 pjd Exp $
+ * $FreeBSD: src/sys/geom/raid3/g_raid3.h,v 1.19 2006/11/01 22:51:49 pjd Exp $
  */
 
 #ifndef	_G_RAID3_H_
@@ -42,8 +42,9 @@
  * 2 - Added 'verify reading' algorithm.
  * 3 - Added md_genid field to metadata.
  * 4 - Added md_provsize field to metadata.
+ * 5 - Added 'no failure synchronization' flag.
  */
-#define	G_RAID3_VERSION		4
+#define	G_RAID3_VERSION		5
 
 #define	G_RAID3_DISK_FLAG_DIRTY		0x0000000000000001ULL
 #define	G_RAID3_DISK_FLAG_SYNCHRONIZING	0x0000000000000002ULL
@@ -57,9 +58,11 @@
 #define	G_RAID3_DEVICE_FLAG_NOAUTOSYNC	0x0000000000000001ULL
 #define	G_RAID3_DEVICE_FLAG_ROUND_ROBIN	0x0000000000000002ULL
 #define	G_RAID3_DEVICE_FLAG_VERIFY	0x0000000000000004ULL
+#define	G_RAID3_DEVICE_FLAG_NOFAILSYNC	0x0000000000000008ULL
 #define	G_RAID3_DEVICE_FLAG_MASK	(G_RAID3_DEVICE_FLAG_NOAUTOSYNC | \
 					 G_RAID3_DEVICE_FLAG_ROUND_ROBIN | \
-					 G_RAID3_DEVICE_FLAG_VERIFY)
+					 G_RAID3_DEVICE_FLAG_VERIFY | \
+					 G_RAID3_DEVICE_FLAG_NOFAILSYNC)
 
 #ifdef _KERNEL
 extern u_int g_raid3_debug;
@@ -363,7 +366,7 @@
 	return (0);
 }
 static __inline int
-raid3_metadata_decode_v4(const u_char *data, struct g_raid3_metadata *md)
+raid3_metadata_decode_v4v5(const u_char *data, struct g_raid3_metadata *md)
 {
 	MD5_CTX ctx;
 
@@ -405,7 +408,8 @@
 		error = raid3_metadata_decode_v3(data, md);
 		break;
 	case 4:
-		error = raid3_metadata_decode_v4(data, md);
+	case 5:
+		error = raid3_metadata_decode_v4v5(data, md);
 		break;
 	default:
 		error = EINVAL;
@@ -442,6 +446,8 @@
 			printf(" ROUND-ROBIN");
 		if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0)
 			printf(" VERIFY");
+		if ((md->md_mflags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0)
+			printf(" NOFAILSYNC");
 	}
 	printf("\n");
 	printf("    dflags:");
Index: g_raid3.c
===================================================================
RCS file: /home/cvs/src/sys/geom/raid3/g_raid3.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/raid3/g_raid3.c -L sys/geom/raid3/g_raid3.c -u -r1.2 -r1.3
--- sys/geom/raid3/g_raid3.c
+++ sys/geom/raid3/g_raid3.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/raid3/g_raid3.c,v 1.40.2.15 2006/09/19 11:16:14 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/raid3/g_raid3.c,v 1.81 2007/06/05 00:00:52 jeff Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -696,23 +696,23 @@
 	error = g_write_data(cp, offset, sector, length);
 	free(sector, M_RAID3);
 	if (error != 0) {
-		if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) {												      
-			G_RAID3_DEBUG(0, "Cannot write metadata on %s "												     
-			    "(device=%s, error=%d).",															
-			    g_raid3_get_diskname(disk), sc->sc_name, error);												
-			disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN;													  
-		} else {																		     
-			G_RAID3_DEBUG(1, "Cannot write metadata on %s "												     
-			    "(device=%s, error=%d).",															
-			    g_raid3_get_diskname(disk), sc->sc_name, error);												
-		}																			    
-		if (g_raid3_disconnect_on_failure &&															
+		if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) {
+			G_RAID3_DEBUG(0, "Cannot write metadata on %s "
+			    "(device=%s, error=%d).",
+			    g_raid3_get_diskname(disk), sc->sc_name, error);
+			disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN;
+		} else {
+			G_RAID3_DEBUG(1, "Cannot write metadata on %s "
+			    "(device=%s, error=%d).",
+			    g_raid3_get_diskname(disk), sc->sc_name, error);
+		}
+		if (g_raid3_disconnect_on_failure &&
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
-			sc->sc_bump_id |= G_RAID3_BUMP_GENID;												    
-			g_raid3_event_send(disk,															    
-			    G_RAID3_DISK_STATE_DISCONNECTED,														
-			    G_RAID3_EVENT_DONTWAIT);															
-		}		 
+			sc->sc_bump_id |= G_RAID3_BUMP_GENID;
+			g_raid3_event_send(disk,
+			    G_RAID3_DISK_STATE_DISCONNECTED,
+			    G_RAID3_EVENT_DONTWAIT);
+		}
 	}
 	return (error);
 }
@@ -861,6 +861,8 @@
 
 	if (sc->sc_provider == NULL)
 		return (0);
+	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0)
+		return (0);
 	if (sc->sc_idle)
 		return (0);
 	if (sc->sc_writes > 0)
@@ -892,6 +894,8 @@
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
+	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0)
+		return;
 	sc->sc_idle = 0;
 	sc->sc_last_write = time_uptime;
 	for (i = 0; i < sc->sc_ndisks; i++) {
@@ -1370,6 +1374,50 @@
 }
 
 static void
+g_raid3_flush(struct g_raid3_softc *sc, struct bio *bp)
+{	/* Fan the request 'bp' out to every ACTIVE disk of the device. */
+	struct bio_queue_head queue;
+	struct g_raid3_disk *disk;
+	struct g_consumer *cp;
+	struct bio *cbp;
+	u_int i;
+
+	bioq_init(&queue);	/* clones are collected first, sent later */
+	for (i = 0; i < sc->sc_ndisks; i++) {
+		disk = &sc->sc_disks[i];
+		if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
+			continue;	/* only ACTIVE disks get a clone */
+		cbp = g_clone_bio(bp);
+		if (cbp == NULL) {	/* undo: nothing has been sent yet */
+			for (cbp = bioq_first(&queue); cbp != NULL;
+			    cbp = bioq_first(&queue)) {
+				bioq_remove(&queue, cbp);
+				g_destroy_bio(cbp);
+			}
+			if (bp->bio_error == 0)	/* keep an earlier error */
+				bp->bio_error = ENOMEM;
+			g_io_deliver(bp, bp->bio_error);
+			return;
+		}
+		bioq_insert_tail(&queue, cbp);
+		cbp->bio_done = g_std_done;
+		cbp->bio_caller1 = disk;	/* remembered until dispatch */
+		cbp->bio_to = disk->d_consumer->provider;
+	}
+	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
+		bioq_remove(&queue, cbp);
+		G_RAID3_LOGREQ(3, cbp, "Sending request.");
+		disk = cbp->bio_caller1;
+		cbp->bio_caller1 = NULL;	/* scratch use is over */
+		cp = disk->d_consumer;
+		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
+		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
+		    cp->acr, cp->acw, cp->ace));
+		g_io_request(cbp, disk->d_consumer);
+	}
+}
+
+static void
 g_raid3_start(struct bio *bp)
 {
 	struct g_raid3_softc *sc;
@@ -1390,6 +1438,9 @@
 	case BIO_WRITE:
 	case BIO_DELETE:
 		break;
+	case BIO_FLUSH:
+		g_raid3_flush(sc, bp);
+		return;
 	case BIO_GETATTR:
 	default:
 		g_io_deliver(bp, EOPNOTSUPP);
@@ -1466,8 +1517,8 @@
 g_raid3_regular_delay(struct g_raid3_softc *sc, struct bio *bp)
 {
 
-        G_RAID3_LOGREQ(2, bp, "Delaying request.");
-        bioq_insert_head(&sc->sc_regular_delayed, bp);
+	G_RAID3_LOGREQ(2, bp, "Delaying request.");
+	bioq_insert_head(&sc->sc_regular_delayed, bp);
 }
 
 /*
@@ -1477,8 +1528,8 @@
 g_raid3_sync_delay(struct g_raid3_softc *sc, struct bio *bp)
 {
 
-        G_RAID3_LOGREQ(2, bp, "Delaying synchronization request.");
-        bioq_insert_tail(&sc->sc_sync_delayed, bp);
+	G_RAID3_LOGREQ(2, bp, "Delaying synchronization request.");
+	bioq_insert_tail(&sc->sc_sync_delayed, bp);
 }
 
 /*
@@ -1488,13 +1539,13 @@
 static void
 g_raid3_regular_release(struct g_raid3_softc *sc)
 {
-        struct bio *bp, *bp2;
+	struct bio *bp, *bp2;
 
-        TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
-                if (g_raid3_sync_collision(sc, bp))
-                        continue;
-                bioq_remove(&sc->sc_regular_delayed, bp);
-                G_RAID3_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
+	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
+		if (g_raid3_sync_collision(sc, bp))
+			continue;
+		bioq_remove(&sc->sc_regular_delayed, bp);
+		G_RAID3_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
 		mtx_lock(&sc->sc_queue_mtx);
 		bioq_insert_head(&sc->sc_queue, bp);
 #if 0
@@ -1505,7 +1556,7 @@
 		wakeup(&sc->sc_queue);
 #endif
 		mtx_unlock(&sc->sc_queue_mtx);
-        }
+	}
 }
 
 /*
@@ -1515,16 +1566,16 @@
 static void
 g_raid3_sync_release(struct g_raid3_softc *sc)
 {
-        struct bio *bp, *bp2;
+	struct bio *bp, *bp2;
 
-        TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
-                if (g_raid3_regular_collision(sc, bp))
-                        continue;
-                bioq_remove(&sc->sc_sync_delayed, bp);
-                G_RAID3_LOGREQ(2, bp,
-                    "Releasing delayed synchronization request.");
-                g_io_request(bp, bp->bio_from);
-        }
+	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
+		if (g_raid3_regular_collision(sc, bp))
+			continue;
+		bioq_remove(&sc->sc_sync_delayed, bp);
+		G_RAID3_LOGREQ(2, bp,
+		    "Releasing delayed synchronization request.");
+		g_io_request(bp, bp->bio_from);
+	}
 }
 
 /*
@@ -1966,9 +2017,9 @@
 	int timeout;
 
 	sc = arg;
-	mtx_lock_spin(&sched_lock);
+	thread_lock(curthread);
 	sched_prio(curthread, PRIBIO);
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(curthread);
 
 	sx_xlock(&sc->sc_lock);
 	for (;;) {
@@ -2107,6 +2158,8 @@
 {
 
 	sx_assert(&sc->sc_lock, SX_LOCKED);
+	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0)
+		return;
 	if (!sc->sc_idle && (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) == 0) {
 		G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.",
 		    g_raid3_get_diskname(disk), sc->sc_name);
@@ -2159,7 +2212,8 @@
 
 	G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
 	    g_raid3_get_diskname(disk));
-	disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
+	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) == 0)
+		disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
 	KASSERT(disk->d_sync.ds_consumer == NULL,
 	    ("Sync consumer already exists (device=%s, disk=%s).",
 	    sc->sc_name, g_raid3_get_diskname(disk)));
@@ -2269,8 +2323,9 @@
 	sc->sc_provider = pp;
 	g_error_provider(pp, 0);
 	g_topology_unlock();
-	G_RAID3_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
-	    pp->name);
+	G_RAID3_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
+	    g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE), sc->sc_ndisks);
+
 	if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED)
 		g_raid3_sync_start(sc);
 }
@@ -2635,7 +2690,7 @@
 		DISK_STATE_CHANGED();
 
 		disk->d_state = state;
-		G_RAID3_DEBUG(0, "Device %s: provider %s detected.",
+		G_RAID3_DEBUG(1, "Device %s: provider %s detected.",
 		    sc->sc_name, g_raid3_get_diskname(disk));
 		if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING)
 			break;
@@ -2678,7 +2733,7 @@
 		disk->d_sync.ds_offset_done = 0;
 		g_raid3_update_idle(sc, disk);
 		g_raid3_update_metadata(disk);
-		G_RAID3_DEBUG(0, "Device %s: provider %s activated.",
+		G_RAID3_DEBUG(1, "Device %s: provider %s activated.",
 		    sc->sc_name, g_raid3_get_diskname(disk));
 		break;
 	case G_RAID3_DISK_STATE_STALE:
@@ -2858,6 +2913,12 @@
 		    "md_all", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
+	if ((md->md_mediasize % md->md_sectorsize) != 0) {
+		G_RAID3_DEBUG(1, "Invalid metadata (mediasize %% sectorsize != "
+		    "0) on disk %s (device %s), skipping.", pp->name,
+		    sc->sc_name);
+		return (EINVAL);
+	}
 	if (md->md_mediasize != sc->sc_mediasize) {
 		G_RAID3_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
@@ -3129,7 +3190,8 @@
 		return (NULL);
 	}
 
-	G_RAID3_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
+	G_RAID3_DEBUG(1, "Device %s created (%u components, id=%u).",
+	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
 
 	sc->sc_rootmount = root_mount_hold("GRAID3");
 	G_RAID3_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
@@ -3426,6 +3488,7 @@
 		sbuf_printf(sb, name);					\
 	}								\
 } while (0)
+			ADD_FLAG(G_RAID3_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
 			ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
 			ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN,
 			    "ROUND-ROBIN");
Index: g_concat.h
===================================================================
RCS file: /home/cvs/src/sys/geom/concat/g_concat.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/concat/g_concat.h -L sys/geom/concat/g_concat.h -u -r1.2 -r1.3
--- sys/geom/concat/g_concat.h
+++ sys/geom/concat/g_concat.h
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/concat/g_concat.h,v 1.11.2.1 2006/03/01 17:55:29 pjd Exp $
+ * $FreeBSD: src/sys/geom/concat/g_concat.h,v 1.12 2006/02/01 12:05:59 pjd Exp $
  */
 
 #ifndef	_G_CONCAT_H_
Index: g_concat.c
===================================================================
RCS file: /home/cvs/src/sys/geom/concat/g_concat.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/concat/g_concat.c -L sys/geom/concat/g_concat.c -u -r1.2 -r1.3
--- sys/geom/concat/g_concat.c
+++ sys/geom/concat/g_concat.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/concat/g_concat.c,v 1.24.2.2 2006/03/01 17:55:29 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/concat/g_concat.c,v 1.29 2006/10/31 21:23:50 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -212,6 +212,42 @@
 }
 
 static void
+g_concat_flush(struct g_concat_softc *sc, struct bio *bp)
+{	/* Fan the request 'bp' out to every disk of the device. */
+	struct bio_queue_head queue;
+	struct g_consumer *cp;
+	struct bio *cbp;
+	u_int no;
+
+	bioq_init(&queue);	/* clones are collected first, sent later */
+	for (no = 0; no < sc->sc_ndisks; no++) {
+		cbp = g_clone_bio(bp);
+		if (cbp == NULL) {	/* undo: nothing has been sent yet */
+			for (cbp = bioq_first(&queue); cbp != NULL;
+			    cbp = bioq_first(&queue)) {
+				bioq_remove(&queue, cbp);
+				g_destroy_bio(cbp);
+			}
+			if (bp->bio_error == 0)	/* keep an earlier error */
+				bp->bio_error = ENOMEM;
+			g_io_deliver(bp, bp->bio_error);
+			return;
+		}
+		bioq_insert_tail(&queue, cbp);
+		cbp->bio_done = g_std_done;
+		cbp->bio_caller1 = sc->sc_disks[no].d_consumer;	/* target */
+		cbp->bio_to = sc->sc_disks[no].d_consumer->provider;
+	}
+	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
+		bioq_remove(&queue, cbp);
+		G_CONCAT_LOGREQ(cbp, "Sending request.");
+		cp = cbp->bio_caller1;
+		cbp->bio_caller1 = NULL;	/* scratch use is over */
+		g_io_request(cbp, cp);
+	}
+}
+
+static void
 g_concat_start(struct bio *bp)
 {
 	struct bio_queue_head queue;
@@ -240,6 +276,9 @@
 	case BIO_WRITE:
 	case BIO_DELETE:
 		break;
+	case BIO_FLUSH:
+		g_concat_flush(sc, bp);
+		return;
 	case BIO_GETATTR:
 		/* To which provider it should be delivered? */
 	default:
Index: pkcs5v2.c
===================================================================
RCS file: /home/cvs/src/sys/geom/eli/pkcs5v2.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/eli/pkcs5v2.c -L sys/geom/eli/pkcs5v2.c -u -r1.2 -r1.3
--- sys/geom/eli/pkcs5v2.c
+++ sys/geom/eli/pkcs5v2.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/eli/pkcs5v2.c,v 1.1.2.2 2006/03/01 17:52:15 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/eli/pkcs5v2.c,v 1.2 2006/02/01 12:05:59 pjd Exp $");
 
 #include <sys/param.h>
 #ifdef _KERNEL
Index: g_eli.c
===================================================================
RCS file: /home/cvs/src/sys/geom/eli/g_eli.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/eli/g_eli.c -L sys/geom/eli/g_eli.c -u -r1.2 -r1.3
--- sys/geom/eli/g_eli.c
+++ sys/geom/eli/g_eli.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2005 Pawel Jakub Dawidek <pjd at FreeBSD.org>
+ * Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd at FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/eli/g_eli.c,v 1.3.2.9 2006/04/05 22:12:28 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/eli/g_eli.c,v 1.38 2007/06/05 00:00:51 jeff Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -69,16 +69,20 @@
     &g_eli_visible_passphrase, 0,
     "Turn on echo when entering the passphrase (for debug purposes only!!)");
 u_int g_eli_overwrites = 5;
+TUNABLE_INT("kern.geom.eli.overwrites", &g_eli_overwrites);
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RW, &g_eli_overwrites,
     0, "Number of times on-disk keys should be overwritten when destroying them");
 static u_int g_eli_threads = 0;
 TUNABLE_INT("kern.geom.eli.threads", &g_eli_threads);
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RW, &g_eli_threads, 0,
     "Number of threads doing crypto work");
+u_int g_eli_batch = 0;
+TUNABLE_INT("kern.geom.eli.batch", &g_eli_batch);
+SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RW, &g_eli_batch, 0,
+    "Use crypto operations batching");
 
 static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
-static void g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp);
 
 static g_taste_t g_eli_taste;
 static g_dumpconf_t g_eli_dumpconf;
@@ -106,7 +110,7 @@
  * accelerator or something like this.
  * The function updates the SID and rerun the operation.
  */
-static int
+int
 g_eli_crypto_rerun(struct cryptop *crp)
 {
 	struct g_eli_softc *sc;
@@ -139,7 +143,7 @@
  *
  * g_eli_start -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
  */
-static void
+void
 g_eli_read_done(struct bio *bp)
 {
 	struct g_eli_softc *sc;
@@ -149,10 +153,20 @@
 	pbp = bp->bio_parent;
 	if (pbp->bio_error == 0)
 		pbp->bio_error = bp->bio_error;
+	/*
+	 * Do we have all sectors already?
+	 */
+	pbp->bio_inbed++;
+	if (pbp->bio_inbed < pbp->bio_children)
+		return;
 	g_destroy_bio(bp);
 	if (pbp->bio_error != 0) {
 		G_ELI_LOGREQ(0, pbp, "%s() failed", __func__);
 		pbp->bio_completed = 0;
+		if (pbp->bio_driver2 != NULL) {
+			free(pbp->bio_driver2, M_ELI);
+			pbp->bio_driver2 = NULL;
+		}
 		g_io_deliver(pbp, pbp->bio_error);
 		return;
 	}
@@ -164,69 +178,30 @@
 }
 
 /*
- * The function is called after we read and decrypt data.
- *
- * g_eli_start -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> G_ELI_CRYPTO_READ_DONE -> g_io_deliver
- */
-static int
-g_eli_crypto_read_done(struct cryptop *crp)
-{
-	struct bio *bp;
-
-	if (crp->crp_etype == EAGAIN) {
-		if (g_eli_crypto_rerun(crp) == 0)
-			return (0);
-	}
-	bp = (struct bio *)crp->crp_opaque;
-	bp->bio_inbed++;
-	if (crp->crp_etype == 0) {
-		G_ELI_DEBUG(3, "Crypto READ request done (%d/%d).",
-		    bp->bio_inbed, bp->bio_children);
-		bp->bio_completed += crp->crp_olen;
-	} else {
-		G_ELI_DEBUG(1, "Crypto READ request failed (%d/%d) error=%d.",
-		    bp->bio_inbed, bp->bio_children, crp->crp_etype);
-		if (bp->bio_error == 0)
-			bp->bio_error = crp->crp_etype;
-	}
-	/*
-	 * Do we have all sectors already?
-	 */
-	if (bp->bio_inbed < bp->bio_children)
-		return (0);
-	free(bp->bio_driver2, M_ELI);
-	bp->bio_driver2 = NULL;
-	if (bp->bio_error != 0) {
-		G_ELI_LOGREQ(0, bp, "Crypto READ request failed (error=%d).",
-		    bp->bio_error);
-		bp->bio_completed = 0;
-	}
-	/*
-	 * Read is finished, send it up.
-	 */
-	g_io_deliver(bp, bp->bio_error);
-	return (0);
-}
-
-/*
  * The function is called after we encrypt and write data.
  *
  * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver
  */
-static void
+void
 g_eli_write_done(struct bio *bp)
 {
 	struct bio *pbp;
 
 	G_ELI_LOGREQ(2, bp, "Request done.");
 	pbp = bp->bio_parent;
-	if (pbp->bio_error == 0)
-		pbp->bio_error = bp->bio_error;
+	if (pbp->bio_error == 0) {
+		if (bp->bio_error != 0)
+			pbp->bio_error = bp->bio_error;
+	}
+	/*
+	 * Do we have all sectors already?
+	 */
+	pbp->bio_inbed++;
+	if (pbp->bio_inbed < pbp->bio_children)
+		return;
 	free(pbp->bio_driver2, M_ELI);
 	pbp->bio_driver2 = NULL;
-	if (pbp->bio_error == 0)
-		pbp->bio_completed = pbp->bio_length;
-	else {
+	if (pbp->bio_error != 0) {
 		G_ELI_LOGREQ(0, pbp, "Crypto WRITE request failed (error=%d).",
 		    pbp->bio_error);
 		pbp->bio_completed = 0;
@@ -235,68 +210,11 @@
 	/*
 	 * Write is finished, send it up.
 	 */
+	pbp->bio_completed = pbp->bio_length;
 	g_io_deliver(pbp, pbp->bio_error);
 }
 
 /*
- * The function is called after data encryption.
- *
- * g_eli_start -> g_eli_crypto_run -> G_ELI_CRYPTO_WRITE_DONE -> g_io_request -> g_eli_write_done -> g_io_deliver
- */
-static int
-g_eli_crypto_write_done(struct cryptop *crp)
-{
-	struct g_geom *gp;
-	struct g_consumer *cp;
-	struct bio *bp, *cbp;
-
-	if (crp->crp_etype == EAGAIN) {
-		if (g_eli_crypto_rerun(crp) == 0)
-			return (0);
-	}
-	bp = (struct bio *)crp->crp_opaque;
-	bp->bio_inbed++;
-	if (crp->crp_etype == 0) {
-		G_ELI_DEBUG(3, "Crypto WRITE request done (%d/%d).",
-		    bp->bio_inbed, bp->bio_children);
-	} else {
-		G_ELI_DEBUG(1, "Crypto WRITE request failed (%d/%d) error=%d.",
-		    bp->bio_inbed, bp->bio_children, crp->crp_etype);
-		if (bp->bio_error == 0)
-			bp->bio_error = crp->crp_etype;
-	}
-	/*
-	 * All sectors are already encrypted?
-	 */
-	if (bp->bio_inbed < bp->bio_children)
-		return (0);
-	bp->bio_inbed = 0;
-	bp->bio_children = 1;
-	cbp = bp->bio_driver1;
-	bp->bio_driver1 = NULL;
-	if (bp->bio_error != 0) {
-		G_ELI_LOGREQ(0, bp, "Crypto WRITE request failed (error=%d).",
-		    bp->bio_error);
-		free(bp->bio_driver2, M_ELI);
-		bp->bio_driver2 = NULL;
-		g_destroy_bio(cbp);
-		g_io_deliver(bp, bp->bio_error);
-		return (0);
-	}
-	cbp->bio_data = bp->bio_driver2;
-	cbp->bio_done = g_eli_write_done;
-	gp = bp->bio_to->geom;
-	cp = LIST_FIRST(&gp->consumer);
-	cbp->bio_to = cp->provider;
-	G_ELI_LOGREQ(2, cbp, "Sending request.");
-	/*
-	 * Send encrypted data to the provider.
-	 */
-	g_io_request(cbp, cp);
-	return (0);
-}
-
-/*
  * This function should never be called, but GEOM made as it set ->orphan()
  * method for every geom.
  */
@@ -327,6 +245,7 @@
 g_eli_start(struct bio *bp)
 {
 	struct g_eli_softc *sc;
+	struct g_consumer *cp;
 	struct bio *cbp;
 
 	sc = bp->bio_to->geom->softc;
@@ -338,6 +257,8 @@
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
+	case BIO_GETATTR:
+	case BIO_FLUSH:
 		break;
 	case BIO_DELETE:
 		/*
@@ -345,8 +266,7 @@
 		 * It could be done by overwritting requested sector with
 		 * random data g_eli_overwrites number of times.
 		 */
-	case BIO_GETATTR:
-	default:	
+	default:
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
@@ -355,23 +275,37 @@
 		g_io_deliver(bp, ENOMEM);
 		return;
 	}
-	if (bp->bio_cmd == BIO_READ) {
-		struct g_consumer *cp;
-
-		cbp->bio_done = g_eli_read_done;
-		cp = LIST_FIRST(&sc->sc_geom->consumer);
-		cbp->bio_to = cp->provider;
-		G_ELI_LOGREQ(2, bp, "Sending request.");
-		/*
-		 * Read encrypted data from provider.
-		 */
-		g_io_request(cbp, cp);
-	} else /* if (bp->bio_cmd == BIO_WRITE) */ {
+	switch (bp->bio_cmd) {
+	case BIO_READ:
+		if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) {
+			bp->bio_driver2 = NULL;
+			cbp->bio_done = g_eli_read_done;
+			cp = LIST_FIRST(&sc->sc_geom->consumer);
+			cbp->bio_to = cp->provider;
+			G_ELI_LOGREQ(2, cbp, "Sending request.");
+			/*
+			 * Read encrypted data from provider.
+			 */
+			g_io_request(cbp, cp);
+			break;
+		}
+		bp->bio_pflags = 255;
+		/* FALLTHROUGH */
+	case BIO_WRITE:
 		bp->bio_driver1 = cbp;
 		mtx_lock(&sc->sc_queue_mtx);
 		bioq_insert_tail(&sc->sc_queue, bp);
 		mtx_unlock(&sc->sc_queue_mtx);
 		wakeup(sc);
+		break;
+	case BIO_GETATTR:
+	case BIO_FLUSH:
+		cbp->bio_done = g_std_done;
+		cp = LIST_FIRST(&sc->sc_geom->consumer);
+		cbp->bio_to = cp->provider;
+		G_ELI_LOGREQ(2, cbp, "Sending request.");
+		g_io_request(cbp, cp);
+		break;
 	}
 }
 
@@ -390,11 +324,19 @@
 
 	wr = arg;
 	sc = wr->w_softc;
-	mtx_lock_spin(&sched_lock);
+#ifdef SMP
+	/* Before sched_bind() to a CPU, wait for all CPUs to go on-line. */
+	if (mp_ncpus > 1 && sc->sc_crypto == G_ELI_CRYPTO_SW &&
+	    g_eli_threads == 0) {
+		while (!smp_started)
+			tsleep(wr, 0, "geli:smp", hz / 4);
+	}
+#endif
+	thread_lock(curthread);
 	sched_prio(curthread, PRIBIO);
 	if (sc->sc_crypto == G_ELI_CRYPTO_SW && g_eli_threads == 0)
 		sched_bind(curthread, wr->w_number);
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(curthread);
 
 	G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm);
 
@@ -402,7 +344,7 @@
 		mtx_lock(&sc->sc_queue_mtx);
 		bp = bioq_takefirst(&sc->sc_queue);
 		if (bp == NULL) {
-			if ((sc->sc_flags & G_ELI_FLAG_DESTROY) != 0) {
+			if (sc->sc_flags & G_ELI_FLAG_DESTROY) {
 				LIST_REMOVE(wr, w_next);
 				crypto_freesession(wr->w_sid);
 				free(wr, M_ELI);
@@ -417,20 +359,27 @@
 			continue;
 		}
 		mtx_unlock(&sc->sc_queue_mtx);
-		g_eli_crypto_run(wr, bp);
+		if (bp->bio_cmd == BIO_READ && bp->bio_pflags == 255)
+			g_eli_auth_read(sc, bp);
+		else if (sc->sc_flags & G_ELI_FLAG_AUTH)
+			g_eli_auth_run(wr, bp);
+		else
+			g_eli_crypto_run(wr, bp);
 	}
 }
 
 /*
  * Here we generate IV. It is unique for every sector.
  */
-static void
+void
 g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv,
     size_t size)
 {
-	u_char hash[SHA256_DIGEST_LENGTH];
+	u_char off[8], hash[SHA256_DIGEST_LENGTH];
 	SHA256_CTX ctx;
 
+	if (!(sc->sc_flags & G_ELI_FLAG_NATIVE_BYTE_ORDER))
+		le64enc(off, (uint64_t)offset);
 	/* Copy precalculated SHA256 context for IV-Key. */
 	bcopy(&sc->sc_ivctx, &ctx, sizeof(ctx));
 	SHA256_Update(&ctx, (uint8_t *)&offset, sizeof(offset));
@@ -438,111 +387,6 @@
 	bcopy(hash, iv, size);
 }
 
-/*
- * This is the main function responsible for cryptography (ie. communication
- * with crypto(9) subsystem).
- */
-static void
-g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp)
-{
-	struct g_eli_softc *sc;
-	struct cryptop *crp;
-	struct cryptodesc *crd;
-	struct uio *uio;
-	struct iovec *iov;
-	u_int i, nsec, add, secsize;
-	int err, error;
-	size_t size;
-	u_char *p, *data;
-
-	G_ELI_LOGREQ(3, bp, "%s", __func__);
-
-	bp->bio_pflags = wr->w_number;
-	sc = wr->w_softc;
-	secsize = LIST_FIRST(&sc->sc_geom->provider)->sectorsize;
-	nsec = bp->bio_length / secsize;
-
-	/*
-	 * Calculate how much memory do we need.
-	 * We need separate crypto operation for every single sector.
-	 * It is much faster to calculate total amount of needed memory here and
-	 * do the allocation once instead of allocating memory in pieces (many,
-	 * many pieces).
-	 */
-	size = sizeof(*crp) * nsec;
-	size += sizeof(*crd) * nsec;
-	size += sizeof(*uio) * nsec;
-	size += sizeof(*iov) * nsec;
-	/*
-	 * If we write the data we cannot destroy current bio_data content,
-	 * so we need to allocate more memory for encrypted data.
-	 */
-	if (bp->bio_cmd == BIO_WRITE)
-		size += bp->bio_length;
-	p = malloc(size, M_ELI, M_WAITOK);
-
-	bp->bio_inbed = 0;
-	bp->bio_children = nsec;
-	bp->bio_driver2 = p;
-
-	if (bp->bio_cmd == BIO_READ)
-		data = bp->bio_data;
-	else {
-		data = p;
-		p += bp->bio_length;
-		bcopy(bp->bio_data, data, bp->bio_length);
-	}
-
-	error = 0;
-	for (i = 0, add = 0; i < nsec; i++, add += secsize) {
-		crp = (struct cryptop *)p;	p += sizeof(*crp);
-		crd = (struct cryptodesc *)p;	p += sizeof(*crd);
-		uio = (struct uio *)p;		p += sizeof(*uio);
-		iov = (struct iovec *)p;	p += sizeof(*iov);
-
-		iov->iov_len = secsize;
-		iov->iov_base = data;
-		data += secsize;
-
-		uio->uio_iov = iov;
-		uio->uio_iovcnt = 1;
-		uio->uio_segflg = UIO_SYSSPACE;
-		uio->uio_resid = secsize;
-
-		crp->crp_sid = wr->w_sid;
-		crp->crp_ilen = secsize;
-		crp->crp_olen = secsize;
-		crp->crp_opaque = (void *)bp;
-		crp->crp_buf = (void *)uio;
-		if (bp->bio_cmd == BIO_WRITE)
-			crp->crp_callback = g_eli_crypto_write_done;
-		else /* if (bp->bio_cmd == BIO_READ) */
-			crp->crp_callback = g_eli_crypto_read_done;
-		crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIFSYNC | CRYPTO_F_REL;
-		crp->crp_desc = crd;
-
-		crd->crd_skip = 0;
-		crd->crd_len = secsize;
-		crd->crd_flags =
-		    CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT | CRD_F_KEY_EXPLICIT;
-		if (bp->bio_cmd == BIO_WRITE)
-			crd->crd_flags |= CRD_F_ENCRYPT;
-		crd->crd_alg = sc->sc_algo;
-		crd->crd_key = sc->sc_datakey;
-		crd->crd_klen = sc->sc_keylen;
-		g_eli_crypto_ivgen(sc, bp->bio_offset + add, crd->crd_iv,
-		    sizeof(crd->crd_iv));
-		crd->crd_next = NULL;
-
-		crp->crp_etype = 0;
-		err = crypto_dispatch(crp);
-		if (error == 0)
-			error = err;
-	}
-	if (bp->bio_error == 0)
-		bp->bio_error = error;
-}
-
 int
 g_eli_read_metadata(struct g_class *mp, struct g_provider *pp,
     struct g_eli_metadata *md)
@@ -623,6 +467,10 @@
 	sc = gp->softc;
 
 	if (dw > 0) {
+		if (sc->sc_flags & G_ELI_FLAG_RO) {
+			/* Deny write attempts. */
+			return (EROFS);
+		}
 		/* Someone is opening us for write, we need to remember that. */
 		sc->sc_flags |= G_ELI_FLAG_WOPEN;
 		return (0);
@@ -641,6 +489,16 @@
 	return (0);
 }
 
+static int
+g_eli_cpu_is_disabled(int cpu)
+{
+#ifdef SMP
+	return ((hlt_cpus_mask & (1 << cpu)) != 0);
+#else
+	return (0);
+#endif
+}
+
 struct g_geom *
 g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
     const struct g_eli_metadata *md, const u_char *mkey, int nkey)
@@ -650,7 +508,7 @@
 	struct g_geom *gp;
 	struct g_provider *pp;
 	struct g_consumer *cp;
-	struct cryptoini cri;
+	struct cryptoini crie, cria;
 	u_int i, threads;
 	int error;
 
@@ -663,31 +521,60 @@
 	gp->start = g_eli_start;
 	/*
 	 * Spoiling cannot happen actually, because we keep provider open for
-	 * writing all the time.
+	 * writing all the time or provider is read-only.
 	 */
 	gp->spoiled = g_eli_orphan_spoil_assert;
 	gp->orphan = g_eli_orphan;
+	gp->dumpconf = g_eli_dumpconf;
 	/*
-	 * If detach-on-last-close feature is not enabled, we can simply use
-	 * g_std_access().
+	 * If detach-on-last-close feature is not enabled and we don't operate
+	 * on read-only provider, we can simply use g_std_access().
 	 */
-	if (md->md_flags & G_ELI_FLAG_WO_DETACH)
+	if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO))
 		gp->access = g_eli_access;
 	else
 		gp->access = g_std_access;
-	gp->dumpconf = g_eli_dumpconf;
 
 	sc->sc_crypto = G_ELI_CRYPTO_SW;
 	sc->sc_flags = md->md_flags;
-	sc->sc_algo = md->md_algo;
+	/* Backward compatibility. */
+	if (md->md_version < 2)
+		sc->sc_flags |= G_ELI_FLAG_NATIVE_BYTE_ORDER;
+	sc->sc_ealgo = md->md_ealgo;
 	sc->sc_nkey = nkey;
 	/*
 	 * Remember the keys in our softc structure.
 	 */
-	bcopy(mkey, sc->sc_ivkey, sizeof(sc->sc_ivkey));
-	mkey += sizeof(sc->sc_ivkey);
-	bcopy(mkey, sc->sc_datakey, sizeof(sc->sc_datakey));
-	sc->sc_keylen = md->md_keylen;
+	g_eli_mkey_propagate(sc, mkey);
+	sc->sc_ekeylen = md->md_keylen;
+
+	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
+		sc->sc_akeylen = sizeof(sc->sc_akey) * 8;
+		sc->sc_aalgo = md->md_aalgo;
+		sc->sc_alen = g_eli_hashlen(sc->sc_aalgo);
+
+		sc->sc_data_per_sector = bpp->sectorsize - sc->sc_alen;
+		/*
+		 * Some hash functions (like SHA1 and RIPEMD160) generate a
+		 * hash whose length is not a multiple of 128 bits, but we want
+		 * the data length to be a multiple of 128 bits, so we can
+		 * encrypt without padding. The line below rounds the data
+		 * length down to a multiple of 128 bits.
+		 */
+		sc->sc_data_per_sector -= sc->sc_data_per_sector % 16;
+
+		sc->sc_bytes_per_sector =
+		    (md->md_sectorsize - 1) / sc->sc_data_per_sector + 1;
+		sc->sc_bytes_per_sector *= bpp->sectorsize;
+		/*
+		 * Precalculate SHA256 for HMAC key generation.
+		 * This is an expensive operation; we can either do it once now
+		 * or on every sector access, so doing it now is much better.
+		 */
+		SHA256_Init(&sc->sc_akeyctx);
+		SHA256_Update(&sc->sc_akeyctx, sc->sc_akey,
+		    sizeof(sc->sc_akey));
+	}
 
 	/*
 	 * Precalculate SHA256 for IV generation.
@@ -720,8 +607,13 @@
 	 * Keep provider open all the time, so we can run critical tasks,
 	 * like Master Keys deletion, without wondering if we can open
 	 * provider or not.
+	 * The only case in which we don't open the provider for writing is
+	 * when the user requested read-only access.
 	 */
-	error = g_access(cp, 1, 1, 1);
+	if (sc->sc_flags & G_ELI_FLAG_RO)
+		error = g_access(cp, 1, 0, 1);
+	else
+		error = g_access(cp, 1, 1, 1);
 	if (error != 0) {
 		if (req != NULL) {
 			gctl_error(req, "Cannot access %s (error=%d).",
@@ -735,10 +627,17 @@
 
 	LIST_INIT(&sc->sc_workers);
 
-	bzero(&cri, sizeof(cri));
-	cri.cri_alg = sc->sc_algo;
-	cri.cri_klen = sc->sc_keylen;
-	cri.cri_key = sc->sc_datakey;
+	bzero(&crie, sizeof(crie));
+	crie.cri_alg = sc->sc_ealgo;
+	crie.cri_klen = sc->sc_ekeylen;
+	crie.cri_key = sc->sc_ekey;
+	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
+		bzero(&cria, sizeof(cria));
+		cria.cri_alg = sc->sc_aalgo;
+		cria.cri_klen = sc->sc_akeylen;
+		cria.cri_key = sc->sc_akey;
+		crie.cri_next = &cria;
+	}
 
 	threads = g_eli_threads;
 	if (threads == 0)
@@ -749,6 +648,11 @@
 		G_ELI_DEBUG(0, "Reducing number of threads to %u.", threads);
 	}
 	for (i = 0; i < threads; i++) {
+		if (g_eli_cpu_is_disabled(i)) {
+			G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.",
+			    bpp->name, i);
+			continue;
+		}
 		wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO);
 		wr->w_softc = sc;
 		wr->w_number = i;
@@ -757,13 +661,15 @@
 		 * If this is the first pass, try to get hardware support.
 		 * Use software cryptography, if we cannot get it.
 		 */
-		if (i == 0) {
-			error = crypto_newsession(&wr->w_sid, &cri, 1);
+		if (LIST_EMPTY(&sc->sc_workers)) {
+			error = crypto_newsession(&wr->w_sid, &crie,
+					CRYPTOCAP_F_HARDWARE);
 			if (error == 0)
 				sc->sc_crypto = G_ELI_CRYPTO_HW;
 		}
 		if (sc->sc_crypto == G_ELI_CRYPTO_SW)
-			error = crypto_newsession(&wr->w_sid, &cri, 0);
+			error = crypto_newsession(&wr->w_sid, &crie,
+					CRYPTOCAP_F_SOFTWARE);
 		if (error != 0) {
 			free(wr, M_ELI);
 			if (req != NULL) {
@@ -802,14 +708,22 @@
 	pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX);
 	pp->sectorsize = md->md_sectorsize;
 	pp->mediasize = bpp->mediasize;
-	if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0)
+	if (!(sc->sc_flags & G_ELI_FLAG_ONETIME))
 		pp->mediasize -= bpp->sectorsize;
-	pp->mediasize -= (pp->mediasize % pp->sectorsize);
+	if (!(sc->sc_flags & G_ELI_FLAG_AUTH))
+		pp->mediasize -= (pp->mediasize % pp->sectorsize);
+	else {
+		pp->mediasize /= sc->sc_bytes_per_sector;
+		pp->mediasize *= pp->sectorsize;
+	}
+
 	g_error_provider(pp, 0);
 
 	G_ELI_DEBUG(0, "Device %s created.", pp->name);
-	G_ELI_DEBUG(0, "    Cipher: %s", g_eli_algo2str(sc->sc_algo));
-	G_ELI_DEBUG(0, "Key length: %u", sc->sc_keylen);
+	G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo),
+	    sc->sc_ekeylen);
+	if (sc->sc_flags & G_ELI_FLAG_AUTH)
+		G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo));
 	G_ELI_DEBUG(0, "    Crypto: %s",
 	    sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware");
 	return (gp);
@@ -967,7 +881,7 @@
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	g_topology_assert();
 
-	if (rootvnode != NULL || g_eli_tries == 0)
+	if (root_mounted() || g_eli_tries == 0)
 		return (NULL);
 
 	G_ELI_DEBUG(3, "Tasting %s.", pp->name);
@@ -987,7 +901,7 @@
 	if (md.md_provsize != pp->mediasize)
 		return (NULL);
 	/* Should we attach it on boot? */
-	if ((md.md_flags & G_ELI_FLAG_BOOT) == 0)
+	if (!(md.md_flags & G_ELI_FLAG_BOOT))
 		return (NULL);
 	if (md.md_keys == 0x00) {
 		G_ELI_DEBUG(0, "No valid keys on %s.", pp->name);
@@ -1109,7 +1023,7 @@
 		int first = 1;
 
 #define ADD_FLAG(flag, name)	do {					\
-	if ((sc->sc_flags & (flag)) != 0) {				\
+	if (sc->sc_flags & (flag)) {					\
 		if (!first)						\
 			sbuf_printf(sb, ", ");				\
 		else							\
@@ -1117,17 +1031,20 @@
 		sbuf_printf(sb, name);					\
 	}								\
 } while (0)
+		ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER");
 		ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME");
 		ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT");
 		ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH");
 		ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH");
+		ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH");
 		ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN");
 		ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY");
+		ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY");
 #undef  ADD_FLAG
 	}
 	sbuf_printf(sb, "</Flags>\n");
 
-	if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0) {
+	if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) {
 		sbuf_printf(sb, "%s<UsedKey>%u</UsedKey>\n", indent,
 		    sc->sc_nkey);
 	}
@@ -1144,10 +1061,16 @@
 		break;
 	}
 	sbuf_printf(sb, "</Crypto>\n");
-	sbuf_printf(sb, "%s<KeyLength>%u</KeyLength>\n", indent, sc->sc_keylen);
-	sbuf_printf(sb, "%s<Cipher>%s</Cipher>\n", indent,
-	    g_eli_algo2str(sc->sc_algo));
+	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
+		sbuf_printf(sb,
+		    "%s<AuthenticationAlgorithm>%s</AuthenticationAlgorithm>\n",
+		    indent, g_eli_algo2str(sc->sc_aalgo));
+	}
+	sbuf_printf(sb, "%s<KeyLength>%u</KeyLength>\n", indent,
+	    sc->sc_ekeylen);
+	sbuf_printf(sb, "%s<EncryptionAlgorithm>%s</EncryptionAlgorithm>\n", indent,
+	    g_eli_algo2str(sc->sc_ealgo));
 }
 
 DECLARE_GEOM_CLASS(g_eli_class, g_eli);
-MODULE_DEPEND(geom_eli, crypto, 1, 1, 1);
+MODULE_DEPEND(g_eli, crypto, 1, 1, 1);
Index: g_eli.h
===================================================================
RCS file: /home/cvs/src/sys/geom/eli/g_eli.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/eli/g_eli.h -L sys/geom/eli/g_eli.h -u -r1.2 -r1.3
--- sys/geom/eli/g_eli.h
+++ sys/geom/eli/g_eli.h
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2005 Pawel Jakub Dawidek <pjd at FreeBSD.org>
+ * Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd at FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/eli/g_eli.h,v 1.1.2.5 2006/03/01 17:52:15 pjd Exp $
+ * $FreeBSD: src/sys/geom/eli/g_eli.h,v 1.13 2007/09/01 06:33:01 pjd Exp $
  */
 
 #ifndef	_G_ELI_H_
@@ -54,28 +54,44 @@
 /*
  * Version history:
  * 0 - Initial version number.
+ * 1 - Added data authentication support (md_aalgo field and
+ *     G_ELI_FLAG_AUTH flag).
+ * 2 - Added G_ELI_FLAG_RO (read-only provider).
+ *   - IV is generated from offset converted to little-endian
+ *     (flag G_ELI_FLAG_NATIVE_BYTE_ORDER will be set for older versions).
+ * 3 - Added 'configure' subcommand.
  */
-#define	G_ELI_VERSION		0
+#define	G_ELI_VERSION		3
 
+/* ON DISK FLAGS. */
 /* Use random, onetime keys. */
-#define	G_ELI_FLAG_ONETIME	0x00000001
+#define	G_ELI_FLAG_ONETIME		0x00000001
 /* Ask for the passphrase from the kernel, before mounting root. */
-#define	G_ELI_FLAG_BOOT		0x00000002
+#define	G_ELI_FLAG_BOOT			0x00000002
 /* Detach on last close, if we were open for writing. */
-#define	G_ELI_FLAG_WO_DETACH	0x00000004
+#define	G_ELI_FLAG_WO_DETACH		0x00000004
 /* Detach on last close. */
-#define	G_ELI_FLAG_RW_DETACH	0x00000008
+#define	G_ELI_FLAG_RW_DETACH		0x00000008
+/* Provide data authentication. */
+#define	G_ELI_FLAG_AUTH			0x00000010
+/* Provider is read-only, we should deny all write attempts. */
+#define	G_ELI_FLAG_RO			0x00000020
+/* RUNTIME FLAGS. */
 /* Provider was open for writing. */
-#define	G_ELI_FLAG_WOPEN	0x00010000
+#define	G_ELI_FLAG_WOPEN		0x00010000
 /* Destroy device. */
-#define	G_ELI_FLAG_DESTROY	0x00020000
+#define	G_ELI_FLAG_DESTROY		0x00020000
+/* Provider uses native byte-order for IV generation. */
+#define	G_ELI_FLAG_NATIVE_BYTE_ORDER	0x00040000
 
 #define	SHA512_MDLEN		64
+#define	G_ELI_AUTH_SECKEYLEN	SHA256_DIGEST_LENGTH
 
 #define	G_ELI_MAXMKEYS		2
 #define	G_ELI_MAXKEYLEN		64
 #define	G_ELI_USERKEYLEN	G_ELI_MAXKEYLEN
 #define	G_ELI_DATAKEYLEN	G_ELI_MAXKEYLEN
+#define	G_ELI_AUTHKEYLEN	G_ELI_MAXKEYLEN
 #define	G_ELI_IVKEYLEN		G_ELI_MAXKEYLEN
 #define	G_ELI_SALTLEN		64
 #define	G_ELI_DATAIVKEYLEN	(G_ELI_DATAKEYLEN + G_ELI_IVKEYLEN)
@@ -85,6 +101,7 @@
 #ifdef _KERNEL
 extern u_int g_eli_debug;
 extern u_int g_eli_overwrites;
+extern u_int g_eli_batch;
 
 #define	G_ELI_CRYPTO_HW		1
 #define	G_ELI_CRYPTO_SW		2
@@ -123,13 +140,21 @@
 struct g_eli_softc {
 	struct g_geom	*sc_geom;
 	u_int		 sc_crypto;
-	uint8_t		 sc_datakey[G_ELI_DATAKEYLEN];
+	uint8_t		 sc_mkey[G_ELI_DATAIVKEYLEN];
+	uint8_t		 sc_ekey[G_ELI_DATAKEYLEN];
+	u_int		 sc_ealgo;
+	u_int		 sc_ekeylen;
+	uint8_t		 sc_akey[G_ELI_AUTHKEYLEN];
+	u_int		 sc_aalgo;
+	u_int		 sc_akeylen;
+	u_int		 sc_alen;
+	SHA256_CTX	 sc_akeyctx;
 	uint8_t		 sc_ivkey[G_ELI_IVKEYLEN];
 	SHA256_CTX	 sc_ivctx;
-	u_int		 sc_algo;
-	u_int		 sc_keylen;
 	int		 sc_nkey;
 	uint32_t	 sc_flags;
+	u_int		 sc_bytes_per_sector;
+	u_int		 sc_data_per_sector;
 
 	/* Only for software cryptography. */
 	struct bio_queue_head sc_queue;
@@ -143,8 +168,9 @@
 	char		md_magic[16];	/* Magic value. */
 	uint32_t	md_version;	/* Version number. */
 	uint32_t	md_flags;	/* Additional flags. */
-	uint16_t	md_algo;	/* Encryption algorithm. */
+	uint16_t	md_ealgo;	/* Encryption algorithm. */
 	uint16_t	md_keylen;	/* Key length. */
+	uint16_t	md_aalgo;	/* Authentication algorithm. */
 	uint64_t	md_provsize;	/* Provider's size. */
 	uint32_t	md_sectorsize;	/* Sector size. */
 	uint8_t		md_keys;	/* Available keys. */
@@ -165,8 +191,9 @@
 	bcopy(md->md_magic, p, sizeof(md->md_magic)); p += sizeof(md->md_magic);
 	le32enc(p, md->md_version);	p += sizeof(md->md_version);
 	le32enc(p, md->md_flags);	p += sizeof(md->md_flags);
-	le16enc(p, md->md_algo);	p += sizeof(md->md_algo);
+	le16enc(p, md->md_ealgo);	p += sizeof(md->md_ealgo);
 	le16enc(p, md->md_keylen);	p += sizeof(md->md_keylen);
+	le16enc(p, md->md_aalgo);	p += sizeof(md->md_aalgo);
 	le64enc(p, md->md_provsize);	p += sizeof(md->md_provsize);
 	le32enc(p, md->md_sectorsize);	p += sizeof(md->md_sectorsize);
 	*p = md->md_keys;		p += sizeof(md->md_keys);
@@ -186,7 +213,7 @@
 
 	p = data + sizeof(md->md_magic) + sizeof(md->md_version);
 	md->md_flags = le32dec(p);	p += sizeof(md->md_flags);
-	md->md_algo = le16dec(p);	p += sizeof(md->md_algo);
+	md->md_ealgo = le16dec(p);	p += sizeof(md->md_ealgo);
 	md->md_keylen = le16dec(p);	p += sizeof(md->md_keylen);
 	md->md_provsize = le64dec(p);	p += sizeof(md->md_provsize);
 	md->md_sectorsize = le32dec(p);	p += sizeof(md->md_sectorsize);
@@ -202,6 +229,30 @@
 	return (0);
 }
 static __inline int
+eli_metadata_decode_v1v2v3(const u_char *data, struct g_eli_metadata *md)
+{
+	MD5_CTX ctx;
+	const u_char *p;
+
+	p = data + sizeof(md->md_magic) + sizeof(md->md_version);
+	md->md_flags = le32dec(p);	p += sizeof(md->md_flags);
+	md->md_ealgo = le16dec(p);	p += sizeof(md->md_ealgo);
+	md->md_keylen = le16dec(p);	p += sizeof(md->md_keylen);
+	md->md_aalgo = le16dec(p);	p += sizeof(md->md_aalgo);
+	md->md_provsize = le64dec(p);	p += sizeof(md->md_provsize);
+	md->md_sectorsize = le32dec(p);	p += sizeof(md->md_sectorsize);
+	md->md_keys = *p;		p += sizeof(md->md_keys);
+	md->md_iterations = le32dec(p);	p += sizeof(md->md_iterations);
+	bcopy(p, md->md_salt, sizeof(md->md_salt)); p += sizeof(md->md_salt);
+	bcopy(p, md->md_mkeys, sizeof(md->md_mkeys)); p += sizeof(md->md_mkeys);
+	MD5Init(&ctx);
+	MD5Update(&ctx, data, p - data);
+	MD5Final(md->md_hash, &ctx);
+	if (bcmp(md->md_hash, p, 16) != 0)
+		return (EINVAL);
+	return (0);
+}
+static __inline int
 eli_metadata_decode(const u_char *data, struct g_eli_metadata *md)
 {
 	int error;
@@ -212,6 +263,11 @@
 	case 0:
 		error = eli_metadata_decode_v0(data, md);
 		break;
+	case 1:
+	case 2:
+	case 3:
+		error = eli_metadata_decode_v1v2v3(data, md);
+		break;
 	default:
 		error = EINVAL;
 		break;
@@ -221,7 +277,7 @@
 #endif	/* !_OpenSSL */
 
 static __inline u_int
-g_eli_str2algo(const char *name)
+g_eli_str2ealgo(const char *name)
 {
 
 	if (strcasecmp("null", name) == 0)
@@ -230,11 +286,32 @@
 		return (CRYPTO_AES_CBC);
 	else if (strcasecmp("blowfish", name) == 0)
 		return (CRYPTO_BLF_CBC);
+	else if (strcasecmp("camellia", name) == 0)
+		return (CRYPTO_CAMELLIA_CBC);
 	else if (strcasecmp("3des", name) == 0)
 		return (CRYPTO_3DES_CBC);
 	return (CRYPTO_ALGORITHM_MIN - 1);
 }
 
+static __inline u_int
+g_eli_str2aalgo(const char *name)
+{
+
+	if (strcasecmp("hmac/md5", name) == 0)
+		return (CRYPTO_MD5_HMAC);
+	else if (strcasecmp("hmac/sha1", name) == 0)
+		return (CRYPTO_SHA1_HMAC);
+	else if (strcasecmp("hmac/ripemd160", name) == 0)
+		return (CRYPTO_RIPEMD160_HMAC);
+	else if (strcasecmp("hmac/sha256", name) == 0)
+		return (CRYPTO_SHA2_256_HMAC);
+	else if (strcasecmp("hmac/sha384", name) == 0)
+		return (CRYPTO_SHA2_384_HMAC);
+	else if (strcasecmp("hmac/sha512", name) == 0)
+		return (CRYPTO_SHA2_512_HMAC);
+	return (CRYPTO_ALGORITHM_MIN - 1);
+}
+
 static __inline const char *
 g_eli_algo2str(u_int algo)
 {
@@ -243,11 +320,25 @@
 	case CRYPTO_NULL_CBC:
 		return ("NULL");
 	case CRYPTO_AES_CBC:
-		return ("AES");
+		return ("AES-CBC");
 	case CRYPTO_BLF_CBC:
-		return ("Blowfish");
+		return ("Blowfish-CBC");
+	case CRYPTO_CAMELLIA_CBC:
+		return ("CAMELLIA-CBC");
 	case CRYPTO_3DES_CBC:
-		return ("3DES");
+		return ("3DES-CBC");
+	case CRYPTO_MD5_HMAC:
+		return ("HMAC/MD5");
+	case CRYPTO_SHA1_HMAC:
+		return ("HMAC/SHA1");
+	case CRYPTO_RIPEMD160_HMAC:
+		return ("HMAC/RIPEMD160");
+	case CRYPTO_SHA2_256_HMAC:
+		return ("HMAC/SHA256");
+	case CRYPTO_SHA2_384_HMAC:
+		return ("HMAC/SHA384");
+	case CRYPTO_SHA2_512_HMAC:
+		return ("HMAC/SHA512");
 	}
 	return ("unknown");
 }
@@ -262,8 +353,10 @@
 	printf("     magic: %s\n", md->md_magic);
 	printf("   version: %u\n", (u_int)md->md_version);
 	printf("     flags: 0x%x\n", (u_int)md->md_flags);
-	printf("      algo: %s\n", g_eli_algo2str(md->md_algo));
+	printf("     ealgo: %s\n", g_eli_algo2str(md->md_ealgo));
 	printf("    keylen: %u\n", (u_int)md->md_keylen);
+	if (md->md_flags & G_ELI_FLAG_AUTH)
+		printf("     aalgo: %s\n", g_eli_algo2str(md->md_aalgo));
 	printf("  provsize: %ju\n", (uintmax_t)md->md_provsize);
 	printf("sectorsize: %u\n", (u_int)md->md_sectorsize);
 	printf("      keys: 0x%02x\n", (u_int)md->md_keys);
@@ -301,7 +394,8 @@
 				keylen = 0;
 		}
 		return (keylen);
-	case CRYPTO_AES_CBC:
+	case CRYPTO_AES_CBC: /* FALLTHROUGH */
+	case CRYPTO_CAMELLIA_CBC:
 		switch (keylen) {
 		case 0:
 			return (128);
@@ -329,6 +423,27 @@
 	}
 }
 
+static __inline u_int
+g_eli_hashlen(u_int algo)
+{
+
+	switch (algo) {
+	case CRYPTO_MD5_HMAC:
+		return (16);
+	case CRYPTO_SHA1_HMAC:
+		return (20);
+	case CRYPTO_RIPEMD160_HMAC:
+		return (20);
+	case CRYPTO_SHA2_256_HMAC:
+		return (32);
+	case CRYPTO_SHA2_384_HMAC:
+		return (48);
+	case CRYPTO_SHA2_512_HMAC:
+		return (64);
+	}
+	return (0);
+}
+
 #ifdef _KERNEL
 int g_eli_read_metadata(struct g_class *mp, struct g_provider *pp,
     struct g_eli_metadata *md);
@@ -339,6 +454,17 @@
 
 int g_eli_access(struct g_provider *pp, int dr, int dw, int de);
 void g_eli_config(struct gctl_req *req, struct g_class *mp, const char *verb);
+
+void g_eli_read_done(struct bio *bp);
+void g_eli_write_done(struct bio *bp);
+int g_eli_crypto_rerun(struct cryptop *crp);
+void g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv,
+    size_t size);
+
+void g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp);
+
+void g_eli_auth_read(struct g_eli_softc *sc, struct bio *bp);
+void g_eli_auth_run(struct g_eli_worker *wr, struct bio *bp);
 #endif
 
 void g_eli_mkey_hmac(unsigned char *mkey, const unsigned char *key);
@@ -346,6 +472,9 @@
     const unsigned char *key, unsigned char *mkey, unsigned *nkeyp);
 int g_eli_mkey_encrypt(unsigned algo, const unsigned char *key, unsigned keylen,
     unsigned char *mkey);
+#ifdef _KERNEL
+void g_eli_mkey_propagate(struct g_eli_softc *sc, const unsigned char *mkey);
+#endif
 
 int g_eli_crypto_encrypt(u_int algo, u_char *data, size_t datasize,
     const u_char *key, size_t keysize);
Index: g_eli_key.c
===================================================================
RCS file: /home/cvs/src/sys/geom/eli/g_eli_key.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/eli/g_eli_key.c -L sys/geom/eli/g_eli_key.c -u -r1.2 -r1.3
--- sys/geom/eli/g_eli_key.c
+++ sys/geom/eli/g_eli_key.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/eli/g_eli_key.c,v 1.1.2.2 2006/03/01 17:52:15 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/eli/g_eli_key.c,v 1.3 2006/06/05 21:38:54 pjd Exp $");
 
 #include <sys/param.h>
 #ifdef _KERNEL
@@ -123,10 +123,10 @@
 	nkey = 0;
 	for (nkey = 0; nkey < G_ELI_MAXMKEYS; nkey++, mmkey += G_ELI_MKEYLEN) {
 		bit = (1 << nkey);
-		if ((md->md_keys & bit) == 0)
+		if (!(md->md_keys & bit))
 			continue;
 		bcopy(mmkey, tmpmkey, G_ELI_MKEYLEN);
-		error = g_eli_crypto_decrypt(md->md_algo, tmpmkey,
+		error = g_eli_crypto_decrypt(md->md_ealgo, tmpmkey,
 		    G_ELI_MKEYLEN, enckey, md->md_keylen);
 		if (error != 0) {
 			bzero(tmpmkey, sizeof(tmpmkey));
@@ -177,3 +177,43 @@
 
 	return (error);
 }
+
+#ifdef _KERNEL
+/*
+ * Install the Master Key in the softc and set up the working keys from it.
+ *
+ * The complete Master Key is kept in sc_mkey and its leading bytes become
+ * the IV key.  The bytes that follow the IV key are the key material:
+ * - encryption only: they are copied verbatim into sc_ekey;
+ * - encryption and authentication: they key two HMAC computations whose
+ *   results become the encryption key (message 0x10) and the
+ *   authentication key (message 0x11).
+ */
+void
+g_eli_mkey_propagate(struct g_eli_softc *sc, const unsigned char *mkey)
+{
+	const unsigned char *keymat;
+
+	/* Remember the Master Key. */
+	bcopy(mkey, sc->sc_mkey, sizeof(sc->sc_mkey));
+
+	/* The IV key comes first; the key material starts right after it. */
+	bcopy(mkey, sc->sc_ivkey, sizeof(sc->sc_ivkey));
+	keymat = mkey + sizeof(sc->sc_ivkey);
+
+	if ((sc->sc_flags & G_ELI_FLAG_AUTH) != 0) {
+		/*
+		 * The encryption key is: ekey = HMAC_SHA512(Master-Key, 0x10)
+		 * The authentication key is: akey = HMAC_SHA512(Master-Key, 0x11)
+		 */
+		g_eli_crypto_hmac(keymat, G_ELI_MAXKEYLEN, "\x10", 1,
+		    sc->sc_ekey, 0);
+		g_eli_crypto_hmac(keymat, G_ELI_MAXKEYLEN, "\x11", 1,
+		    sc->sc_akey, 0);
+		return;
+	}
+
+	/* Encryption only: use the key material directly. */
+	bcopy(keymat, sc->sc_ekey, sizeof(sc->sc_ekey));
+}
+#endif
Index: g_eli_ctl.c
===================================================================
RCS file: /home/cvs/src/sys/geom/eli/g_eli_ctl.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/eli/g_eli_ctl.c -L sys/geom/eli/g_eli_ctl.c -u -r1.2 -r1.3
--- sys/geom/eli/g_eli_ctl.c
+++ sys/geom/eli/g_eli_ctl.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/eli/g_eli_ctl.c,v 1.1.2.4 2006/03/01 17:52:15 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/eli/g_eli_ctl.c,v 1.13 2007/05/06 14:56:03 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -57,7 +57,7 @@
 	struct g_provider *pp;
 	const char *name;
 	u_char *key, mkey[G_ELI_DATAIVKEYLEN];
-	int *nargs, *detach;
+	int *nargs, *detach, *readonly;
 	int keysize, error;
 	u_int nkey;
 
@@ -79,6 +79,12 @@
 		return;
 	}
 
+	readonly = gctl_get_paraml(req, "readonly", sizeof(*readonly));
+	if (readonly == NULL) {
+		gctl_error(req, "No '%s' argument.", "readonly");
+		return;
+	}
+
 	name = gctl_get_asciiparam(req, "arg0");
 	if (name == NULL) {
 		gctl_error(req, "No 'arg%u' argument.", 0);
@@ -124,8 +130,15 @@
 	}
 	G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name);
 
+	if (*detach && *readonly) {
+		bzero(&md, sizeof(md));
+		gctl_error(req, "Options -d and -r are mutually exclusive.");
+		return;
+	}
 	if (*detach)
 		md.md_flags |= G_ELI_FLAG_WO_DETACH;
+	if (*readonly)
+		md.md_flags |= G_ELI_FLAG_RO;
 	g_eli_create(req, mp, pp, &md, mkey, nkey);
 	bzero(mkey, sizeof(mkey));
 	bzero(&md, sizeof(md));
@@ -250,16 +263,49 @@
 	if (*detach)
 		md.md_flags |= G_ELI_FLAG_WO_DETACH;
 
-	name = gctl_get_asciiparam(req, "algo");
+	md.md_ealgo = CRYPTO_ALGORITHM_MIN - 1;
+	name = gctl_get_asciiparam(req, "aalgo");
 	if (name == NULL) {
-		gctl_error(req, "No '%s' argument.", "algo");
+		gctl_error(req, "No '%s' argument.", "aalgo");
 		return;
 	}
-	md.md_algo = g_eli_str2algo(name);
-	if (md.md_algo < CRYPTO_ALGORITHM_MIN ||
-	    md.md_algo > CRYPTO_ALGORITHM_MAX) {
-		gctl_error(req, "Invalid '%s' argument.", "algo");
-		return;
+	if (strcmp(name, "none") != 0) {
+		md.md_aalgo = g_eli_str2aalgo(name);
+		if (md.md_aalgo >= CRYPTO_ALGORITHM_MIN &&
+		    md.md_aalgo <= CRYPTO_ALGORITHM_MAX) {
+			md.md_flags |= G_ELI_FLAG_AUTH;
+		} else {
+			/*
+			 * For backward compatibility, check if the -a option
+			 * was used to provide encryption algorithm.
+			 */
+			md.md_ealgo = g_eli_str2ealgo(name);
+			if (md.md_ealgo < CRYPTO_ALGORITHM_MIN ||
+			    md.md_ealgo > CRYPTO_ALGORITHM_MAX) {
+				gctl_error(req,
+				    "Invalid authentication algorithm.");
+				return;
+			} else {
+				gctl_error(req, "warning: The -e option, not "
+				    "the -a option is now used to specify "
+				    "encryption algorithm to use.");
+			}
+		}
+	}
+
+	if (md.md_ealgo < CRYPTO_ALGORITHM_MIN ||
+	    md.md_ealgo > CRYPTO_ALGORITHM_MAX) {
+		name = gctl_get_asciiparam(req, "ealgo");
+		if (name == NULL) {
+			gctl_error(req, "No '%s' argument.", "ealgo");
+			return;
+		}
+		md.md_ealgo = g_eli_str2ealgo(name);
+		if (md.md_ealgo < CRYPTO_ALGORITHM_MIN ||
+		    md.md_ealgo > CRYPTO_ALGORITHM_MAX) {
+			gctl_error(req, "Invalid encryption algorithm.");
+			return;
+		}
 	}
 
 	keylen = gctl_get_paraml(req, "keylen", sizeof(*keylen));
@@ -267,7 +313,7 @@
 		gctl_error(req, "No '%s' argument.", "keylen");
 		return;
 	}
-	md.md_keylen = g_eli_keylen(md.md_algo, *keylen);
+	md.md_keylen = g_eli_keylen(md.md_ealgo, *keylen);
 	if (md.md_keylen == 0) {
 		gctl_error(req, "Invalid '%s' argument.", "keylen");
 		return;
@@ -309,6 +355,10 @@
 			gctl_error(req, "Invalid sector size.");
 			return;
 		}
+		if (*sectorsize > PAGE_SIZE) {
+			gctl_error(req, "warning: Using sectorsize bigger than "
+			    "the page size!");
+		}
 		md.md_sectorsize = *sectorsize;
 	}
 
@@ -318,6 +368,134 @@
 }
 
+/*
+ * Handle the "configure" verb: set or clear the BOOT flag on each provider
+ * named by the arg0..argN request parameters.
+ *
+ * The request must carry "nargs" (> 0), "boot" and "noboot", with exactly
+ * one of boot/noboot set.  Providers that are not attached are skipped, as
+ * are providers whose BOOT flag already has the requested value; read-only
+ * providers are rejected.  For each remaining provider the flag is changed
+ * in the softc and in the metadata, which is then written back to the last
+ * sector of the underlying provider.
+ */
+static void
+g_eli_ctl_configure(struct gctl_req *req, struct g_class *mp)
+{
+	struct g_eli_softc *sc;
+	struct g_eli_metadata md;
+	struct g_provider *pp;
+	struct g_consumer *cp;
+	char param[16];
+	const char *prov;
+	u_char *sector;
+	int *nargs, *boot, *noboot;
+	int error;
+	u_int i;
+
+	g_topology_assert();
+
+	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
+	if (nargs == NULL) {
+		gctl_error(req, "No '%s' argument.", "nargs");
+		return;
+	}
+	if (*nargs <= 0) {
+		gctl_error(req, "Missing device(s).");
+		return;
+	}
+
+	boot = gctl_get_paraml(req, "boot", sizeof(*boot));
+	if (boot == NULL) {
+		gctl_error(req, "No '%s' argument.", "boot");
+		return;
+	}
+	noboot = gctl_get_paraml(req, "noboot", sizeof(*noboot));
+	if (noboot == NULL) {
+		gctl_error(req, "No '%s' argument.", "noboot");
+		return;
+	}
+	if (*boot && *noboot) {
+		gctl_error(req, "Options -b and -B are mutually exclusive.");
+		return;
+	}
+	if (!*boot && !*noboot) {
+		gctl_error(req, "No option given.");
+		return;
+	}
+
+	/* *nargs is known to be positive here, so the cast is safe. */
+	for (i = 0; i < (u_int)*nargs; i++) {
+		snprintf(param, sizeof(param), "arg%u", i);
+		prov = gctl_get_asciiparam(req, param);
+		if (prov == NULL) {
+			gctl_error(req, "No 'arg%u' argument.", i);
+			return;
+		}
+		sc = g_eli_find_device(mp, prov);
+		if (sc == NULL) {
+			/*
+			 * We ignore not attached providers, userland part will
+			 * take care of them.
+			 */
+			G_ELI_DEBUG(1, "Skipping configuration of not attached "
+			    "provider %s.", prov);
+			continue;
+		}
+		/* Skip providers whose flag is already as requested. */
+		if (*boot && (sc->sc_flags & G_ELI_FLAG_BOOT)) {
+			G_ELI_DEBUG(1, "BOOT flag already configured for %s.",
+			    prov);
+			continue;
+		} else if (!*boot && !(sc->sc_flags & G_ELI_FLAG_BOOT)) {
+			G_ELI_DEBUG(1, "BOOT flag not configured for %s.",
+			    prov);
+			continue;
+		}
+		if (sc->sc_flags & G_ELI_FLAG_RO) {
+			gctl_error(req, "Cannot change configuration of "
+			    "read-only provider %s.", prov);
+			continue;
+		}
+		cp = LIST_FIRST(&sc->sc_geom->consumer);
+		pp = cp->provider;
+		error = g_eli_read_metadata(mp, pp, &md);
+		if (error != 0) {
+			gctl_error(req,
+			    "Cannot read metadata from %s (error=%d).",
+			    prov, error);
+			continue;
+		}
+
+		if (*boot) {
+			md.md_flags |= G_ELI_FLAG_BOOT;
+			sc->sc_flags |= G_ELI_FLAG_BOOT;
+		} else {
+			md.md_flags &= ~G_ELI_FLAG_BOOT;
+			sc->sc_flags &= ~G_ELI_FLAG_BOOT;
+		}
+
+		/* Metadata lives in the last sector of the provider. */
+		sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO);
+		eli_metadata_encode(&md, sector);
+		error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector,
+		    pp->sectorsize);
+		if (error != 0) {
+			gctl_error(req,
+			    "Cannot store metadata on %s (error=%d).",
+			    prov, error);
+		}
+		bzero(&md, sizeof(md));
+		/*
+		 * Wipe the whole buffer holding the encoded metadata before
+		 * it is freed.  The length must be pp->sectorsize: 'sector'
+		 * is a pointer, so sizeof(sector) is only the pointer size.
+		 */
+		bzero(sector, pp->sectorsize);
+		free(sector, M_ELI);
+	}
+}
+
 static void
 g_eli_ctl_setkey(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_eli_softc *sc;
@@ -341,6 +500,10 @@
 		gctl_error(req, "Provider %s is invalid.", name);
 		return;
 	}
+	if (sc->sc_flags & G_ELI_FLAG_RO) {
+		gctl_error(req, "Cannot change keys for read-only provider.");
+		return;
+	}
 	cp = LIST_FIRST(&sc->sc_geom->consumer);
 	pp = cp->provider;
 
@@ -395,12 +558,10 @@
 	mkeydst = md.md_mkeys + nkey * G_ELI_MKEYLEN;
 	md.md_keys |= (1 << nkey);
 
-	bcopy(sc->sc_ivkey, mkeydst, sizeof(sc->sc_ivkey));
-	bcopy(sc->sc_datakey, mkeydst + sizeof(sc->sc_ivkey),
-	    sizeof(sc->sc_datakey));
+	bcopy(sc->sc_mkey, mkeydst, sizeof(sc->sc_mkey));
 
 	/* Encrypt Master Key with the new key. */
-	error = g_eli_mkey_encrypt(md.md_algo, key, md.md_keylen, mkeydst);
+	error = g_eli_mkey_encrypt(md.md_ealgo, key, md.md_keylen, mkeydst);
 	bzero(key, sizeof(key));
 	if (error != 0) {
 		bzero(&md, sizeof(md));
@@ -452,6 +613,10 @@
 		gctl_error(req, "Provider %s is invalid.", name);
 		return;
 	}
+	if (sc->sc_flags & G_ELI_FLAG_RO) {
+		gctl_error(req, "Cannot delete keys for read-only provider.");
+		return;
+	}
 	cp = LIST_FIRST(&sc->sc_geom->consumer);
 	pp = cp->provider;
 
@@ -477,7 +642,7 @@
 			gctl_error(req, "No '%s' argument.", "force");
 			return;
 		}
-	
+
 		valp = gctl_get_paraml(req, "keyno", sizeof(*valp));
 		if (valp == NULL) {
 			gctl_error(req, "No '%s' argument.", "keyno");
@@ -519,6 +684,11 @@
 			G_ELI_DEBUG(0, "Cannot store metadata on %s "
 			    "(error=%d).", pp->name, error);
 		}
+		/*
+		 * Flush write cache so we don't overwrite data N times in cache
+		 * and only once on disk.
+		 */
+		g_io_flush(cp);
 	}
 	bzero(&md, sizeof(md));
 	bzero(sector, sizeof(sector));
@@ -534,9 +704,7 @@
 {
 	struct g_provider *pp;
 	struct g_consumer *cp;
-	u_char *sector;
-	int err, error = 0;
-	u_int i;
+	int error = 0;
 
 	g_topology_assert();
 
@@ -549,22 +717,31 @@
 	cp = LIST_FIRST(&sc->sc_geom->consumer);
 	pp = cp->provider;
 
-	sector = malloc(pp->sectorsize, M_ELI, M_WAITOK);
-	for (i = 0; i <= g_eli_overwrites; i++) {
-		if (i == g_eli_overwrites)
-			bzero(sector, pp->sectorsize);
-		else
-			arc4rand(sector, pp->sectorsize, 0);
-		err = g_write_data(cp, pp->mediasize - pp->sectorsize, sector,
-		    pp->sectorsize);
-		if (err != 0) {
-			G_ELI_DEBUG(0, "Cannot erase metadata on %s "
-			    "(error=%d).", pp->name, err);
-			if (error == 0)
-				error = err;
+	if (sc->sc_flags & G_ELI_FLAG_RO) {
+		G_ELI_DEBUG(0, "WARNING: Metadata won't be erased on read-only "
+		    "provider: %s.", pp->name);
+	} else {
+		u_char *sector;
+		u_int i;
+		int err;
+
+		sector = malloc(pp->sectorsize, M_ELI, M_WAITOK);
+		for (i = 0; i <= g_eli_overwrites; i++) {
+			if (i == g_eli_overwrites)
+				bzero(sector, pp->sectorsize);
+			else
+				arc4rand(sector, pp->sectorsize, 0);
+			err = g_write_data(cp, pp->mediasize - pp->sectorsize,
+			    sector, pp->sectorsize);
+			if (err != 0) {
+				G_ELI_DEBUG(0, "Cannot erase metadata on %s "
+				    "(error=%d).", pp->name, err);
+				if (error == 0)
+					error = err;
+			}
 		}
+		free(sector, M_ELI);
 	}
-	free(sector, M_ELI);
 	if (error == 0)
 		G_ELI_DEBUG(0, "%s has been killed.", pp->name);
 	g_eli_destroy(sc, 1);
@@ -651,6 +828,8 @@
 		g_eli_ctl_detach(req, mp);
 	else if (strcmp(verb, "onetime") == 0)
 		g_eli_ctl_onetime(req, mp);
+	else if (strcmp(verb, "configure") == 0)
+		g_eli_ctl_configure(req, mp);
 	else if (strcmp(verb, "setkey") == 0)
 		g_eli_ctl_setkey(req, mp);
 	else if (strcmp(verb, "delkey") == 0)
Index: g_eli_crypto.c
===================================================================
RCS file: /home/cvs/src/sys/geom/eli/g_eli_crypto.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/eli/g_eli_crypto.c -L sys/geom/eli/g_eli_crypto.c -u -r1.2 -r1.3
--- sys/geom/eli/g_eli_crypto.c
+++ sys/geom/eli/g_eli_crypto.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/eli/g_eli_crypto.c,v 1.1.2.2 2006/03/01 17:52:15 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/eli/g_eli_crypto.c,v 1.5 2007/09/01 06:33:01 pjd Exp $");
 
 #include <sys/param.h>
 #ifdef _KERNEL
@@ -73,7 +73,7 @@
 	cri.cri_alg = algo;
 	cri.cri_key = __DECONST(void *, key);
 	cri.cri_klen = keysize;
-	error = crypto_newsession(&sid, &cri, 0);
+	error = crypto_newsession(&sid, &cri, CRYPTOCAP_F_SOFTWARE);
 	if (error != 0)
 		return (error);
 	p = malloc(sizeof(*crp) + sizeof(*crd) + sizeof(*uio) + sizeof(*iov),
@@ -97,7 +97,7 @@
 
 	crd->crd_skip = 0;
 	crd->crd_len = datasize;
-	crd->crd_flags = CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT | CRD_F_KEY_EXPLICIT;
+	crd->crd_flags = CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT;
 	if (enc)
 		crd->crd_flags |= CRD_F_ENCRYPT;
 	crd->crd_alg = algo;
@@ -158,6 +158,21 @@
 	case CRYPTO_BLF_CBC:
 		type = EVP_bf_cbc();
 		break;
+	case CRYPTO_CAMELLIA_CBC:
+		switch (keysize) {
+		case 128:
+			type = EVP_camellia_128_cbc();
+			break;
+		case 192:
+			type = EVP_camellia_192_cbc();
+			break;
+		case 256:
+			type = EVP_camellia_256_cbc();
+			break;
+		default:
+			return (EINVAL);
+		}
+		break;
 	case CRYPTO_3DES_CBC:
 		type = EVP_des_ede3_cbc();
 		break;
Index: pkcs5v2.h
===================================================================
RCS file: /home/cvs/src/sys/geom/eli/pkcs5v2.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/eli/pkcs5v2.h -L sys/geom/eli/pkcs5v2.h -u -r1.2 -r1.3
--- sys/geom/eli/pkcs5v2.h
+++ sys/geom/eli/pkcs5v2.h
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/eli/pkcs5v2.h,v 1.1.2.2 2006/03/01 17:52:15 pjd Exp $
+ * $FreeBSD: src/sys/geom/eli/pkcs5v2.h,v 1.2 2006/02/01 12:05:59 pjd Exp $
  */
 
 #ifndef _PKCS5V2_H_
Index: g_shsec.h
===================================================================
RCS file: /home/cvs/src/sys/geom/shsec/g_shsec.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/shsec/g_shsec.h -L sys/geom/shsec/g_shsec.h -u -r1.1.1.1 -r1.2
--- sys/geom/shsec/g_shsec.h
+++ sys/geom/shsec/g_shsec.h
@@ -10,7 +10,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/shsec/g_shsec.h,v 1.3 2005/02/27 23:07:47 pjd Exp $
+ * $FreeBSD: src/sys/geom/shsec/g_shsec.h,v 1.4 2006/02/01 12:06:01 pjd Exp $
  */
 
 #ifndef	_G_SHSEC_H_
Index: g_shsec.c
===================================================================
RCS file: /home/cvs/src/sys/geom/shsec/g_shsec.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/shsec/g_shsec.c -L sys/geom/shsec/g_shsec.c -u -r1.1.1.1 -r1.2
--- sys/geom/shsec/g_shsec.c
+++ sys/geom/shsec/g_shsec.c
@@ -10,7 +10,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/shsec/g_shsec.c,v 1.3 2005/02/27 23:07:47 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/shsec/g_shsec.c,v 1.6 2006/11/01 12:30:51 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -41,7 +41,7 @@
 #include <geom/shsec/g_shsec.h>
 
 
-static MALLOC_DEFINE(M_SHSEC, "shsec data", "GEOM_SHSEC Data");
+static MALLOC_DEFINE(M_SHSEC, "shsec_data", "GEOM_SHSEC Data");
 
 static uma_zone_t g_shsec_zone;
 
@@ -317,6 +317,7 @@
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
+	case BIO_FLUSH:
 		/*
 		 * Only those requests are supported.
 		 */
@@ -606,7 +607,7 @@
 	free(sc->sc_disks, M_SHSEC);
 	free(sc, M_SHSEC);
 
-	pp = LIST_FIRST(&gp->provider); 
+	pp = LIST_FIRST(&gp->provider);
 	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
 		G_SHSEC_DEBUG(0, "Device %s destroyed.", gp->name);
 
Index: g_uzip.c
===================================================================
RCS file: /home/cvs/src/sys/geom/uzip/g_uzip.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/geom/uzip/g_uzip.c -L sys/geom/uzip/g_uzip.c -u -r1.1.1.2 -r1.2
--- sys/geom/uzip/g_uzip.c
+++ sys/geom/uzip/g_uzip.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/uzip/g_uzip.c,v 1.4.2.3 2006/01/25 15:55:27 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/uzip/g_uzip.c,v 1.12 2007/04/24 06:30:06 simokawa Exp $");
 
 #include <sys/param.h>
 #include <sys/bio.h>
@@ -47,7 +47,7 @@
 #define DPRINTF(a)
 #endif
 
-MALLOC_DEFINE(M_GEOM_UZIP, "GEOM UZIP", "GEOM UZIP data structures");
+MALLOC_DEFINE(M_GEOM_UZIP, "geom_uzip", "GEOM UZIP data structures");
 
 #define UZIP_CLASS_NAME	"UZIP"
 
@@ -162,6 +162,13 @@
 		ulen = MIN(sc->blksz - uoff, bp2->bio_length - upos);
 		len = sc->offsets[i + 1] - sc->offsets[i];
 
+		if (len == 0) {
+			/* All zero block: no cache update */
+			bzero(bp2->bio_data + upos, ulen);
+			upos += ulen;
+			bp2->bio_completed += ulen;
+			continue;
+		}
 		zs.next_in = bp->bio_data + pos;
 		zs.avail_in = len;
 		zs.next_out = sc->last_buf;
@@ -453,7 +460,7 @@
 	g_topology_lock();
 	pp2 = g_new_providerf(gp, "%s", gp->name);
 	pp2->sectorsize = 512;
-	pp2->mediasize = sc->nblocks * sc->blksz;
+	pp2->mediasize = (off_t)sc->nblocks * sc->blksz;
         pp2->flags = pp->flags & G_PF_CANDELETE;
         if (pp->stripesize > 0) {
                 pp2->stripesize = pp->stripesize;
@@ -522,5 +529,5 @@
 	.spoiled = g_uzip_spoiled,
 };
 
-DECLARE_GEOM_CLASS(g_uzip_class, geom_uzip);
-MODULE_DEPEND(geom_uzip, zlib, 1, 1, 1);
+DECLARE_GEOM_CLASS(g_uzip_class, g_uzip);
+MODULE_DEPEND(g_uzip, zlib, 1, 1, 1);
--- sys/geom/geom_gpt.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*-
- * Copyright (c) 2002 Marcel Moolenaar
- * Copyright (c) 2002 Poul-Henning Kamp
- * Copyright (c) 2002 Networks Associates Technology, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The names of the authors may not be used to endorse or promote
- *    products derived from this software without specific prior written
- *    permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_gpt.c,v 1.32.2.2 2006/06/11 20:02:58 marcel Exp $");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/bio.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/diskmbr.h>
-#include <sys/endian.h>
-#include <sys/sbuf.h>
-#include <sys/uuid.h>
-#include <sys/gpt.h>
-#include <geom/geom.h>
-#include <geom/geom_slice.h>
-
-CTASSERT(offsetof(struct gpt_hdr, padding) == 92);
-CTASSERT(sizeof(struct gpt_ent) == 128);
-
-/*
- * XXX: GEOM is not dynamic enough. We are forced to use a compile-time
- * limit. The minimum number of partitions (128) as required by EFI is
- * most of the time just a waste of space.
- */
-#define	GPT_MAX_SLICES	128
-
-struct g_gpt_softc {
-	struct gpt_ent *part[GPT_MAX_SLICES];
-};
-
-static int
-is_gpt_hdr(struct gpt_hdr *hdr)
-{
-	uint32_t crc;
-
-	if (memcmp(hdr->hdr_sig, GPT_HDR_SIG, sizeof(hdr->hdr_sig)))
-		return (0);
-	crc = le32toh(hdr->hdr_crc_self);
-	hdr->hdr_crc_self = 0;
-	if (crc32(hdr, le32toh(hdr->hdr_size)) != crc)
-		return (0);
-	hdr->hdr_crc_self = htole32(crc);
-	/* We're happy... */
-	return (1);
-}
-
-static int
-is_pmbr(char *mbr)
-{
-	uint8_t *typ;
-	int i;
-	uint16_t magic;
-
-	magic = le16toh(*(uint16_t *)(uintptr_t)(mbr + DOSMAGICOFFSET));
-	if (magic != DOSMAGIC)
-		return (0);
-
-	for (i = 0; i < 4; i++) {
-		typ = mbr + DOSPARTOFF + i * sizeof(struct dos_partition) +
-		    offsetof(struct dos_partition, dp_typ);
-		if (*typ != 0 && *typ != DOSPTYP_PMBR)
-			return (0);
-	}
-
-	return (1);
-}
-
-static int
-g_gpt_start(struct bio *bp)
-{
-
-	return (0);
-}
-
-static void
-g_gpt_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
-    struct g_consumer *cp, struct g_provider *pp)
-{
-	struct g_slicer *gsp = gp->softc;
-	struct g_gpt_softc *gs = gsp->softc;
-	struct uuid uuid;
-
-	g_slice_dumpconf(sb, indent, gp, cp, pp);
-
-	if (pp != NULL) {
-		le_uuid_dec(&gs->part[pp->index]->ent_type, &uuid);
-		if (indent != NULL)
-			sbuf_printf(sb, "%s<type>", indent);
-		else
-			sbuf_printf(sb, " ty ");
-		sbuf_printf_uuid(sb, &uuid);
-		if (indent != NULL)
-			sbuf_printf(sb, "</type>\n");
-	}
-}
-
-static struct g_geom *
-g_gpt_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
-{
-	struct uuid tmp;
-	struct g_consumer *cp;
-	struct g_geom *gp;
-	struct g_gpt_softc *gs;
-	u_char *buf, *mbr;
-	struct gpt_ent *ent, *part;
-	struct gpt_hdr *hdr;
-	u_int i, secsz, tblsz;
-	int ps;
-	uint32_t entries, entsz;
-
-	g_trace(G_T_TOPOLOGY, "g_gpt_taste(%s,%s)", mp->name, pp->name);
-	g_topology_assert();
-
-	/*
-	 * XXX: I don't like to hardcode a maximum number of slices, since
-	 * it's wasting space most of the time and insufficient any time.
-	 * It's easier for now...
-	 */
-	gp = g_slice_new(mp, GPT_MAX_SLICES, pp, &cp, &gs, sizeof(*gs),
-	    g_gpt_start);
-	if (gp == NULL)
-		return (NULL);
-
-	g_topology_unlock();
-
-	do {
-		mbr = NULL;
-
-		secsz = cp->provider->sectorsize;
-		if (secsz < 512)
-			break;
-
-		/* XXX: we need to get the media size as well. */
-
-		/* Read both the MBR sector and the GPT sector. */
-		mbr = g_read_data(cp, 0, 2 * secsz, NULL);
-		if (mbr == NULL)
-			break;
-
-		if (!is_pmbr(mbr))
-			break;
-
-		hdr = (void*)(mbr + secsz);
-
-		/*
-		 * XXX: if we don't have a GPT header at LBA 1, we should
-		 * check if there's a backup GPT at the end of the medium. If
-		 * we have a valid backup GPT, we should restore the primary
-		 * GPT and claim this lunch.
-		 */
-		if (!is_gpt_hdr(hdr))
-			break;
-
-		entries = le32toh(hdr->hdr_entries);
-		entsz = le32toh(hdr->hdr_entsz);
-		tblsz = (entries * entsz + secsz - 1) & ~(secsz - 1);
-		buf = g_read_data(cp, le64toh(hdr->hdr_lba_table) * secsz,
-		    tblsz, NULL);
-		if (buf == NULL)
-			break;
-
-		for (i = 0; i < entries; i++) {
-			struct uuid unused = GPT_ENT_TYPE_UNUSED;
-			struct uuid freebsd = GPT_ENT_TYPE_FREEBSD;
-
-			if (i >= GPT_MAX_SLICES)
-				break;
-			ent = (void*)(buf + i * entsz);
-			le_uuid_dec(&ent->ent_type, &tmp);
-			if (!memcmp(&tmp, &unused, sizeof(unused)))
-				continue;
-			/* XXX: This memory leaks */
-			part = gs->part[i] = g_malloc(entsz, M_WAITOK);
-			if (part == NULL)
-				break;
-			part->ent_type = tmp;
-			le_uuid_dec(&ent->ent_uuid, &part->ent_uuid);
-			part->ent_lba_start = le64toh(ent->ent_lba_start);
-			part->ent_lba_end = le64toh(ent->ent_lba_end);
-			part->ent_attr = le64toh(ent->ent_attr);
-			/* XXX do we need to byte-swap UNICODE-16? */
-			bcopy(ent->ent_name, part->ent_name,
-			    sizeof(part->ent_name));
-			ps = (!memcmp(&tmp, &freebsd, sizeof(freebsd)))
-			    ? 's' : 'p';
-			g_topology_lock();
-			(void)g_slice_config(gp, i, G_SLICE_CONFIG_SET,
-			    part->ent_lba_start * secsz,
-			    (1 + part->ent_lba_end - part->ent_lba_start) *
-			    secsz, secsz, "%s%c%d", gp->name, ps, i + 1);
-			g_topology_unlock();
-		}
-		g_free(buf);
-	} while (0);
-
-	if (mbr != NULL)
-		g_free(mbr);
-
-	g_topology_lock();
-	g_access(cp, -1, 0, 0);
-	if (LIST_EMPTY(&gp->provider)) {
-		g_slice_spoiled(cp);
-		return (NULL);
-	}
-	return (gp);
-}
-
-static struct g_class g_gpt_class = {
-	.name = "GPT",
-	.version = G_VERSION,
-	.taste = g_gpt_taste,
-	.dumpconf = g_gpt_dumpconf,
-};
-
-DECLARE_GEOM_CLASS(g_gpt_class, g_gpt);
Index: geom_slice.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_slice.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/geom_slice.c -L sys/geom/geom_slice.c -u -r1.2 -r1.3
--- sys/geom/geom_slice.c
+++ sys/geom/geom_slice.c
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_slice.c,v 1.57.2.1 2005/10/08 22:11:38 rodrigc Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_slice.c,v 1.62 2007/05/05 17:52:22 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -172,6 +172,37 @@
 }
 
+/*
+ * Completion handler installed on the clone of a GEOM::ident BIO_GETATTR
+ * request.  When the request succeeded and returned a non-empty ident,
+ * append "s<index>" to it, where index is that of the provider the parent
+ * bio was addressed to.  If the result does not fit into bio_length bytes,
+ * the request fails with EFAULT.  The bio is finally passed to
+ * g_std_done().
+ */
+static void
+g_slice_done(struct bio *bp)
+{
+	char suffix[8];
+
+	KASSERT(bp->bio_cmd == BIO_GETATTR &&
+	    strcmp(bp->bio_attribute, "GEOM::ident") == 0,
+	    ("bio_cmd=0x%x bio_attribute=%s", bp->bio_cmd, bp->bio_attribute));
+
+	/* Leave failed requests and empty idents untouched. */
+	if (bp->bio_error != 0 || bp->bio_data[0] == '\0') {
+		g_std_done(bp);
+		return;
+	}
+
+	/* Add index to the ident received. */
+	snprintf(suffix, sizeof(suffix), "s%d",
+	    bp->bio_parent->bio_to->index);
+	if (strlcat(bp->bio_data, suffix, bp->bio_length) >= bp->bio_length)
+		bp->bio_error = EFAULT;
+	g_std_done(bp);
+}
+
 static void
 g_slice_start(struct bio *bp)
 {
 	struct bio *bp2;
@@ -251,6 +273,16 @@
 		/* Give the real method a chance to override */
 		if (gsp->start != NULL && gsp->start(bp))
 			return;
+		if (!strcmp("GEOM::ident", bp->bio_attribute)) {
+			bp2 = g_clone_bio(bp);
+			if (bp2 == NULL) {
+				g_io_deliver(bp, ENOMEM);
+				return;
+			}
+			bp2->bio_done = g_slice_done;
+			g_io_request(bp2, cp);
+			return;
+		}
 		if (!strcmp("GEOM::kerneldump", bp->bio_attribute)) {
 			struct g_kerneldump *gkd;
 
@@ -260,6 +292,8 @@
 				gkd->length = gsp->slices[idx].length;
 			/* now, pass it on downwards... */
 		}
+		/* FALLTHROUGH */
+	case BIO_FLUSH:
 		bp2 = g_clone_bio(bp);
 		if (bp2 == NULL) {
 			g_io_deliver(bp, ENOMEM);
@@ -338,8 +372,7 @@
 			return (0);
 		if (bootverbose)
 			printf("GEOM: Deconfigure %s\n", pp->name);
-		pp->flags |= G_PF_WITHER;
-		g_orphan_provider(pp, ENXIO);
+		g_wither_provider(pp, ENXIO);
 		gsl->provider = NULL;
 		gsp->nprovider--;
 		return (0);
Index: geom_subr.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_subr.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/geom_subr.c -L sys/geom/geom_subr.c -u -r1.1.1.1 -r1.2
--- sys/geom/geom_subr.c
+++ sys/geom/geom_subr.c
@@ -34,7 +34,9 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_subr.c,v 1.87.2.1 2005/11/26 22:55:20 jdp Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_subr.c,v 1.91 2007/05/05 16:33:44 pjd Exp $");
+
+#include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -53,6 +55,10 @@
 #include <geom/geom_int.h>
 #include <machine/stdarg.h>
 
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
 struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes);
 static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms);
 char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim;
@@ -318,6 +324,22 @@
 }
 
 /*
+ * Convenience function to destroy a particular provider.
+ *
+ * Mark the provider with G_PF_WITHER and orphan it with the given error,
+ * unless G_PF_ORPHAN is already set on it.
+ */
+void
+g_wither_provider(struct g_provider *pp, int error)
+{
+
+	pp->flags |= G_PF_WITHER;
+	if ((pp->flags & G_PF_ORPHAN) != 0)
+		return;
+	g_orphan_provider(pp, error);
+}
+
+/*
  * This function is called (repeatedly) until the has withered away.
  */
 void
@@ -772,20 +790,42 @@
 }
 
+/*
+ * String variant of g_handleattr(): passes len == 0 so that 'str' is
+ * copied into the bio as a NUL-terminated string rather than as a
+ * fixed-size object.  Returns whatever g_handleattr() returns.
+ */
+int
+g_handleattr_str(struct bio *bp, const char *attribute, char *str)
+{
+	int handled;
+
+	handled = g_handleattr(bp, attribute, str, 0);
+	return (handled);
+}
+
 int
 g_handleattr(struct bio *bp, const char *attribute, void *val, int len)
 {
-	int error;
+	int error = 0;
 
 	if (strcmp(bp->bio_attribute, attribute))
 		return (0);
-	if (bp->bio_length != len) {
-		printf("bio_length %jd len %d -> EFAULT\n",
-		    (intmax_t)bp->bio_length, len);
-		error = EFAULT;
-	} else {
-		error = 0;
+	if (len == 0) {
+		bzero(bp->bio_data, bp->bio_length);
+		if (strlcpy(bp->bio_data, val, bp->bio_length) >=
+		    bp->bio_length) {
+			printf("%s: %s bio_length %jd len %zu -> EFAULT\n",
+			    __func__, bp->bio_to->name,
+			    (intmax_t)bp->bio_length, strlen(val));
+			error = EFAULT;
+		}
+	} else if (bp->bio_length == len) {
 		bcopy(val, bp->bio_data, len);
 		bp->bio_completed = len;
+	} else {
+		printf("%s: %s bio_length %jd len %d -> EFAULT\n", __func__,
+		    bp->bio_to->name, (intmax_t)bp->bio_length, len);
+		error = EFAULT;
 	}
 	g_io_deliver(bp, error);
 	return (1);
@@ -908,7 +941,7 @@
 	return (0);
 }
 
-#ifdef DIAGNOSTIC
+#if defined(DIAGNOSTIC) || defined(DDB)
 /*
  * This function walks (topologically unsafely) the mesh and return a
  * non-zero integer if it finds the argument pointer is an object.
@@ -942,3 +975,208 @@
 	return(0);
 }
 #endif
+
+#ifdef DDB
+/* gprintf()/gprintln(): printf() preceded by the caller's `indent` spaces. */
+#define	gprintf(...)	do {						\
+	printf("%*s", indent, "");					\
+	printf(__VA_ARGS__);						\
+} while (0)
+#define	gprintln(...)	do {						\
+	gprintf(__VA_ARGS__);						\
+	printf("\n");							\
+} while (0)
+/* If flag is set in obj->flags, append sflag to caller's str, comma-separated. */
+#define	ADDFLAG(obj, flag, sflag)	do {				\
+	if ((obj)->flags & (flag)) {					\
+		if (comma)						\
+			strlcat(str, ",", size);			\
+		strlcat(str, (sflag), size);				\
+		comma = 1;						\
+	}								\
+} while (0)
+
+static char *
+provider_flags_to_string(struct g_provider *pp, char *str, size_t size)
+{
+	int comma = 0;		/* read and updated by ADDFLAG() */
+
+	bzero(str, size);
+	if (pp->flags != 0) {
+		ADDFLAG(pp, G_PF_CANDELETE, "G_PF_CANDELETE");
+		ADDFLAG(pp, G_PF_WITHER, "G_PF_WITHER");
+		ADDFLAG(pp, G_PF_ORPHAN, "G_PF_ORPHAN");
+	} else {
+		strlcpy(str, "NONE", size);
+	}
+	return (str);
+}
+
+static char *
+geom_flags_to_string(struct g_geom *gp, char *str, size_t size)
+{
+	int comma = 0;		/* read and updated by ADDFLAG() */
+
+	/* An all-clear flag word is reported as "NONE". */
+	bzero(str, size);
+	if (gp->flags != 0)
+		ADDFLAG(gp, G_GEOM_WITHER, "G_GEOM_WITHER");
+	else
+		strlcpy(str, "NONE", size);
+	return (str);
+}
+static void
+db_show_geom_consumer(int indent, struct g_consumer *cp)
+{
+	/* Non-zero indent: compact one-line summary, then done. */
+	if (indent != 0) {
+		gprintf("consumer: %p (%s), access=r%dw%de%d", cp,
+		    cp->provider != NULL ? cp->provider->name : "none",
+		    cp->acr, cp->acw, cp->ace);
+		if (cp->spoiled)
+			printf(", spoiled=%d", cp->spoiled);
+		printf("\n");
+		return;
+	}
+	/* indent == 0: detailed view, one field per line. */
+	gprintln("consumer: %p", cp);
+	gprintln("  class:    %s (%p)", cp->geom->class->name,
+	    cp->geom->class);
+	gprintln("  geom:     %s (%p)", cp->geom->name, cp->geom);
+	if (cp->provider == NULL)
+		gprintln("  provider: none");
+	else
+		gprintln("  provider: %s (%p)", cp->provider->name,
+		    cp->provider);
+	gprintln("  access:   r%dw%de%d", cp->acr, cp->acw, cp->ace);
+	gprintln("  spoiled:  %d", cp->spoiled);
+	gprintln("  nstart:   %u", cp->nstart);
+	gprintln("  nend:     %u", cp->nend);
+}
+
+static void
+db_show_geom_provider(int indent, struct g_provider *pp)
+{
+	struct g_consumer *cp;
+	char flags[64];
+
+	if (indent != 0) {
+		/* Non-zero indent: one summary line, flags only when set. */
+		gprintf("provider: %s (%p), access=r%dw%de%d",
+		    pp->name, pp, pp->acr, pp->acw, pp->ace);
+		if (pp->flags != 0) {
+			printf(", flags=%s (0x%04x)",
+			    provider_flags_to_string(pp, flags, sizeof(flags)),
+			    pp->flags);
+		}
+		printf("\n");
+	} else {
+		gprintln("provider: %s (%p)", pp->name, pp);
+		gprintln("  class:        %s (%p)", pp->geom->class->name,
+		    pp->geom->class);
+		gprintln("  geom:         %s (%p)", pp->geom->name, pp->geom);
+		gprintln("  mediasize:    %jd", (intmax_t)pp->mediasize);
+		gprintln("  sectorsize:   %u", pp->sectorsize);
+		gprintln("  stripesize:   %u", pp->stripesize);
+		gprintln("  stripeoffset: %u", pp->stripeoffset);
+		gprintln("  access:       r%dw%de%d", pp->acr, pp->acw,
+		    pp->ace);
+		gprintln("  flags:        %s (0x%04x)",
+		    provider_flags_to_string(pp, flags, sizeof(flags)),
+		    pp->flags);
+		gprintln("  error:        %d", pp->error);
+		gprintln("  nstart:       %u", pp->nstart);
+		gprintln("  nend:         %u", pp->nend);
+		if (LIST_EMPTY(&pp->consumers))
+			gprintln("  consumers:    none");
+	}
+	/* LIST_FOREACH does nothing on an empty list; no guard needed. */
+	LIST_FOREACH(cp, &pp->consumers, consumers)
+		db_show_geom_consumer(indent + 2, cp);
+}
+
+static void
+db_show_geom_geom(int indent, struct g_geom *gp)
+{
+	struct g_provider *pp;
+	struct g_consumer *cp;
+	char flags[64];
+
+	if (indent != 0) {
+		/* Non-zero indent: one summary line, flags only when set. */
+		gprintf("geom: %s (%p), rank=%d", gp->name, gp, gp->rank);
+		if (gp->flags != 0) {
+			printf(", flags=%s (0x%04x)",
+			    geom_flags_to_string(gp, flags, sizeof(flags)),
+			    gp->flags);
+		}
+		printf("\n");
+	} else {
+		gprintln("geom: %s (%p)", gp->name, gp);
+		gprintln("  class:     %s (%p)", gp->class->name, gp->class);
+		gprintln("  flags:     %s (0x%04x)",
+		    geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags);
+		gprintln("  rank:      %d", gp->rank);
+		if (LIST_EMPTY(&gp->provider))
+			gprintln("  providers: none");
+		if (LIST_EMPTY(&gp->consumer))
+			gprintln("  consumers: none");
+	}
+	/* Providers first, one level deeper; empty lists print nothing. */
+	LIST_FOREACH(pp, &gp->provider, provider)
+		db_show_geom_provider(indent + 2, pp);
+	/* Then the geom's own consumers, at the same depth. */
+	LIST_FOREACH(cp, &gp->consumer, consumer)
+		db_show_geom_consumer(indent + 2, cp);
+}
+
+static void
+db_show_geom_class(struct g_class *mp)
+{
+	struct g_geom *gp;
+
+	printf("class: %s (%p)\n", mp->name, mp);
+	LIST_FOREACH(gp, &mp->geom, geom)	/* each geom, compact form */
+		db_show_geom_geom(2, gp);
+}
+
+/*
+ * Print the GEOM topology or the given object.
+ */
+DB_SHOW_COMMAND(geom, db_show_geom)
+{
+	struct g_class *mp;
+
+	if (!have_addr) {
+		/* Without an address, walk every class in g_classes. */
+		LIST_FOREACH(mp, &g_classes, class) {
+			db_show_geom_class(mp);
+			printf("\n");
+		}
+		return;
+	}
+	switch (g_valid_obj((void *)addr)) {
+	case 1:
+		db_show_geom_class((struct g_class *)addr);
+		break;
+	case 2:
+		db_show_geom_geom(0, (struct g_geom *)addr);
+		break;
+	case 3:
+		db_show_geom_consumer(0, (struct g_consumer *)addr);
+		break;
+	case 4:
+		db_show_geom_provider(0, (struct g_provider *)addr);
+		break;
+	default:
+		printf("Not a GEOM object.\n");
+		break;
+	}
+}
+
+#undef	gprintf
+#undef	gprintln
+#undef	ADDFLAG
+
+#endif	/* DDB */
Index: geom_dev.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_dev.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/geom/geom_dev.c -L sys/geom/geom_dev.c -u -r1.3 -r1.4
--- sys/geom/geom_dev.c
+++ sys/geom/geom_dev.c
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_dev.c,v 1.89 2005/03/18 06:57:58 phk Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_dev.c,v 1.94 2007/05/05 17:02:19 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -220,7 +220,7 @@
 			break;
  		if (cp->nstart == cp->nend)
 			break;
-		tsleep(&i, PRIBIO, "gdevwclose", hz / 10);
+		pause("gdevwclose", hz / 10);
 		i += hz / 10;
 	}
 	if (cp->acr == 0 && cp->acw == 0 && cp->nstart != cp->nend) {
@@ -245,6 +245,7 @@
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_kerneldump kd;
+	off_t offset, length;
 	int i, error;
 	u_int u;
 
@@ -294,6 +295,25 @@
 		if (!error)
 			dev->si_flags |= SI_DUMPDEV;
 		break;
+	case DIOCGFLUSH:
+		error = g_io_flush(cp);
+		break;
+	case DIOCGDELETE:
+		offset = ((off_t *)data)[0];
+		length = ((off_t *)data)[1];
+		if ((offset % cp->provider->sectorsize) != 0 ||
+		    (length % cp->provider->sectorsize) != 0 ||
+		    length <= 0 || length > MAXPHYS) {
+			printf("%s: offset=%jd length=%jd\n", __func__,
+			    (intmax_t)offset, (intmax_t)length);
+			error = EINVAL;
+			break;
+		}
+		error = g_delete_data(cp, offset, length);
+		break;
+	case DIOCGIDENT:
+		error = g_io_getattr("GEOM::ident", cp, &i, data);
+		break;
 
 	default:
 		if (cp->provider->geom->ioctl != NULL) {
@@ -358,7 +378,7 @@
 		bp2 = g_clone_bio(bp);
 		if (bp2 != NULL)
 			break;
-		tsleep(&bp, PRIBIO, "gdstrat", hz / 10);
+		pause("gdstrat", hz / 10);
 	}
 	KASSERT(bp2 != NULL, ("XXX: ENOMEM in a bad place"));
 	bp2->bio_done = g_dev_done;
@@ -408,7 +428,7 @@
 
 	/* Wait for the cows to come home */
 	while (cp->nstart != cp->nend)
-		msleep(&dev, NULL, PRIBIO, "gdevorphan", hz / 10);
+		pause("gdevorphan", hz / 10);
 
 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
--- sys/geom/geom_apple.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*-
- * Copyright (c) 2002 Peter Grehan.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * GEOM module for Apple Partition Maps
- *  As described in 'Inside Macintosh Vol 3: About the SCSI Manager -
- *    The Structure of Block Devices"
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_apple.c,v 1.16.8.1 2005/12/29 05:59:51 sobomax Exp $");
-
-#include <sys/param.h>
-#include <sys/endian.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/bio.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-
-#include <sys/sbuf.h>
-#include <geom/geom.h>
-#include <geom/geom_slice.h>
-
-#define APPLE_CLASS_NAME "APPLE"
-
-#define NAPMPART  16	/* Max partitions */
-
-struct apm_partition {
-	char       am_sig[2];
-	u_int32_t  am_mapcnt;
-	u_int32_t  am_start;
-	u_int32_t  am_partcnt;
-	char       am_name[32];
-	char       am_type[32];	
-};
-
-struct g_apple_softc {
-	u_int16_t dd_bsiz;
-	u_int32_t dd_blkcnt;
-	u_int16_t dd_drvrcnt;
-	u_int32_t am_mapcnt0;
-	struct apm_partition apmpart[NAPMPART];
-};
-
-static void
-g_dec_drvrdesc(u_char *ptr, struct g_apple_softc *sc)
-{
-	sc->dd_bsiz = be16dec(ptr + 2);
-	sc->dd_blkcnt = be32dec(ptr + 4);
-	sc->dd_drvrcnt = be32dec(ptr + 16);
-}
-
-static void
-g_dec_apple_partition(u_char *ptr, struct apm_partition *d)
-{
-	d->am_sig[0] = ptr[0];
-	d->am_sig[1] = ptr[1];
-	d->am_mapcnt = be32dec(ptr + 4);
-	d->am_start = be32dec(ptr + 8);
-	d->am_partcnt = be32dec(ptr + 12);
-	memcpy(d->am_name, ptr + 16, 32);
-	memcpy(d->am_type, ptr + 48, 32);
-}
-
-static int
-g_apple_start(struct bio *bp)
-{
-	struct g_provider *pp;
-	struct g_geom *gp;
-	struct g_slicer *gsp;
-
-	pp = bp->bio_to;
-	gp = pp->geom;
-	gsp = gp->softc;
-	if (bp->bio_cmd == BIO_GETATTR) {
-		if (g_handleattr_off_t(bp, "APM::offset",
-		    gsp->slices[pp->index].offset))
-			return (1);
-	}
-	return (0);
-}
-
-static void
-g_apple_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 
-    struct g_consumer *cp __unused, struct g_provider *pp)
-{
-	struct g_apple_softc *mp;
-	struct g_slicer *gsp;
-
-	gsp = gp->softc;
-	mp = gsp->softc;
-	g_slice_dumpconf(sb, indent, gp, cp, pp);
-	if (pp != NULL) {
-		if (indent == NULL) {
-			sbuf_printf(sb, " ty %s",
-			    mp->apmpart[pp->index].am_type);
-                        if (*mp->apmpart[pp->index].am_name)
-                                sbuf_printf(sb, " sn %s",
-                                    mp->apmpart[pp->index].am_name);
-		} else {
-			sbuf_printf(sb, "%s<name>%s</name>\n", indent,
-			    mp->apmpart[pp->index].am_name);
-			sbuf_printf(sb, "%s<type>%s</type>\n", indent,
-			    mp->apmpart[pp->index].am_type);
-		}
-	}
-}
-
-#if 0
-static void
-g_apple_print()
-{
-
-	/* XXX */
-}
-#endif
-
-static struct g_geom *
-g_apple_taste(struct g_class *mp, struct g_provider *pp, int insist)
-{
-	struct g_geom *gp;
-	struct g_consumer *cp;
-	int i;
-	struct g_apple_softc *ms;
-	struct apm_partition *apm;
-	u_int sectorsize;
-	u_char *buf;
-
-	g_trace(G_T_TOPOLOGY, "apple_taste(%s,%s)", mp->name, pp->name);
-	g_topology_assert();
-	gp = g_slice_new(mp, NAPMPART, pp, &cp, &ms, sizeof *ms, g_apple_start);
-	if (gp == NULL)
-		return (NULL);
-	g_topology_unlock();
-	do {
-		if (gp->rank != 2 && insist == 0)
-			break;
-
-		sectorsize = cp->provider->sectorsize;
-		if (sectorsize != 512)
-			break;
-
-		buf = g_read_data(cp, 0, sectorsize, NULL);
-		if (buf == NULL)
-			break;
-
-		/*
-		 * Test for the sector 0 driver record signature, and 
-		 * validate sector and disk size
-		 */
-		if (buf[0] != 'E' && buf[1] != 'R') {
-			g_free(buf);
-			break;
-		}
-		g_dec_drvrdesc(buf, ms);
-		g_free(buf);
-
-		if (ms->dd_bsiz != 512) {
-			break;
-		}
-
-		/*
-		 * Read in the first partition map
-		 */
-		buf = g_read_data(cp, sectorsize, sectorsize,  NULL);
-		if (buf == NULL)
-			break;
-
-		/*
-		 * Decode the first partition: it's another indication of
-		 * validity, as well as giving the size of the partition
-		 * map
-		 */
-		apm = &ms->apmpart[0];
-		g_dec_apple_partition(buf, apm);
-		g_free(buf);
-		
-		if (apm->am_sig[0] != 'P' || apm->am_sig[1] != 'M')
-			break;
-		ms->am_mapcnt0 = apm->am_mapcnt;
-	       
-		buf = g_read_data(cp, 2 * sectorsize, 
-		    (NAPMPART - 1) * sectorsize,  NULL);
-		if (buf == NULL)
-			break;
-
-		for (i = 1; i < NAPMPART; i++) {
-			g_dec_apple_partition(buf + ((i - 1) * sectorsize),
-			    &ms->apmpart[i]);
-		}
-
-		for (i = 0; i < NAPMPART; i++) {
-			apm = &ms->apmpart[i];
-
-			/*
-			 * Validate partition sig and global mapcount
-			 */
-			if (apm->am_sig[0] != 'P' ||
-			    apm->am_sig[1] != 'M')
-				continue;
-			if (apm->am_mapcnt != ms->am_mapcnt0)
-				continue;
-
-			if (bootverbose) {
-				printf("APM Slice %d (%s/%s) on %s:\n", 
-				    i + 1, apm->am_name, apm->am_type, 
-				    gp->name);
-				/* g_apple_print(i, dp + i); */
-			}
-			g_topology_lock();
-			g_slice_config(gp, i, G_SLICE_CONFIG_SET,
-			    (off_t)apm->am_start << 9ULL,
-			    (off_t)apm->am_partcnt << 9ULL,
-			    sectorsize,
-			    "%ss%d", gp->name, i + 1);
-			g_topology_unlock();
-		}
-		g_free(buf);
-		break;
-	} while(0);
-	g_topology_lock();
-	g_access(cp, -1, 0, 0);
-	if (LIST_EMPTY(&gp->provider)) {
-		g_slice_spoiled(cp);
-		return (NULL);
-	}
-	return (gp);
-}
-
-
-static struct g_class g_apple_class	= {
-	.name = APPLE_CLASS_NAME,
-	.version = G_VERSION,
-	.taste = g_apple_taste,
-	.dumpconf = g_apple_dumpconf,
-};
-
-DECLARE_GEOM_CLASS(g_apple_class, g_apple);
Index: geom_slice.h
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_slice.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/geom_slice.h -L sys/geom/geom_slice.h -u -r1.2 -r1.3
--- sys/geom/geom_slice.h
+++ sys/geom/geom_slice.h
@@ -32,7 +32,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/geom_slice.h,v 1.17 2003/05/31 19:25:05 phk Exp $
+ * $FreeBSD: src/sys/geom/geom_slice.h,v 1.18 2006/02/18 11:21:17 pjd Exp $
  */
 
 #ifndef _GEOM_GEOM_SLICE_H_
Index: geom_vfs.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_vfs.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/geom_vfs.c -L sys/geom/geom_vfs.c -u -r1.2 -r1.3
--- sys/geom/geom_vfs.c
+++ sys/geom/geom_vfs.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_vfs.c,v 1.9.2.1 2006/02/20 00:53:13 yar Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_vfs.c,v 1.11 2007/01/23 10:01:17 kib Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -50,6 +50,7 @@
 	.bop_write =	bufwrite,
 	.bop_strategy =	g_vfs_strategy,	
 	.bop_sync =	bufsync,	
+	.bop_bdflush =	bufbdflush
 };
 
 struct buf_ops *g_vfs_bufops = &__g_vfs_bufops;
@@ -149,7 +150,7 @@
 		return (error);
 	}
 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
-	vnode_create_vobject_off(vp, pp->mediasize, curthread);
+	vnode_create_vobject(vp, pp->mediasize, curthread);
 	VFS_UNLOCK_GIANT(vfslocked);
 	*cpp = cp;
 	bo = &vp->v_bufobj;
Index: geom_ctl.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_ctl.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/geom_ctl.c -L sys/geom/geom_ctl.c -u -r1.1.1.1 -r1.2
--- sys/geom/geom_ctl.c
+++ sys/geom/geom_ctl.c
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_ctl.c,v 1.35 2005/04/08 09:28:08 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_ctl.c,v 1.39 2007/03/30 16:32:08 delphij Exp $");
 
 #include "opt_geom.h"
 
@@ -86,8 +86,6 @@
 /*
  * Report an error back to the user in ascii format.  Return whatever copyout
  * returned, or EINVAL if it succeeded.
- * XXX: should not be static.
- * XXX: should take printf like args.
  */
 int
 gctl_error(struct gctl_req *req, const char *fmt, ...)
@@ -270,8 +268,9 @@
 	}
 }
 
-void
-gctl_set_param(struct gctl_req *req, const char *param, void const *ptr, int len)
+int
+gctl_set_param(struct gctl_req *req, const char *param, void const *ptr,
+    int len)
 {
 	int i;
 	struct gctl_req_arg *ap;
@@ -280,20 +279,35 @@
 		ap = &req->arg[i];
 		if (strcmp(param, ap->name))
 			continue;
-		if (!(ap->flag & GCTL_PARAM_WR)) {
-			gctl_error(req, "No write access %s argument", param);
-			return;
-		}
+		if (!(ap->flag & GCTL_PARAM_WR))
+			return (EPERM);
+		ap->flag |= GCTL_PARAM_CHANGED;
 		if (ap->len < len) {
-			gctl_error(req, "Wrong length %s argument", param);
-			return;
+			bcopy(ptr, ap->kvalue, ap->len);
+			return (ENOSPC);
 		}
 		bcopy(ptr, ap->kvalue, len);
-		ap->flag |= GCTL_PARAM_CHANGED;
-		return;
+		return (0);
+	}
+	return (EINVAL);
+}
+
+void
+gctl_set_param_err(struct gctl_req *req, const char *param, void const *ptr,
+    int len)
+{
+
+	switch (gctl_set_param(req, param, ptr, len)) {
+	case EPERM:
+		gctl_error(req, "No write access %s argument", param);
+		break;
+	case ENOSPC:
+		gctl_error(req, "Wrong length %s argument", param);
+		break;
+	case EINVAL:
+		gctl_error(req, "Missing %s argument", param);
+		break;
 	}
-	gctl_error(req, "Missing %s argument", param);
-	return;
 }
 
 void *
@@ -373,7 +387,6 @@
 		if (!strcmp(p, cp->name))
 			return (cp);
 	}
-	gctl_error(req, "Class not found");
 	return (NULL);
 }
 
@@ -385,17 +398,17 @@
 	struct g_geom *gp;
 
 	p = gctl_get_asciiparam(req, arg);
-	if (p != NULL) {
-		LIST_FOREACH(mp, &g_classes, class) {
-			if (mpr != NULL && mpr != mp)
-				continue;
-			LIST_FOREACH(gp, &mp->geom, geom) {
-				if (!strcmp(p, gp->name))
-					return (gp);
-			}
+	if (p == NULL)
+		return (NULL);
+	LIST_FOREACH(mp, &g_classes, class) {
+		if (mpr != NULL && mpr != mp)
+			continue;
+		LIST_FOREACH(gp, &mp->geom, geom) {
+			if (!strcmp(p, gp->name))
+				return (gp);
 		}
 	}
-	gctl_error(req, "Geom not found");
+	gctl_error(req, "Geom not found: \"%s\"", p);
 	return (NULL);
 }
 
@@ -411,7 +424,7 @@
 	pp = g_provider_by_name(p);
 	if (pp != NULL)
 		return (pp);
-	gctl_error(req, "Provider not found");
+	gctl_error(req, "Provider not found: \"%s\"", p);
 	return (NULL);
 }
 
@@ -429,11 +442,16 @@
 		gctl_error(req, "Class not found");
 		return;
 	}
-	verb = gctl_get_param(req, "verb", NULL);
-	if (mp->ctlreq == NULL)
+	if (mp->ctlreq == NULL) {
 		gctl_error(req, "Class takes no requests");
-	else
-		mp->ctlreq(req, mp, verb);
+		return;
+	}
+	verb = gctl_get_param(req, "verb", NULL);
+	if (verb == NULL) {
+		gctl_error(req, "Verb missing");
+		return;
+	}
+	mp->ctlreq(req, mp, verb);
 	g_topology_assert();
 }
 
Index: geom_sunlabel.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_sunlabel.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/geom/geom_sunlabel.c -L sys/geom/geom_sunlabel.c -u -r1.1.1.2 -r1.2
--- sys/geom/geom_sunlabel.c
+++ sys/geom/geom_sunlabel.c
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_sunlabel.c,v 1.44.2.1 2005/12/29 05:59:51 sobomax Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_sunlabel.c,v 1.46 2005/11/30 22:15:00 sobomax Exp $");
 
 #include <sys/param.h>
 #include <sys/endian.h>
Index: geom.h
===================================================================
RCS file: /home/cvs/src/sys/geom/geom.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/geom.h -L sys/geom/geom.h -u -r1.2 -r1.3
--- sys/geom/geom.h
+++ sys/geom/geom.h
@@ -32,7 +32,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/geom.h,v 1.90.2.3 2006/09/19 11:44:16 pjd Exp $
+ * $FreeBSD: src/sys/geom/geom.h,v 1.100 2007/05/05 16:35:22 pjd Exp $
  */
 
 #ifndef _GEOM_GEOM_H_
@@ -95,7 +95,7 @@
 	g_fini_t		*fini;
 	g_ctl_destroy_geom_t	*destroy_geom;
 	/*
-	 * Defaults values for geom methods
+	 * Default values for geom methods
 	 */
 	g_start_t		*start;
 	g_spoiled_t		*spoiled;
@@ -230,6 +230,7 @@
 int g_handleattr(struct bio *bp, const char *attribute, void *val, int len);
 int g_handleattr_int(struct bio *bp, const char *attribute, int val);
 int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val);
+int g_handleattr_str(struct bio *bp, const char *attribute, char *str);
 struct g_consumer * g_new_consumer(struct g_geom *gp);
 struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...);
 struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...);
@@ -239,9 +240,12 @@
 void g_std_spoiled(struct g_consumer *cp);
 void g_wither_geom(struct g_geom *gp, int error);
 void g_wither_geom_close(struct g_geom *gp, int error);
+void g_wither_provider(struct g_provider *pp, int error);
 
-#ifdef DIAGNOSTIC
+#if defined(DIAGNOSTIC) || defined(DDB)
 int g_valid_obj(void const *ptr);
+#endif
+#ifdef DIAGNOSTIC
 #define G_VALID_CLASS(foo) \
     KASSERT(g_valid_obj(foo) == 1, ("%p is not a g_class", foo))
 #define G_VALID_GEOM(foo) \
@@ -265,11 +269,13 @@
 void g_destroy_bio(struct bio *);
 void g_io_deliver(struct bio *bp, int error);
 int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr);
+int g_io_flush(struct g_consumer *cp);
 void g_io_request(struct bio *bp, struct g_consumer *cp);
 struct bio *g_new_bio(void);
 struct bio *g_alloc_bio(void);
 void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error);
 int g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length);
+int g_delete_data(struct g_consumer *cp, off_t offset, off_t length);
 void g_print_bio(struct bio *bp);
 
 /* geom_kern.c / geom_kernsim.c */
@@ -340,7 +346,8 @@
 #endif /* _KERNEL */
 
 /* geom_ctl.c */
-void gctl_set_param(struct gctl_req *req, const char *param, void const *ptr, int len);
+int gctl_set_param(struct gctl_req *req, const char *param, void const *ptr, int len);
+void gctl_set_param_err(struct gctl_req *req, const char *param, void const *ptr, int len);
 void *gctl_get_param(struct gctl_req *req, const char *param, int *len);
 char const *gctl_get_asciiparam(struct gctl_req *req, const char *param);
 void *gctl_get_paraml(struct gctl_req *req, const char *param, int len);
Index: geom_ccd.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_ccd.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/geom_ccd.c -L sys/geom/geom_ccd.c -u -r1.1.1.1 -r1.2
--- sys/geom/geom_ccd.c
+++ sys/geom/geom_ccd.c
@@ -50,7 +50,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_ccd.c,v 1.153 2005/01/06 18:27:29 imp Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_ccd.c,v 1.155 2006/04/13 20:35:31 cracauer Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -72,6 +72,8 @@
 /* sc_flags */
 #define CCDF_UNIFORM	0x02	/* use LCCD of sizes for uniform interleave */
 #define CCDF_MIRROR	0x04	/* use mirroring */
+#define CCDF_NO_OFFSET	0x08	/* do not leave space in front */
+#define CCDF_LINUX	0x10	/* use Linux compatibility mode */
 
 /* Mask of user-settable ccd flags. */
 #define CCDF_USERMASK	(CCDF_UNIFORM|CCDF_MIRROR)
@@ -136,6 +138,7 @@
 	u_int32_t	 sc_secsize;		/* # bytes per sector */
 	int		 sc_pick;		/* side of mirror picked */
 	daddr_t		 sc_blk[2];		/* mirror localization */
+	u_int32_t	 sc_offset;		/* actual offset used */
 };
 
 static g_start_t g_ccd_start;
@@ -215,6 +218,20 @@
 
 	maxsecsize = 0;
 	minsize = 0;
+
+	if (cs->sc_flags & CCDF_LINUX) {
+		cs->sc_offset = 0;
+		cs->sc_ileave *= 2;
+		if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2)
+			gctl_error(req, "Mirror mode for Linux raids is "
+			                "only supported with 2 devices");
+	} else {
+		if (cs->sc_flags & CCDF_NO_OFFSET)
+			cs->sc_offset = 0;
+		else
+			cs->sc_offset = CCD_OFFSET;
+
+	}
 	for (ix = 0; ix < cs->sc_ndisks; ix++) {
 		ci = &cs->sc_cinfo[ix];
 
@@ -222,7 +239,7 @@
 		sectorsize = ci->ci_provider->sectorsize;
 		if (sectorsize > maxsecsize)
 			maxsecsize = sectorsize;
-		size = mediasize / DEV_BSIZE - CCD_OFFSET;
+		size = mediasize / DEV_BSIZE - cs->sc_offset;
 
 		/* Truncate to interleave boundary */
 
@@ -604,7 +621,7 @@
 	if (cbp == NULL)
 		return (ENOMEM);
 	cbp->bio_done = g_std_done;
-	cbp->bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
+	cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset);
 	cbp->bio_data = addr;
 	if (cs->sc_ileave == 0)
               cbc = dbtob((off_t)(ci->ci_size - cbn));
@@ -740,6 +757,11 @@
 	sc->sc_unit = *unit;
 	sc->sc_ileave = *ileave;
 
+	if (gctl_get_param(req, "no_offset", NULL))
+		sc->sc_flags |= CCDF_NO_OFFSET;
+	if (gctl_get_param(req, "linux", NULL))
+		sc->sc_flags |= CCDF_LINUX;
+
 	if (gctl_get_param(req, "uniform", NULL))
 		sc->sc_flags |= CCDF_UNIFORM;
 	if (gctl_get_param(req, "mirror", NULL))
@@ -782,7 +804,7 @@
 	else
 		sbuf_printf(sb, "concatenated\n");
 	sbuf_finish(sb);
-	gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
+	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
 	sbuf_delete(sb);
 }
 
@@ -833,7 +855,7 @@
 		sbuf_printf(sb, "\n");
 	}
 	sbuf_finish(sb);
-	gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
+	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
 	sbuf_delete(sb);
 }
 
Index: geom_fox.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_fox.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/geom/geom_fox.c -L sys/geom/geom_fox.c -u -r1.1.1.2 -r1.2
--- sys/geom/geom_fox.c
+++ sys/geom/geom_fox.c
@@ -26,7 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/geom_fox.c,v 1.9.2.1 2005/12/29 05:59:51 sobomax Exp $
+ * $FreeBSD: src/sys/geom/geom_fox.c,v 1.11 2005/11/30 22:15:00 sobomax Exp $
  */
 
 /* This is a GEOM module for handling path selection for multi-path
Index: geom_dump.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_dump.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/geom_dump.c -L sys/geom/geom_dump.c -u -r1.1.1.1 -r1.2
--- sys/geom/geom_dump.c
+++ sys/geom/geom_dump.c
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_dump.c,v 1.31 2004/03/10 08:49:08 phk Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_dump.c,v 1.32 2005/11/12 20:02:02 marcel Exp $");
 
 #include <sys/param.h>
 #include <sys/sbuf.h>
@@ -145,11 +145,10 @@
 	KASSERT(flag != EV_CANCEL, ("g_conftxt was cancelled"));
 	sb = p;
 	g_topology_assert();
-	LIST_FOREACH(mp, &g_classes, class)
-		if (!strcmp(mp->name, "DISK"))
-			break;
-	if (mp != NULL)
-		g_conftxt_class(sb, mp);
+	LIST_FOREACH(mp, &g_classes, class) {
+		if (!strcmp(mp->name, "DISK") || !strcmp(mp->name, "MD"))
+			g_conftxt_class(sb, mp);
+	}
 	sbuf_finish(sb);
 }
 
Index: geom_aes.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_aes.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/geom/geom_aes.c -L sys/geom/geom_aes.c -u -r1.1.1.2 -r1.2
--- sys/geom/geom_aes.c
+++ sys/geom/geom_aes.c
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_aes.c,v 1.28.2.1 2005/12/29 05:59:51 sobomax Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_aes.c,v 1.30 2005/11/30 22:15:00 sobomax Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
Index: geom_event.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_event.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/geom_event.c -L sys/geom/geom_event.c -u -r1.2 -r1.3
--- sys/geom/geom_event.c
+++ sys/geom/geom_event.c
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_event.c,v 1.53.2.2 2006/09/19 11:07:59 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_event.c,v 1.56 2007/09/27 20:18:34 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/malloc.h>
@@ -145,6 +145,8 @@
 	G_VALID_PROVIDER(pp);
 	g_trace(G_T_TOPOLOGY, "g_orphan_register(%s)", pp->name);
 
+	g_cancel_event(pp);
+
 	wf = pp->flags & G_PF_WITHER;
 	pp->flags &= ~G_PF_WITHER;
 
Index: geom_disk.h
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_disk.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/geom_disk.h -L sys/geom/geom_disk.h -u -r1.1.1.1 -r1.2
--- sys/geom/geom_disk.h
+++ sys/geom/geom_disk.h
@@ -31,7 +31,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/geom_disk.h,v 1.4.2.1 2005/11/26 22:55:20 jdp Exp $
+ * $FreeBSD: src/sys/geom/geom_disk.h,v 1.7 2007/05/05 17:12:15 pjd Exp $
  */
 
 #ifndef _GEOM_GEOM_DISK_H_
@@ -42,6 +42,7 @@
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
+#include <sys/disk.h>
 
 struct disk;
 
@@ -83,6 +84,7 @@
 	u_int			d_maxsize;
 	u_int			d_stripeoffset;
 	u_int			d_stripesize;
+	char			d_ident[DISK_IDENT_SIZE];
 
 	/* Fields private to the driver */
 	void			*d_drv1;
@@ -91,6 +93,7 @@
 #define DISKFLAG_NEEDSGIANT	0x1
 #define DISKFLAG_OPEN		0x2
 #define DISKFLAG_CANDELETE	0x4
+#define DISKFLAG_CANFLUSHCACHE	0x8
 
 struct disk *disk_alloc(void);
 void disk_create(struct disk *disk, int version);
@@ -98,7 +101,8 @@
 void disk_gone(struct disk *disk);
 
 #define DISK_VERSION_00		0x58561059
-#define DISK_VERSION		DISK_VERSION_00
+#define DISK_VERSION_01		0x5856105a
+#define DISK_VERSION		DISK_VERSION_01
 
 #endif /* _KERNEL */
 #endif /* _GEOM_GEOM_DISK_H_ */
Index: geom_disk.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_disk.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/geom_disk.c -L sys/geom/geom_disk.c -u -r1.1.1.1 -r1.2
--- sys/geom/geom_disk.c
+++ sys/geom/geom_disk.c
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_disk.c,v 1.96.2.1 2005/11/26 22:55:20 jdp Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_disk.c,v 1.104 2007/05/05 18:09:17 pjd Exp $");
 
 #include "opt_geom.h"
 
@@ -133,7 +133,7 @@
 		if (dp->d_open != NULL) {
 			g_disk_lock_giant(dp);
 			error = dp->d_open(dp);
-			if (error != 0)
+			if (bootverbose && error != 0)
 				printf("Opened disk %s -> %d\n",
 				    pp->name, error);
 			g_disk_unlock_giant(dp);
@@ -202,12 +202,14 @@
 	if (bp2->bio_error == 0)
 		bp2->bio_error = bp->bio_error;
 	bp2->bio_completed += bp->bio_completed;
+	if ((bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE)) &&
+	    (dp = bp2->bio_to->geom->softc)) {
+		devstat_end_transaction_bio(dp->d_devstat, bp);
+	}
 	g_destroy_bio(bp);
 	bp2->bio_inbed++;
 	if (bp2->bio_children == bp2->bio_inbed) {
 		bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed;
-		if ((dp = bp2->bio_to->geom->softc))
-			devstat_end_transaction_bio(dp->d_devstat, bp2);
 		g_io_deliver(bp2, bp2->bio_error);
 	}
 	mtx_unlock(&g_disk_done_mtx);
@@ -261,7 +263,6 @@
 			error = ENOMEM;
 			break;
 		}
-		devstat_start_transaction_bio(dp->d_devstat, bp);
 		do {
 			bp2->bio_offset += off;
 			bp2->bio_length -= off;
@@ -285,6 +286,7 @@
 			bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize;
 			bp2->bio_bcount = bp2->bio_length;
 			bp2->bio_disk = dp;
+			devstat_start_transaction_bio(dp->d_devstat, bp2);
 			g_disk_lock_giant(dp);
 			dp->d_strategy(bp2);
 			g_disk_unlock_giant(dp);
@@ -299,11 +301,31 @@
 			break;
 		else if (g_handleattr_off_t(bp, "GEOM::frontstuff", 0))
 			break;
+		else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident))
+			break;
 		else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
 			g_disk_kerneldump(bp, dp);
 		else 
 			error = ENOIOCTL;
 		break;
+	case BIO_FLUSH:
+		g_trace(G_T_TOPOLOGY, "g_disk_flushcache(%s)",
+		    bp->bio_to->name);
+		if (!(dp->d_flags & DISKFLAG_CANFLUSHCACHE)) {
+			g_io_deliver(bp, ENODEV);
+			return;
+		}
+		bp2 = g_clone_bio(bp);
+		if (bp2 == NULL) {
+			g_io_deliver(bp, ENOMEM);
+			return;
+		}
+		bp2->bio_done = g_disk_done;
+		bp2->bio_disk = dp;
+		g_disk_lock_giant(dp);
+		dp->d_strategy(bp2);
+		g_disk_unlock_giant(dp);
+		break;
 	default:
 		error = EOPNOTSUPP;
 		break;
@@ -376,6 +398,45 @@
 	g_free(dp);
 }
 
+/*
+ * We only allow [a-zA-Z0-9-_@#%.:] characters, the rest is converted to 'x<HH>'.
+ */
+static void
+g_disk_ident_adjust(char *ident, size_t size)
+{
+	char newid[DISK_IDENT_SIZE], tmp[4];
+	size_t len;
+	char *p;
+
+	bzero(newid, sizeof(newid));
+	len = 0;
+	for (p = ident; *p != '\0' && len < sizeof(newid) - 1; p++) {
+		switch (*p) {
+		default:
+			if ((*p < 'a' || *p > 'z') &&
+			    (*p < 'A' || *p > 'Z') &&
+			    (*p < '0' || *p > '9')) {
+				snprintf(tmp, sizeof(tmp), "x%02hhx", *p);
+				strlcat(newid, tmp, sizeof(newid));
+				len += 3;
+				break;
+			}
+			/* FALLTHROUGH */
+		case '-':
+		case '_':
+		case '@':
+		case '#':
+		case '%':
+		case '.':
+		case ':':
+			newid[len++] = *p;
+			break;
+		}
+	}
+	bzero(ident, size);
+	strlcpy(ident, newid, size);
+}
+
 struct disk *
 disk_alloc()
 {
@@ -388,7 +449,7 @@
 void
 disk_create(struct disk *dp, int version)
 {
-	if (version != DISK_VERSION_00) {
+	if (version != DISK_VERSION_00 && version != DISK_VERSION_01) {
 		printf("WARNING: Attempt to add disk %s%d %s",
 		    dp->d_name, dp->d_unit,
 		    " using incompatible ABI version of disk(9)\n");
@@ -405,6 +466,7 @@
 		    dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED,
 		    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
 	dp->d_geom = NULL;
+	g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident));
 	g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL);
 }
 
@@ -428,7 +490,7 @@
 	gp = dp->d_geom;
 	if (gp != NULL)
 		LIST_FOREACH(pp, &gp->provider, provider)
-			g_orphan_provider(pp, ENXIO);
+			g_wither_provider(pp, ENXIO);
 }
 
 static void
Index: geom_bsd.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_bsd.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/geom_bsd.c -L sys/geom/geom_bsd.c -u -r1.2 -r1.3
--- sys/geom/geom_bsd.c
+++ sys/geom/geom_bsd.c
@@ -40,7 +40,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_bsd.c,v 1.73.2.2 2006/03/23 22:40:28 sobomax Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_bsd.c,v 1.78.2.1 2007/12/18 01:24:27 jhb Exp $");
 
 #include <sys/param.h>
 #include <sys/endian.h>
@@ -55,6 +55,8 @@
 #include <sys/md5.h>
 #include <sys/errno.h>
 #include <sys/disklabel.h>
+#include <sys/gpt.h>
+#include <sys/uuid.h>
 #include <geom/geom.h>
 #include <geom/geom_slice.h>
 
@@ -449,8 +451,6 @@
  *
  * If flags == G_TF_NORMAL, the idea is to take a bite of the provider and
  * if we find valid, consistent magic on it, build a geom on it.
- * any magic bits which indicate that we should automatically put a BSD
- * geom on it.
  *
  * There may be cases where the operator would like to put a BSD-geom on
  * providers which do not meet all of the requirements.  This can be done
@@ -463,6 +463,8 @@
  * not implemented here.
  */
 
+static struct uuid freebsd_slice = GPT_ENT_TYPE_FREEBSD;
+
 static struct g_geom *
 g_bsd_taste(struct g_class *mp, struct g_provider *pp, int flags)
 {
@@ -474,6 +476,7 @@
 	struct g_slicer *gsp;
 	u_char hash[16];
 	MD5_CTX md5sum;
+	struct uuid uuid;
 
 	g_trace(G_T_TOPOLOGY, "bsd_taste(%s,%s)", mp->name, pp->name);
 	g_topology_assert();
@@ -529,6 +532,14 @@
 				break;
 		}
 
+		/* Same thing if we are inside a GPT */
+		error = g_getattr("GPT::type", cp, &uuid);
+		if (!error) {
+			if (memcmp(&uuid, &freebsd_slice, sizeof(uuid)) != 0 &&
+			    flags == G_TF_NORMAL)
+				break;
+		}
+
 		/* Get sector size, we need it to read data. */
 		secsize = cp->provider->sectorsize;
 		if (secsize < 512)
@@ -635,8 +646,8 @@
 	gsp = gp->softc;
 	ms = gsp->softc;
 	if (!strcmp(verb, "read mbroffset")) {
-		gctl_set_param(req, "mbroffset",
-		    &ms->mbroffset, sizeof(ms->mbroffset));
+		gctl_set_param_err(req, "mbroffset", &ms->mbroffset,
+		    sizeof(ms->mbroffset));
 		return;
 	} else if (!strcmp(verb, "write label")) {
 		label = gctl_get_paraml(req, "label", LABELSIZE);
Index: geom_pc98.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_pc98.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/geom/geom_pc98.c -L sys/geom/geom_pc98.c -u -r1.1.1.2 -r1.2
--- sys/geom/geom_pc98.c
+++ sys/geom/geom_pc98.c
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_pc98.c,v 1.53.2.2 2005/12/29 05:59:51 sobomax Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_pc98.c,v 1.56 2005/11/30 22:15:00 sobomax Exp $");
 
 #include <sys/param.h>
 #include <sys/endian.h>
Index: geom_io.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_io.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/geom_io.c -L sys/geom/geom_io.c -u -r1.2 -r1.3
--- sys/geom/geom_io.c
+++ sys/geom/geom_io.c
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_io.c,v 1.64.2.3 2006/09/03 16:28:40 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_io.c,v 1.75 2007/05/05 16:35:22 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -43,6 +43,7 @@
 #include <sys/bio.h>
 #include <sys/ktr.h>
 #include <sys/proc.h>
+#include <sys/stack.h>
 
 #include <sys/errno.h>
 #include <geom/geom.h>
@@ -113,6 +114,15 @@
 	struct bio *bp;
 
 	bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
+#ifdef KTR
+	if (KTR_COMPILE & KTR_GEOM) {
+		struct stack st;
+
+		CTR1(KTR_GEOM, "g_new_bio(): %p", bp);
+		stack_save(&st);
+		CTRSTACK(KTR_GEOM, &st, 3, 0);
+	}
+#endif
 	return (bp);
 }
 
@@ -122,13 +132,30 @@
 	struct bio *bp;
 
 	bp = uma_zalloc(biozone, M_WAITOK | M_ZERO);
+#ifdef KTR
+	if (KTR_COMPILE & KTR_GEOM) {
+		struct stack st;
+
+		CTR1(KTR_GEOM, "g_alloc_bio(): %p", bp);
+		stack_save(&st);
+		CTRSTACK(KTR_GEOM, &st, 3, 0);
+	}
+#endif
 	return (bp);
 }
 
 void
 g_destroy_bio(struct bio *bp)
 {
-
+#ifdef KTR
+	if (KTR_COMPILE & KTR_GEOM) {
+		struct stack st;
+
+		CTR1(KTR_GEOM, "g_destroy_bio(): %p", bp);
+		stack_save(&st);
+		CTRSTACK(KTR_GEOM, &st, 3, 0);
+	}
+#endif
 	uma_zfree(biozone, bp);
 }
 
@@ -147,6 +174,15 @@
 		bp2->bio_attribute = bp->bio_attribute;
 		bp->bio_children++;
 	}
+#ifdef KTR
+	if (KTR_COMPILE & KTR_GEOM) {
+		struct stack st;
+
+		CTR2(KTR_GEOM, "g_clone_bio(%p): %p", bp, bp2);
+		stack_save(&st);
+		CTRSTACK(KTR_GEOM, &st, 3, 0);
+	}
+#endif
 	return(bp2);
 }
 
@@ -163,6 +199,15 @@
 	bp2->bio_data = bp->bio_data;
 	bp2->bio_attribute = bp->bio_attribute;
 	bp->bio_children++;
+#ifdef KTR
+	if (KTR_COMPILE & KTR_GEOM) {
+		struct stack st;
+
+		CTR2(KTR_GEOM, "g_duplicate_bio(%p): %p", bp, bp2);
+		stack_save(&st);
+		CTRSTACK(KTR_GEOM, &st, 3, 0);
+	}
+#endif
 	return(bp2);
 }
 
@@ -199,6 +244,26 @@
 	return (error);
 }
 
+int
+g_io_flush(struct g_consumer *cp)
+{
+	struct bio *bp;
+	int error;
+
+	g_trace(G_T_BIO, "bio_flush(%s)", cp->provider->name);
+	bp = g_alloc_bio();
+	bp->bio_cmd = BIO_FLUSH;
+	bp->bio_done = NULL;
+	bp->bio_attribute = NULL;
+	bp->bio_offset = cp->provider->mediasize;
+	bp->bio_length = 0;
+	bp->bio_data = NULL;
+	g_io_request(bp, cp);
+	error = biowait(bp, "gflush");
+	g_destroy_bio(bp);
+	return (error);
+}
+
 static int
 g_io_check(struct bio *bp)
 {
@@ -217,6 +282,7 @@
 		break;
 	case BIO_WRITE:
 	case BIO_DELETE:
+	case BIO_FLUSH:
 		if (cp->acw == 0)
 			return (EPERM);
 		break;
@@ -259,10 +325,33 @@
 
 	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
 	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
-	KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request"));
 	pp = cp->provider;
 	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));
+#ifdef DIAGNOSTIC
+	KASSERT(bp->bio_driver1 == NULL,
+	    ("bio_driver1 used by the consumer (geom %s)", cp->geom->name));
+	KASSERT(bp->bio_driver2 == NULL,
+	    ("bio_driver2 used by the consumer (geom %s)", cp->geom->name));
+	KASSERT(bp->bio_pflags == 0,
+	    ("bio_pflags used by the consumer (geom %s)", cp->geom->name));
+	/*
+	 * Remember consumer's private fields, so we can detect if they were
+	 * modified by the provider.
+	 */
+	bp->_bio_caller1 = bp->bio_caller1;
+	bp->_bio_caller2 = bp->bio_caller2;
+	bp->_bio_cflags = bp->bio_cflags;
+#endif
 
+	if (bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_GETATTR)) {
+		KASSERT(bp->bio_data != NULL,
+		    ("NULL bp->data in g_io_request(cmd=%hhu)", bp->bio_cmd));
+	}
+	if (bp->bio_cmd & (BIO_DELETE|BIO_FLUSH)) {
+		KASSERT(bp->bio_data == NULL,
+		    ("non-NULL bp->data in g_io_request(cmd=%hhu)",
+		    bp->bio_cmd));
+	}
 	if (bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE)) {
 		KASSERT(bp->bio_offset % cp->provider->sectorsize == 0,
 		    ("wrong offset %jd for sectorsize %u",
@@ -316,6 +405,14 @@
 	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
 	pp = bp->bio_to;
 	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));
+#ifdef DIAGNOSTIC
+	KASSERT(bp->bio_caller1 == bp->_bio_caller1,
+	    ("bio_caller1 used by the provider %s", pp->name));
+	KASSERT(bp->bio_caller2 == bp->_bio_caller2,
+	    ("bio_caller2 used by the provider %s", pp->name));
+	KASSERT(bp->bio_cflags == bp->_bio_cflags,
+	    ("bio_cflags used by the provider %s", pp->name));
+#endif
 	cp = bp->bio_from;
 	if (cp == NULL) {
 		bp->bio_error = error;
@@ -395,7 +492,7 @@
 		g_bioq_unlock(&g_bio_run_down);
 		if (pace > 0) {
 			CTR1(KTR_GEOM, "g_down pacing self (pace %d)", pace);
-			msleep(&error, NULL, PRIBIO, "g_down", hz/10);
+			pause("g_down", hz/10);
 			pace--;
 		}
 		error = g_io_check(bp);
@@ -549,6 +646,28 @@
 	return (error);
 }
 
+int
+g_delete_data(struct g_consumer *cp, off_t offset, off_t length)
+{
+	struct bio *bp;
+	int error;
+
+	KASSERT(length > 0 && length >= cp->provider->sectorsize &&
+	    length <= MAXPHYS, ("g_delete_data(): invalid length %jd",
+	    (intmax_t)length));
+
+	bp = g_alloc_bio();
+	bp->bio_cmd = BIO_DELETE;
+	bp->bio_done = NULL;
+	bp->bio_offset = offset;
+	bp->bio_length = length;
+	bp->bio_data = NULL;
+	g_io_request(bp, cp);
+	error = biowait(bp, "gdelete");
+	g_destroy_bio(bp);
+	return (error);
+}
+
 void
 g_print_bio(struct bio *bp)
 {
@@ -564,6 +683,10 @@
 		cmd = "GETATTR";
 		printf("%s[%s(attr=%s)]", pname, cmd, bp->bio_attribute);
 		return;
+	case BIO_FLUSH:
+		cmd = "FLUSH";
+		printf("%s[%s]", pname, cmd);
+		return;
 	case BIO_READ:
 		cmd = "READ";
 	case BIO_WRITE:
Index: geom_vol_ffs.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_vol_ffs.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/geom/geom_vol_ffs.c -L sys/geom/geom_vol_ffs.c -u -r1.1.1.2 -r1.2
--- sys/geom/geom_vol_ffs.c
+++ sys/geom/geom_vol_ffs.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_vol_ffs.c,v 1.14.2.1 2005/12/29 05:59:51 sobomax Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_vol_ffs.c,v 1.16 2005/11/30 22:15:00 sobomax Exp $");
 
 #include <sys/param.h>
 #include <sys/errno.h>
Index: geom_mbr.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_mbr.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/geom/geom_mbr.c -L sys/geom/geom_mbr.c -u -r1.1.1.2 -r1.2
--- sys/geom/geom_mbr.c
+++ sys/geom/geom_mbr.c
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_mbr.c,v 1.65.2.2 2005/12/29 05:59:51 sobomax Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_mbr.c,v 1.68 2005/11/30 22:15:00 sobomax Exp $");
 
 #include <sys/param.h>
 #include <sys/errno.h>
Index: geom_kern.c
===================================================================
RCS file: /home/cvs/src/sys/geom/geom_kern.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/geom_kern.c -L sys/geom/geom_kern.c -u -r1.1.1.1 -r1.2
--- sys/geom/geom_kern.c
+++ sys/geom/geom_kern.c
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_kern.c,v 1.39 2005/04/19 06:23:58 phk Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_kern.c,v 1.41 2007/06/05 00:00:51 jeff Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -88,9 +88,9 @@
 	struct thread *tp = FIRST_THREAD_IN_PROC(p);
 
 	mtx_assert(&Giant, MA_NOTOWNED);
-	mtx_lock_spin(&sched_lock);
+	thread_lock(tp);
 	sched_prio(tp, PRIBIO);
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(tp);
 	for(;;) {
 		g_io_schedule_up(tp);
 	}
@@ -111,9 +111,9 @@
 	struct thread *tp = FIRST_THREAD_IN_PROC(p);
 
 	mtx_assert(&Giant, MA_NOTOWNED);
-	mtx_lock_spin(&sched_lock);
+	thread_lock(tp);
 	sched_prio(tp, PRIBIO);
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(tp);
 	for(;;) {
 		g_io_schedule_down(tp);
 	}
@@ -134,9 +134,9 @@
 	struct thread *tp = FIRST_THREAD_IN_PROC(p);
 
 	mtx_assert(&Giant, MA_NOTOWNED);
-	mtx_lock_spin(&sched_lock);
+	thread_lock(tp);
 	sched_prio(tp, PRIBIO);
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(tp);
 	for(;;) {
 		g_run_events();
 		tsleep(&g_wait_event, PRIBIO, "-", hz/10);
@@ -229,18 +229,19 @@
 
 TUNABLE_INT("kern.geom.debugflags", &g_debugflags);
 SYSCTL_INT(_kern_geom, OID_AUTO, debugflags, CTLFLAG_RW,
-	&g_debugflags, 0, "");
+	&g_debugflags, 0, "Set various trace levels for GEOM debugging");
 
 SYSCTL_INT(_kern_geom, OID_AUTO, collectstats, CTLFLAG_RW,
-	&g_collectstats, 0, "");
+	&g_collectstats, 0,
+	"Control statistics collection on GEOM providers and consumers");
 
 SYSCTL_INT(_debug_sizeof, OID_AUTO, g_class, CTLFLAG_RD,
-	0, sizeof(struct g_class), "");
+	0, sizeof(struct g_class), "sizeof(struct g_class)");
 SYSCTL_INT(_debug_sizeof, OID_AUTO, g_geom, CTLFLAG_RD,
-	0, sizeof(struct g_geom), "");
+	0, sizeof(struct g_geom), "sizeof(struct g_geom)");
 SYSCTL_INT(_debug_sizeof, OID_AUTO, g_provider, CTLFLAG_RD,
-	0, sizeof(struct g_provider), "");
+	0, sizeof(struct g_provider), "sizeof(struct g_provider)");
 SYSCTL_INT(_debug_sizeof, OID_AUTO, g_consumer, CTLFLAG_RD,
-	0, sizeof(struct g_consumer), "");
+	0, sizeof(struct g_consumer), "sizeof(struct g_consumer)");
 SYSCTL_INT(_debug_sizeof, OID_AUTO, g_bioq, CTLFLAG_RD,
-	0, sizeof(struct g_bioq), "");
+	0, sizeof(struct g_bioq), "sizeof(struct g_bioq)");
Index: g_stripe.h
===================================================================
RCS file: /home/cvs/src/sys/geom/stripe/g_stripe.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/stripe/g_stripe.h -L sys/geom/stripe/g_stripe.h -u -r1.1.1.1 -r1.2
--- sys/geom/stripe/g_stripe.h
+++ sys/geom/stripe/g_stripe.h
@@ -10,7 +10,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/stripe/g_stripe.h,v 1.7.2.1 2005/08/30 15:14:40 pjd Exp $
+ * $FreeBSD: src/sys/geom/stripe/g_stripe.h,v 1.9 2006/02/01 12:06:01 pjd Exp $
  */
 
 #ifndef	_G_STRIPE_H_
Index: g_stripe.c
===================================================================
RCS file: /home/cvs/src/sys/geom/stripe/g_stripe.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/stripe/g_stripe.c -L sys/geom/stripe/g_stripe.c -u -r1.1.1.1 -r1.2
--- sys/geom/stripe/g_stripe.c
+++ sys/geom/stripe/g_stripe.c
@@ -10,7 +10,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/stripe/g_stripe.c,v 1.25.2.2 2005/08/30 15:14:40 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/stripe/g_stripe.c,v 1.32 2007/06/04 18:25:06 dwmalone Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -42,7 +42,7 @@
 
 
 #define	MAX_IO_SIZE	(DFLTPHYS * 2)
-static MALLOC_DEFINE(M_STRIPE, "stripe data", "GEOM_STRIPE Data");
+static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data");
 
 static uma_zone_t g_stripe_zone;
 
@@ -80,7 +80,7 @@
 	int error, fast;
 
 	fast = g_stripe_fast;
-	error = sysctl_handle_int(oidp, &fast, sizeof(fast), req);
+	error = sysctl_handle_int(oidp, &fast, 0, req);
 	if (error == 0 && req->newptr != NULL)
 		g_stripe_fast = fast;
 	return (error);
@@ -520,6 +520,42 @@
 }
 
 static void
+g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp)
+{
+	struct bio_queue_head queue;
+	struct g_consumer *cp;
+	struct bio *cbp;
+	u_int no;
+
+	bioq_init(&queue);
+	for (no = 0; no < sc->sc_ndisks; no++) {
+		cbp = g_clone_bio(bp);
+		if (cbp == NULL) {
+			for (cbp = bioq_first(&queue); cbp != NULL;
+			    cbp = bioq_first(&queue)) {
+				bioq_remove(&queue, cbp);
+				g_destroy_bio(cbp);
+			}
+			if (bp->bio_error == 0)
+				bp->bio_error = ENOMEM;
+			g_io_deliver(bp, bp->bio_error);
+			return;
+		}
+		bioq_insert_tail(&queue, cbp);
+		cbp->bio_done = g_std_done;
+		cbp->bio_caller1 = sc->sc_disks[no];
+		cbp->bio_to = sc->sc_disks[no]->provider;
+	}
+	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
+		bioq_remove(&queue, cbp);
+		G_STRIPE_LOGREQ(cbp, "Sending request.");
+		cp = cbp->bio_caller1;
+		cbp->bio_caller1 = NULL;
+		g_io_request(cbp, cp);
+	}
+}
+
+static void
 g_stripe_start(struct bio *bp)
 {
 	off_t offset, start, length, nstripe;
@@ -542,10 +578,10 @@
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_DELETE:
-		/*
-		 * Only those requests are supported.
-		 */
 		break;
+	case BIO_FLUSH:
+		g_stripe_flush(sc, bp);
+		return;
 	case BIO_GETATTR:
 		/* To which provider it should be delivered? */
 	default:
@@ -846,7 +882,7 @@
 	free(sc->sc_disks, M_STRIPE);
 	free(sc, M_STRIPE);
 
-	pp = LIST_FIRST(&gp->provider); 
+	pp = LIST_FIRST(&gp->provider);
 	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
 		G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name);
 
Index: g_bde_lock.c
===================================================================
RCS file: /home/cvs/src/sys/geom/bde/g_bde_lock.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/geom/bde/g_bde_lock.c -L sys/geom/bde/g_bde_lock.c -u -r1.1.1.2 -r1.2
--- sys/geom/bde/g_bde_lock.c
+++ sys/geom/bde/g_bde_lock.c
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/bde/g_bde_lock.c,v 1.16.2.1 2005/12/29 05:34:46 sobomax Exp $
+ * $FreeBSD: src/sys/geom/bde/g_bde_lock.c,v 1.17 2005/11/30 19:07:28 sobomax Exp $
  */
 /* This souce file contains routines which operates on the lock sectors, both
  * for the kernel and the userland program gbde(1).
Index: g_bde_work.c
===================================================================
RCS file: /home/cvs/src/sys/geom/bde/g_bde_work.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/bde/g_bde_work.c -L sys/geom/bde/g_bde_work.c -u -r1.2 -r1.3
--- sys/geom/bde/g_bde_work.c
+++ sys/geom/bde/g_bde_work.c
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/bde/g_bde_work.c,v 1.26 2005/03/11 15:42:51 ume Exp $
+ * $FreeBSD: src/sys/geom/bde/g_bde_work.c,v 1.28 2006/08/04 07:56:34 yar Exp $
  */
 /*
  * This source file contains the state-engine which makes things happen in the
@@ -90,7 +90,7 @@
 static u_int g_bde_nwork;
 SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
 
-static MALLOC_DEFINE(M_GBDE, "GBDE", "GBDE data structures");
+static MALLOC_DEFINE(M_GBDE, "gbde", "GBDE data structures");
 
 static struct g_bde_work *
 g_bde_new_work(struct g_bde_softc *sc)
Index: g_bde_crypt.c
===================================================================
RCS file: /home/cvs/src/sys/geom/bde/g_bde_crypt.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/bde/g_bde_crypt.c -L sys/geom/bde/g_bde_crypt.c -u -r1.1.1.1 -r1.2
--- sys/geom/bde/g_bde_crypt.c
+++ sys/geom/bde/g_bde_crypt.c
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/bde/g_bde_crypt.c,v 1.22.2.1 2005/10/09 03:30:30 delphij Exp $
+ * $FreeBSD: src/sys/geom/bde/g_bde_crypt.c,v 1.23 2005/07/20 18:08:16 phk Exp $
  */
 /* This source file contains the functions responsible for the crypto, keying
  * and mapping operations on the I/O requests.
Index: geom_vinum_share.c
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum_share.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum_share.c -L sys/geom/vinum/geom_vinum_share.c -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum_share.c
+++ sys/geom/vinum/geom_vinum_share.c
@@ -41,7 +41,7 @@
 /* This file is shared between kernel and userland. */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_share.c,v 1.4 2004/11/15 12:30:59 le Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_share.c,v 1.5 2007/04/12 17:40:44 le Exp $");
 
 #include <sys/param.h>
 #ifdef _KERNEL
@@ -403,12 +403,9 @@
 				break;
 			}
 			ptr = token[j];
-			if (*ptr == '/') {
-				ptr++;
-				while (*ptr != '/')
-					ptr++;
-				ptr++;
-			}
+
+			if (strncmp(ptr, "/dev/", 5) == 0)
+				ptr += 5;
 			strncpy(d->device, ptr, GV_MAXDRIVENAME);
 		} else {
 			/* We assume this is the drive name. */
Index: geom_vinum_plex.c
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum_plex.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum_plex.c -L sys/geom/vinum/geom_vinum_plex.c -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum_plex.c
+++ sys/geom/vinum/geom_vinum_plex.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_plex.c,v 1.15.2.1 2005/08/19 08:48:04 le Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_plex.c,v 1.17 2006/01/06 18:03:17 le Exp $");
 
 #include <sys/param.h>
 #include <sys/bio.h>
@@ -88,14 +88,11 @@
 gv_plex_done(struct bio *bp)
 {
 	struct gv_plex *p;
-	struct gv_bioq *bq;
 
 	p = bp->bio_from->geom->softc;
 	bp->bio_cflags |= GV_BIO_DONE;
-	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
-	bq->bp = bp;
 	mtx_lock(&p->bqueue_mtx);
-	TAILQ_INSERT_TAIL(&p->bqueue, bq, queue);
+	bioq_insert_tail(p->bqueue, bp);
 	wakeup(p);
 	mtx_unlock(&p->bqueue_mtx);
 }
@@ -236,7 +233,6 @@
 gv_plex_start(struct bio *bp)
 {
 	struct gv_plex *p;
-	struct gv_bioq *bq;
 
 	switch(bp->bio_cmd) {
 	case BIO_READ:
@@ -260,10 +256,8 @@
 		return;
 	}
 
-	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
-	bq->bp = bp;
 	mtx_lock(&p->bqueue_mtx);
-	TAILQ_INSERT_TAIL(&p->bqueue, bq, queue);
+	bioq_disksort(p->bqueue, bp);
 	wakeup(p);
 	mtx_unlock(&p->bqueue_mtx);
 }
@@ -274,7 +268,6 @@
 	struct bio *bp;
 	struct gv_plex *p;
 	struct gv_sd *s;
-	struct gv_bioq *bq;
 
 	p = arg;
 	KASSERT(p != NULL, ("NULL p"));
@@ -286,20 +279,15 @@
 			break;
 
 		/* Take the first BIO from our queue. */
-		bq = TAILQ_FIRST(&p->bqueue);
-		if (bq == NULL) {
+		bp = bioq_takefirst(p->bqueue);
+		if (bp == NULL) {
 			msleep(p, &p->bqueue_mtx, PRIBIO, "-", hz/10);
 			continue;
 		}
-		TAILQ_REMOVE(&p->bqueue, bq, queue);
 		mtx_unlock(&p->bqueue_mtx);
 
-		bp = bq->bp;
-
 		/* A completed request. */
 		if (bp->bio_cflags & GV_BIO_DONE) {
-			g_free(bq);
-
 			if (bp->bio_cflags & GV_BIO_SYNCREQ ||
 			    bp->bio_cflags & GV_BIO_REBUILD) {
 				s = bp->bio_to->private;
@@ -327,19 +315,16 @@
 			if (gv_stripe_active(p, bp)) {
 				/* Park the bio on the waiting queue. */
 				mtx_lock(&p->bqueue_mtx);
-				TAILQ_INSERT_TAIL(&p->wqueue, bq, queue);
+				bioq_disksort(p->wqueue, bp);
 				mtx_unlock(&p->bqueue_mtx);
 			} else {
-				g_free(bq);
 				bp->bio_cflags &= ~GV_BIO_ONHOLD;
 				g_io_request(bp, bp->bio_caller2);
 			}
 
 		/* A normal request to this plex. */
-		} else {
-			g_free(bq);
+		} else
 			gv_plex_normal_request(p, bp);
-		}
 
 		mtx_lock(&p->bqueue_mtx);
 	}
@@ -380,7 +365,7 @@
 static int
 gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
 {
-	struct bio *cbp, *pbp;
+	struct bio *pbp;
 	int err, finished, i;
 
 	err = 0;
@@ -393,12 +378,12 @@
 		finished = 0;
 
 	} else if (wp->parity != NULL) {
-		cbp = wp->parity;
+		pbp = wp->parity;
 		wp->parity = NULL;
 
 		/* Check if the parity is correct. */
 		for (i = 0; i < wp->length; i++) {
-			if (bp->bio_data[i] != cbp->bio_data[i]) {
+			if (bp->bio_data[i] != pbp->bio_data[i]) {
 				err = 1;
 				break;
 			}
@@ -410,7 +395,7 @@
 
 			/* ... but we rebuild it. */
 			if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) {
-				g_io_request(cbp, cbp->bio_caller2);
+				g_io_request(pbp, pbp->bio_caller2);
 				finished = 0;
 			}
 		}
@@ -421,7 +406,7 @@
 		 */
 		if (finished) {
 			bp->bio_parent->bio_inbed++;
-			g_destroy_bio(cbp);
+			g_destroy_bio(pbp);
 		}
 
 	}
@@ -459,7 +444,11 @@
 				TAILQ_REMOVE(&p->packets, wp, list);
 				/* Bring the waiting bios back into the game. */
 				mtx_lock(&p->bqueue_mtx);
-				TAILQ_CONCAT(&p->bqueue, &p->wqueue, queue);
+				pbp = bioq_takefirst(p->wqueue);
+				while (pbp != NULL) {
+					bioq_disksort(p->bqueue, pbp);
+					pbp = bioq_takefirst(p->wqueue);
+				}
 				mtx_unlock(&p->bqueue_mtx);
 			}
 			g_free(wp);
@@ -499,7 +488,11 @@
 				TAILQ_REMOVE(&p->packets, wp, list);
 				/* Bring the waiting bios back into the game. */
 				mtx_lock(&p->bqueue_mtx);
-				TAILQ_CONCAT(&p->bqueue, &p->wqueue, queue);
+				pbp = bioq_takefirst(p->wqueue);
+				while (pbp != NULL) {
+					bioq_disksort(p->bqueue, pbp);
+					pbp = bioq_takefirst(p->wqueue);
+				}
 				mtx_unlock(&p->bqueue_mtx);
 				g_free(wp);
 			}
@@ -662,10 +655,8 @@
 		    gv_stripe_active(p, pbp)) {
 			/* Park the bio on the waiting queue. */
 			pbp->bio_cflags |= GV_BIO_ONHOLD;
-			bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
-			bq->bp = pbp;
 			mtx_lock(&p->bqueue_mtx);
-			TAILQ_INSERT_TAIL(&p->wqueue, bq, queue);
+			bioq_disksort(p->wqueue, pbp);
 			mtx_unlock(&p->bqueue_mtx);
 		} else
 			g_io_request(pbp, pbp->bio_caller2);
@@ -776,8 +767,19 @@
 			gv_update_vol_size(p->vol_sc, p->size);
 
 		/*
-		 * If necessary, create a bio queue mutex and a worker thread.
+		 * If necessary, create bio queues, queue mutex and a worker
+		 * thread.
 		 */
+		if (p->bqueue == NULL) {
+			p->bqueue = g_malloc(sizeof(struct bio_queue_head),
+			    M_WAITOK | M_ZERO);
+			bioq_init(p->bqueue);
+		}
+		if (p->wqueue == NULL) {
+			p->wqueue = g_malloc(sizeof(struct bio_queue_head),
+			    M_WAITOK | M_ZERO);
+			bioq_init(p->wqueue);
+		}
 		if (mtx_initialized(&p->bqueue_mtx) == 0)
 			mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
 		if (!(p->flags & GV_PLEX_THREAD_ACTIVE)) {
@@ -798,8 +800,12 @@
 		p->geom = gp;
 
 		TAILQ_INIT(&p->packets);
-		TAILQ_INIT(&p->bqueue);
-		TAILQ_INIT(&p->wqueue);
+		p->bqueue = g_malloc(sizeof(struct bio_queue_head),
+		    M_WAITOK | M_ZERO);
+		bioq_init(p->bqueue);
+		p->wqueue = g_malloc(sizeof(struct bio_queue_head),
+		    M_WAITOK | M_ZERO);
+		bioq_init(p->wqueue);
 		mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
 		kthread_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
 		    p->name);
Index: geom_vinum_rename.c
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum_rename.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum_rename.c -L sys/geom/vinum/geom_vinum_rename.c -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum_rename.c
+++ sys/geom/vinum/geom_vinum_rename.c
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_rename.c,v 1.3.2.1 2005/11/26 11:06:11 le Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_rename.c,v 1.3 2005/11/20 12:14:18 le Exp $");
 
 #include <sys/param.h>
 #include <sys/libkern.h>
Index: geom_vinum_var.h
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum_var.h -L sys/geom/vinum/geom_vinum_var.h -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum_var.h
+++ sys/geom/vinum/geom_vinum_var.h
@@ -35,7 +35,7 @@
  * otherwise) arising in any way out of the use of this software, even if
  * advised of the possibility of such damage.
  *  
- * $FreeBSD: src/sys/geom/vinum/geom_vinum_var.h,v 1.8.2.1 2005/08/19 08:48:04 le Exp $
+ * $FreeBSD: src/sys/geom/vinum/geom_vinum_var.h,v 1.11 2006/01/06 18:03:17 le Exp $
  */
 
 #ifndef	_GEOM_VINUM_VAR_H_
@@ -198,7 +198,11 @@
 	LIST_HEAD(,gv_sd)	subdisks;	/* Subdisks on this drive. */
 	LIST_ENTRY(gv_drive)	drive;		/* Entry in the vinum config. */
 
-	TAILQ_HEAD(,gv_bioq)	bqueue;		/* BIO queue of this drive. */
+#ifdef _KERNEL
+	struct bio_queue_head	*bqueue;	/* BIO queue of this drive. */
+#else
+	char			*padding;
+#endif
 	struct mtx		bqueue_mtx;	/* Mtx. to protect the queue. */
 
 	struct g_geom	*geom;			/* The geom of this drive. */
@@ -277,8 +281,12 @@
 	off_t	synced;			/* Count of synced bytes. */
 
 	struct mtx		bqueue_mtx; /* Lock for the BIO queue. */
-	TAILQ_HEAD(,gv_bioq)	bqueue;	/* BIO queue. */
-	TAILQ_HEAD(,gv_bioq)	wqueue;	/* Waiting BIO queue. */
+#ifdef _KERNEL
+	struct bio_queue_head	*bqueue; /* BIO queue. */
+	struct bio_queue_head	*wqueue; /* Waiting BIO queue. */
+#else
+	char			*bpad, *wpad;
+#endif
 	TAILQ_HEAD(,gv_raid5_packet)	packets; /* RAID5 sub-requests. */
 
 	LIST_HEAD(,gv_sd)   subdisks;	/* List of attached subdisks. */
@@ -307,7 +315,11 @@
 #define	GV_VOL_THREAD_DEAD	0x04	/* The thread has died. */
 
 	struct mtx		bqueue_mtx; /* Lock for the BIO queue. */
-	TAILQ_HEAD(,gv_bioq)	bqueue;	/* BIO queue. */
+#ifdef _KERNEL
+	struct bio_queue_head	*bqueue; /* BIO queue. */
+#else
+	char			*padding;
+#endif
 
 	LIST_HEAD(,gv_plex)   plexes;	/* List of attached plexes. */
 	LIST_ENTRY(gv_volume) volume;	/* Entry in vinum config. */
Index: geom_vinum.h
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum.h -L sys/geom/vinum/geom_vinum.h -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum.h
+++ sys/geom/vinum/geom_vinum.h
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/vinum/geom_vinum.h,v 1.9.2.1 2005/11/26 11:06:11 le Exp $
+ * $FreeBSD: src/sys/geom/vinum/geom_vinum.h,v 1.13 2007/04/12 17:54:35 le Exp $
  */
 
 #ifndef	_GEOM_VINUM_H_
@@ -57,6 +57,7 @@
 
 /* geom_vinum_rm.c */
 void	gv_remove(struct g_geom *, struct gctl_req *);
+int	gv_resetconfig(struct g_geom *, struct gctl_req *);
 int	gv_rm_sd(struct gv_softc *sc, struct gctl_req *req,
 	    struct gv_sd *s, int flags);
 
@@ -71,6 +72,7 @@
 
 /* geom_vinum_subr.c */
 void	gv_adjust_freespace(struct gv_sd *, off_t);
+void	gv_free_sd(struct gv_sd *);
 struct g_geom	*find_vinum_geom(void);
 struct gv_drive	*gv_find_drive(struct gv_softc *, char *);
 struct gv_plex	*gv_find_plex(struct gv_softc *, char *);
@@ -89,5 +91,7 @@
 int	gv_sd_to_plex(struct gv_plex *, struct gv_sd *, int);
 void	gv_update_plex_config(struct gv_plex *);
 void	gv_update_vol_size(struct gv_volume *, off_t);
+off_t	gv_vol_size(struct gv_volume *);
+off_t	gv_plex_size(struct gv_plex *);
 
 #endif /* !_GEOM_VINUM_H_ */
Index: geom_vinum_init.c
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum_init.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum_init.c -L sys/geom/vinum/geom_vinum_init.c -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum_init.c
+++ sys/geom/vinum/geom_vinum_init.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_init.c,v 1.10.2.1 2005/10/09 04:36:44 delphij Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_init.c,v 1.11 2005/08/28 18:16:31 le Exp $");
 
 #include <sys/param.h>
 #include <sys/bio.h>
Index: geom_vinum_move.c
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum_move.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum_move.c -L sys/geom/vinum/geom_vinum_move.c -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum_move.c
+++ sys/geom/vinum/geom_vinum_move.c
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_move.c,v 1.2.2.1 2005/11/26 11:06:11 le Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_move.c,v 1.3 2006/02/08 21:32:45 le Exp $");
 
 #include <sys/param.h>
 #include <sys/libkern.h>
@@ -113,7 +113,7 @@
 
 	cp = cursd->consumer;
 
-	if (cp->acr || cp->acw || cp->ace) {
+	if (cp != NULL && (cp->acr || cp->acw || cp->ace)) {
 		gctl_error(req, "subdisk '%s' is busy", cursd->name);
 		return (-1);
 	}
@@ -178,7 +178,8 @@
 	}
 
 	/* Replace the old sd by the new one. */
-	g_detach(cp);
+	if (cp != NULL)
+		g_detach(cp);
 	LIST_FOREACH_SAFE(s, &p->subdisks, in_plex, s2) {
 		if (s == cursd) {
 			p->sdcount--;
@@ -196,13 +197,15 @@
 	gv_drive_modify(d);
 
 	/* And reconnect the consumer ... */
-	newsd->consumer = cp;
-	err = g_attach(cp, newsd->provider);
-	if (err) {
-		g_destroy_consumer(cp);
-		gctl_error(req, "proposed move would create a loop in GEOM "
-		    "config");
-		return (err);
+	if (cp != NULL) {
+		newsd->consumer = cp;
+		err = g_attach(cp, newsd->provider);
+		if (err) {
+			g_destroy_consumer(cp);
+			gctl_error(req, "proposed move would create a loop "
+			    "in GEOM config");
+			return (err);
+		}
 	}
 
 	LIST_INSERT_HEAD(&sc->subdisks, newsd, sd);
Index: geom_vinum_state.c
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum_state.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum_state.c -L sys/geom/vinum/geom_vinum_state.c -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum_state.c
+++ sys/geom/vinum/geom_vinum_state.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_state.c,v 1.7 2005/01/21 18:27:23 le Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_state.c,v 1.8 2006/03/30 14:01:25 le Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -340,6 +340,12 @@
 
 	KASSERT(v != NULL, ("gv_update_vol_state: NULL v"));
 
+	/* The volume can't be up without plexes. */
+	if (v->plexcount == 0) {
+		v->state = GV_VOL_DOWN;
+		return;
+	}
+
 	LIST_FOREACH(p, &v->plexes, in_volume) {
 		/* One of our plexes is accessible, and so are we. */
 		if (p->state > GV_PLEX_DEGRADED) {
Index: geom_vinum_subr.c
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum_subr.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum_subr.c -L sys/geom/vinum/geom_vinum_subr.c -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum_subr.c
+++ sys/geom/vinum/geom_vinum_subr.c
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_subr.c,v 1.13 2005/01/19 13:57:09 le Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_subr.c,v 1.16 2007/04/12 17:54:35 le Exp $");
 
 #include <sys/param.h>
 #include <sys/conf.h>
@@ -54,6 +54,8 @@
 #include <geom/vinum/geom_vinum.h>
 #include <geom/vinum/geom_vinum_share.h>
 
+static off_t gv_plex_smallest_sd(struct gv_plex *, off_t);
+
 /* Find the VINUM class and it's associated geom. */
 struct g_geom *
 find_vinum_geom(void)
@@ -235,6 +237,20 @@
 	return;
 }
 
+static off_t
+gv_plex_smallest_sd(struct gv_plex *p, off_t smallest)
+{
+	struct gv_sd *s;
+
+	KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
+
+	LIST_FOREACH(s, &p->subdisks, in_plex) {
+		if (s->size < smallest)
+			smallest = s->size;
+	}
+	return (smallest);
+}
+
 int
 gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
 {
@@ -246,6 +262,15 @@
 	if (s->plex_sc == p)
 		return (0);
 
+	/* Check correct size of this subdisk. */
+	s2 = LIST_FIRST(&p->subdisks);
+	if (s2 != NULL && gv_is_striped(p) && (s2->size != s->size)) {
+		printf("GEOM_VINUM: need equal sized subdisks for "
+		    "this plex organisation - %s (%jd) <-> %s (%jd)\n",
+		    s2->name, s2->size, s->name, s->size);
+		return (-1);
+	}
+
 	/* Find the correct plex offset for this subdisk, if needed. */
 	if (s->plex_offset == -1) {
 		if (p->sdcount) {
@@ -271,7 +296,7 @@
 		break;
 
 	case GV_PLEX_RAID5:
-		p->size = (p->sdcount - 1) * s->size;
+		p->size = (p->sdcount - 1) * gv_plex_smallest_sd(p, s->size);
 		break;
 
 	default:
@@ -320,6 +345,60 @@
 	v->size = size;
 }
 
+/* Calculates the plex size. */
+off_t
+gv_plex_size(struct gv_plex *p)
+{
+	struct gv_sd *s;
+	off_t size;
+
+	KASSERT(p != NULL, ("gv_plex_size: NULL p"));
+
+	if (p->sdcount == 0)
+		return (0);
+
+	/* Adjust the size of our plex. */
+	size = 0;
+	switch (p->org) {
+	case GV_PLEX_CONCAT:
+		LIST_FOREACH(s, &p->subdisks, in_plex)
+			size += s->size;
+		break;
+	case GV_PLEX_STRIPED:
+		s = LIST_FIRST(&p->subdisks);
+		size = p->sdcount * s->size;
+		break;
+	case GV_PLEX_RAID5:
+		s = LIST_FIRST(&p->subdisks);
+		size = (p->sdcount - 1) * s->size;
+		break;
+	}
+
+	return (size);
+}
+
+/* Returns the size of a volume. */
+off_t
+gv_vol_size(struct gv_volume *v)
+{
+	struct gv_plex *p;
+	off_t minplexsize;
+
+	KASSERT(v != NULL, ("gv_vol_size: NULL v"));
+
+	p = LIST_FIRST(&v->plexes);
+	if (p == NULL)
+		return (0);
+
+	minplexsize = p->size;
+	LIST_FOREACH(p, &v->plexes, plex) {
+		if (p->size < minplexsize) {
+			minplexsize = p->size;
+		}
+	}
+	return (minplexsize);
+}
+
 void
 gv_update_plex_config(struct gv_plex *p)
 {
@@ -613,6 +692,64 @@
 }
 
 void
+gv_free_sd(struct gv_sd *s)
+{
+	struct gv_drive *d;
+	struct gv_freelist *fl, *fl2;
+
+	KASSERT(s != NULL, ("gv_free_sd: NULL s"));
+
+	d = s->drive_sc;
+	if (d == NULL)
+		return;
+
+	/*
+	 * First, find the free slot that's immediately before or after this
+	 * subdisk.
+	 */
+	fl = NULL;
+	LIST_FOREACH(fl, &d->freelist, freelist) {
+		if (fl->offset == s->drive_offset + s->size)
+			break;
+		if (fl->offset + fl->size == s->drive_offset)
+			break;
+	}
+
+	/* If there is no free slot next to this subdisk, create one. */
+	if (fl == NULL) {
+
+		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
+		fl->size = s->size;
+		fl->offset = s->drive_offset;
+
+		if (d->freelist_entries == 0) {
+			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
+		} else {
+			LIST_FOREACH(fl2, &d->freelist, freelist) {
+				if (fl->offset < fl2->offset) {
+					LIST_INSERT_BEFORE(fl2, fl, freelist);
+					break;
+				} else if (LIST_NEXT(fl2, freelist) == NULL) {
+					LIST_INSERT_AFTER(fl2, fl, freelist);
+					break;
+				}
+			}
+		}
+
+		d->freelist_entries++;
+
+	/* Expand the free slot we just found. */
+	} else {
+		fl->size += s->size;
+		if (fl->offset > s->drive_offset)
+			fl->offset = s->drive_offset;
+	}
+
+	d->avail += s->size;
+	d->sdcount--;
+}
+
+void
 gv_adjust_freespace(struct gv_sd *s, off_t remainder)
 {
 	struct gv_drive *d;
@@ -792,6 +929,8 @@
 		d->flags &= ~GV_DRIVE_THREAD_ACTIVE;
 		d->flags &= ~GV_DRIVE_THREAD_DIE;
 		d->flags &= ~GV_DRIVE_THREAD_DEAD;
+		g_free(d->bqueue);
+		d->bqueue = NULL;
 		mtx_destroy(&d->bqueue_mtx);
 	}
 }
@@ -807,6 +946,10 @@
 		p->flags &= ~GV_PLEX_THREAD_ACTIVE;
 		p->flags &= ~GV_PLEX_THREAD_DIE;
 		p->flags &= ~GV_PLEX_THREAD_DEAD;
+		g_free(p->bqueue);
+		g_free(p->wqueue);
+		p->bqueue = NULL;
+		p->wqueue = NULL;
 		mtx_destroy(&p->bqueue_mtx);
 	}
 }
@@ -822,6 +965,8 @@
 		v->flags &= ~GV_VOL_THREAD_ACTIVE;
 		v->flags &= ~GV_VOL_THREAD_DIE;
 		v->flags &= ~GV_VOL_THREAD_DEAD;
+		g_free(v->bqueue);
+		v->bqueue = NULL;
 		mtx_destroy(&v->bqueue_mtx);
 	}
 }
Index: geom_vinum_volume.c
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum_volume.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum_volume.c -L sys/geom/vinum/geom_vinum_volume.c -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum_volume.c
+++ sys/geom/vinum/geom_vinum_volume.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_volume.c,v 1.8.2.2 2005/10/09 04:35:42 delphij Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_volume.c,v 1.11 2006/01/06 18:03:17 le Exp $");
 
 #include <sys/param.h>
 #include <sys/bio.h>
@@ -79,14 +79,11 @@
 gv_volume_done(struct bio *bp)
 {
 	struct gv_volume *v;
-	struct gv_bioq *bq;
 
 	v = bp->bio_from->geom->softc;
 	bp->bio_cflags |= GV_BIO_DONE;
-	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
-	bq->bp = bp;
 	mtx_lock(&v->bqueue_mtx);
-	TAILQ_INSERT_TAIL(&v->bqueue, bq, queue);
+	bioq_insert_tail(v->bqueue, bp);
 	wakeup(v);
 	mtx_unlock(&v->bqueue_mtx);
 }
@@ -95,7 +92,6 @@
 gv_volume_start(struct bio *bp)
 {
 	struct gv_volume *v;
-	struct gv_bioq *bq;
 
 	switch(bp->bio_cmd) {
 	case BIO_READ:
@@ -114,10 +110,8 @@
 		return;
 	}
 
-	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
-	bq->bp = bp;
 	mtx_lock(&v->bqueue_mtx);
-	TAILQ_INSERT_TAIL(&v->bqueue, bq, queue);
+	bioq_disksort(v->bqueue, bp);
 	wakeup(v);
 	mtx_unlock(&v->bqueue_mtx);
 }
@@ -127,7 +121,6 @@
 {
 	struct bio *bp;
 	struct gv_volume *v;
-	struct gv_bioq *bq;
 
 	v = arg;
 	KASSERT(v != NULL, ("NULL v"));
@@ -138,17 +131,13 @@
 			break;
 
 		/* Take the first BIO from our queue. */
-		bq = TAILQ_FIRST(&v->bqueue);
-		if (bq == NULL) {
+		bp = bioq_takefirst(v->bqueue);
+		if (bp == NULL) {
 			msleep(v, &v->bqueue_mtx, PRIBIO, "-", hz/10);
 			continue;
 		}
-		TAILQ_REMOVE(&v->bqueue, bq, queue);
 		mtx_unlock(&v->bqueue_mtx);
 
-		bp = bq->bp;
-		g_free(bq);
-
 		if (bp->bio_cflags & GV_BIO_DONE)
 			gv_vol_completed_request(v, bp);
 		else
@@ -169,7 +158,6 @@
 	struct bio *pbp;
 	struct g_geom *gp;
 	struct g_consumer *cp, *cp2;
-	struct gv_bioq *bq;
 
 	pbp = bp->bio_parent;
 
@@ -196,10 +184,8 @@
 
 		g_destroy_bio(bp);
 		pbp->bio_children--;
-		bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
-		bq->bp = pbp;
 		mtx_lock(&v->bqueue_mtx);
-		TAILQ_INSERT_TAIL(&v->bqueue, bq, queue);
+		bioq_disksort(v->bqueue, pbp);
 		mtx_unlock(&v->bqueue_mtx);
 		return;
 
@@ -370,11 +356,15 @@
 		gp->access = gv_volume_access;
 		gp->softc = v;
 		first++;
-		TAILQ_INIT(&v->bqueue);
 	} else
 		gp = v->geom;
 
-	/* Create bio queue mutex and worker thread, if necessary. */
+	/* Create bio queue, queue mutex, and worker thread, if necessary. */
+	if (v->bqueue == NULL) {
+		v->bqueue = g_malloc(sizeof(struct bio_queue_head),
+		    M_WAITOK | M_ZERO);
+		bioq_init(v->bqueue);
+	}
 	if (mtx_initialized(&v->bqueue_mtx) == 0)
 		mtx_init(&v->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
 
Index: geom_vinum_rm.c
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum_rm.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum_rm.c -L sys/geom/vinum/geom_vinum_rm.c -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum_rm.c
+++ sys/geom/vinum/geom_vinum_rm.c
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_rm.c,v 1.6.2.3 2005/11/26 11:06:11 le Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_rm.c,v 1.13 2007/04/12 17:54:35 le Exp $");
 
 #include <sys/param.h>
 #include <sys/libkern.h>
@@ -38,7 +38,6 @@
 #include <geom/vinum/geom_vinum.h>
 #include <geom/vinum/geom_vinum_share.h>
 
-static void	gv_free_sd(struct gv_sd *);
 static int	gv_rm_drive(struct gv_softc *, struct gctl_req *,
 		    struct gv_drive *, int);
 static int	gv_rm_plex(struct gv_softc *, struct gctl_req *,
@@ -125,6 +124,45 @@
 	gv_save_config_all(sc);
 }
 
+/* Resets configuration */
+int
+gv_resetconfig(struct g_geom *gp, struct gctl_req *req)
+{
+	struct gv_softc *sc;
+	struct gv_drive *d, *d2;
+	struct gv_volume *v, *v2;
+	struct gv_plex *p, *p2;
+	struct gv_sd *s, *s2;
+	int flags;
+
+	d = NULL;
+	d2 = NULL;
+	p = NULL;
+	p2 = NULL;
+	s = NULL;
+	s2 = NULL;
+	flags = GV_FLAG_R;
+	sc = gp->softc;
+	/* First loop through to make sure no volumes are up */
+        LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
+		if (gv_is_open(v->geom)) {
+			gctl_error(req, "volume '%s' is busy", v->name);
+			return (-1);
+		}
+	}
+	/* Then if not, we remove everything. */
+	LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2)
+		gv_rm_vol(sc, req, v, flags);
+	LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2)
+		gv_rm_plex(sc, req, p, flags);
+	LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2)
+		gv_rm_sd(sc, req, s, flags);
+	LIST_FOREACH_SAFE(d, &sc->drives, drive, d2)
+		gv_rm_drive(sc, req, d, flags);
+	gv_save_config_all(sc);
+	return (0);
+}
+
 /* Remove a volume. */
 static int
 gv_rm_vol(struct gv_softc *sc, struct gctl_req *req, struct gv_volume *v, int flags)
@@ -178,6 +216,7 @@
 gv_rm_plex(struct gv_softc *sc, struct gctl_req *req, struct gv_plex *p, int flags)
 {
 	struct g_geom *gp;
+	struct gv_volume *v;
 	struct gv_sd *s, *s2;
 	int err;
 
@@ -207,7 +246,6 @@
 
 	/* Remove the subdisks our plex has. */
 	LIST_FOREACH_SAFE(s, &p->subdisks, in_plex, s2) {
-		p->sdcount--;
 #if 0
 		LIST_REMOVE(s, in_plex);
 		s->plex_sc = NULL;
@@ -218,12 +256,15 @@
 			return (err);
 	}
 
+	v = p->vol_sc;
 	/* Clean up and let our geom fade away. */
 	LIST_REMOVE(p, plex);
 	if (p->vol_sc != NULL) {
 		p->vol_sc->plexcount--;
 		LIST_REMOVE(p, in_volume);
 		p->vol_sc = NULL;
+		/* Correctly update the volume size. */
+		gv_update_vol_size(v, gv_vol_size(v));
 	}
 
 	gv_kill_plex_thread(p);
@@ -242,14 +283,28 @@
 gv_rm_sd(struct gv_softc *sc, struct gctl_req *req, struct gv_sd *s, int flags)
 {
 	struct g_provider *pp;
+	struct gv_plex *p;
+	struct gv_volume *v;
 
 	KASSERT(s != NULL, ("gv_rm_sd: NULL s"));
 
 	pp = s->provider;
+	p = s->plex_sc;
+	v = NULL;
 
 	/* Clean up. */
-	if (s->plex_sc)
+	if (p != NULL) {
 		LIST_REMOVE(s, in_plex);
+
+		p->sdcount--;
+		/* Update the plexsize. */
+		p->size = gv_plex_size(p);
+		v = p->vol_sc;
+		if (v != NULL) {
+			/* Update the size of our plex' volume. */
+			gv_update_vol_size(v, gv_vol_size(v));
+		}
+	}
 	if (s->drive_sc)
 		LIST_REMOVE(s, from_drive);
 	LIST_REMOVE(s, sd);
@@ -342,60 +397,3 @@
 
 	return (err);
 }
-
-static void
-gv_free_sd(struct gv_sd *s)
-{
-	struct gv_drive *d;
-	struct gv_freelist *fl, *fl2;
-
-	KASSERT(s != NULL, ("gv_free_sd: NULL s"));
-
-	d = s->drive_sc;
-	if (d == NULL)
-		return;
-
-	/*
-	 * First, find the free slot that's immediately before or after this
-	 * subdisk.
-	 */
-	fl = NULL;
-	LIST_FOREACH(fl, &d->freelist, freelist) {
-		if (fl->offset == s->drive_offset + s->size)
-			break;
-		if (fl->offset + fl->size == s->drive_offset)
-			break;
-	}
-
-	/* If there is no free slot behind this subdisk, so create one. */
-	if (fl == NULL) {
-
-		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
-		fl->size = s->size;
-		fl->offset = s->drive_offset;
-
-		if (d->freelist_entries == 0) {
-			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
-		} else {
-			LIST_FOREACH(fl2, &d->freelist, freelist) {
-				if (fl->offset < fl2->offset) {
-					LIST_INSERT_BEFORE(fl2, fl, freelist);
-					break;
-				} else if (LIST_NEXT(fl2, freelist) == NULL) {
-					LIST_INSERT_AFTER(fl2, fl, freelist);
-					break;
-				}
-			}
-		}
-
-		d->freelist_entries++;
-
-	/* Expand the free slot we just found. */
-	} else {
-		fl->size += s->size;
-		if (fl->offset > s->drive_offset)
-			fl->offset = s->drive_offset;
-	}
-
-	d->avail += s->size;
-}
Index: geom_vinum_raid5.h
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum_raid5.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum_raid5.h -L sys/geom/vinum/geom_vinum_raid5.h -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum_raid5.h
+++ sys/geom/vinum/geom_vinum_raid5.h
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/vinum/geom_vinum_raid5.h,v 1.6 2005/01/06 18:27:30 imp Exp $
+ * $FreeBSD: src/sys/geom/vinum/geom_vinum_raid5.h,v 1.7 2006/08/17 22:50:33 imp Exp $
  */
 
 #ifndef _GEOM_VINUM_RAID5_H_
@@ -45,7 +45,7 @@
 				pbp = pbp->bio_caller1;		\
 			pbp->bio_caller1 = cbp;			\
 		}						\
-	} while (0);
+	} while (0)
 
 struct gv_raid5_packet {
 	caddr_t	data;		/* Data buffer of this sub-request- */
Index: geom_vinum.c
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum.c -L sys/geom/vinum/geom_vinum.c -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum.c
+++ sys/geom/vinum/geom_vinum.c
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum.c,v 1.16.2.3 2005/12/10 14:36:17 le Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum.c,v 1.21 2006/03/30 14:01:25 le Exp $");
 
 #include <sys/param.h>
 #include <sys/bio.h>
@@ -237,13 +237,16 @@
 
 		/* Find the volume this plex should be attached to. */
 		v = gv_find_vol(sc, p->volume);
-		if (v != NULL) {
-			if (v->plexcount)
-				p->flags |= GV_PLEX_ADDED;
-			p->vol_sc = v;
-			v->plexcount++;
-			LIST_INSERT_HEAD(&v->plexes, p, in_volume);
+		if (v == NULL) {
+			gctl_error(req, "volume '%s' not found", p->volume);
+			g_free(p);
+			continue;
 		}
+		if (v->plexcount)
+			p->flags |= GV_PLEX_ADDED;
+		p->vol_sc = v;
+		v->plexcount++;
+		LIST_INSERT_HEAD(&v->plexes, p, in_volume);
 
 		p->vinumconf = sc;
 		p->flags |= GV_PLEX_NEWBORN;
@@ -272,7 +275,7 @@
 
 		/* drive not found - XXX */
 		if (d == NULL) {
-			printf("FOO: drive '%s' not found\n", s->drive);
+			gctl_error(req, "drive '%s' not found", s->drive);
 			g_free(s);
 			continue;
 		}
@@ -282,7 +285,7 @@
 
 		/* plex not found - XXX */
 		if (p == NULL) {
-			printf("FOO: plex '%s' not found\n", s->plex);
+			gctl_error(req, "plex '%s' not found\n", s->plex);
 			g_free(s);
 			continue;
 		}
@@ -304,8 +307,34 @@
 		 */
 		error = gv_sd_to_plex(p, s, 1);
 		if (error) {
-			printf("FOO: couldn't give sd '%s' to plex '%s'\n",
-			    s->name, p->name);
+			gctl_error(req, "GEOM_VINUM: couldn't give sd '%s' "
+			    "to plex '%s'\n", s->name, p->name);
+			if (s->drive_sc)
+				LIST_REMOVE(s, from_drive);
+			gv_free_sd(s);
+			g_free(s);
+			/*
+			 * If this subdisk can't be created, we won't create
+			 * the attached plex either, if it is also a new one.
+			 */
+			if (!(p->flags & GV_PLEX_NEWBORN))
+				continue;
+			LIST_FOREACH_SAFE(s, &p->subdisks, in_plex, s2) {
+				if (s->drive_sc)
+					LIST_REMOVE(s, from_drive);
+				p->sdcount--;
+				LIST_REMOVE(s, in_plex);
+				LIST_REMOVE(s, sd);
+				gv_free_sd(s);
+				g_free(s);
+			}
+			if (p->vol_sc != NULL) {
+				LIST_REMOVE(p, in_volume);
+				p->vol_sc->plexcount--;
+			}
+			LIST_REMOVE(p, plex);
+			g_free(p);
+			continue;
 		}
 		s->flags |= GV_SD_NEWBORN;
 
@@ -396,6 +425,9 @@
 
 	} else if (!strcmp(verb, "rename")) {
 		gv_rename(gp, req);
+	
+	} else if (!strcmp(verb, "resetconfig")) {
+		gv_resetconfig(gp, req);
 
 	} else if (!strcmp(verb, "start")) {
 		gv_start_obj(gp, req);
Index: geom_vinum_drive.c
===================================================================
RCS file: /home/cvs/src/sys/geom/vinum/geom_vinum_drive.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/vinum/geom_vinum_drive.c -L sys/geom/vinum/geom_vinum_drive.c -u -r1.1.1.1 -r1.2
--- sys/geom/vinum/geom_vinum_drive.c
+++ sys/geom/vinum/geom_vinum_drive.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_drive.c,v 1.18.2.4 2005/12/10 14:36:17 le Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/vinum/geom_vinum_drive.c,v 1.25 2006/01/06 18:03:17 le Exp $");
 
 #include <sys/param.h>
 #include <sys/bio.h>
@@ -77,7 +77,8 @@
 	LIST_INSERT_HEAD(&d->freelist, fl, freelist);
 	d->freelist_entries = 1;
 
-	TAILQ_INIT(&d->bqueue);
+	d->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO);
+	bioq_init(d->bqueue);
 	mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
 	kthread_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name);
 	d->flags |= GV_DRIVE_THREAD_ACTIVE;
@@ -235,15 +236,12 @@
 gv_drive_done(struct bio *bp)
 {
 	struct gv_drive *d;
-	struct gv_bioq *bq;
 
 	/* Put the BIO on the worker queue again. */
 	d = bp->bio_from->geom->softc;
 	bp->bio_cflags |= GV_BIO_DONE;
-	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
-	bq->bp = bp;
 	mtx_lock(&d->bqueue_mtx);
-	TAILQ_INSERT_TAIL(&d->bqueue, bq, queue);
+	bioq_insert_tail(d->bqueue, bp);
 	wakeup(d);
 	mtx_unlock(&d->bqueue_mtx);
 }
@@ -254,7 +252,6 @@
 {
 	struct gv_drive *d;
 	struct gv_sd *s;
-	struct gv_bioq *bq;
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
@@ -279,10 +276,8 @@
 	 * Put the BIO on the worker queue, where the worker thread will pick
 	 * it up.
 	 */
-	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
-	bq->bp = bp;
 	mtx_lock(&d->bqueue_mtx);
-	TAILQ_INSERT_TAIL(&d->bqueue, bq, queue);
+	bioq_disksort(d->bqueue, bp);
 	wakeup(d);
 	mtx_unlock(&d->bqueue_mtx);
 
@@ -296,7 +291,6 @@
 	struct g_provider *pp;
 	struct gv_drive *d;
 	struct gv_sd *s;
-	struct gv_bioq *bq, *bq2;
 	int error;
 
 	d = arg;
@@ -308,16 +302,13 @@
 			break;
 
 		/* Take the first BIO from out queue. */
-		bq = TAILQ_FIRST(&d->bqueue);
-		if (bq == NULL) {
+		bp = bioq_takefirst(d->bqueue);
+		if (bp == NULL) {
 			msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10);
 			continue;
  		}
-		TAILQ_REMOVE(&d->bqueue, bq, queue);
 		mtx_unlock(&d->bqueue_mtx);
  
-		bp = bq->bp;
-		g_free(bq);
 		pp = bp->bio_to;
 		gp = pp->geom;
 
@@ -371,11 +362,8 @@
 		mtx_lock(&d->bqueue_mtx);
 	}
 
-	TAILQ_FOREACH_SAFE(bq, &d->bqueue, queue, bq2) {
-		TAILQ_REMOVE(&d->bqueue, bq, queue);
+	while ((bp = bioq_takefirst(d->bqueue)) != NULL) {
 		mtx_unlock(&d->bqueue_mtx);
-		bp = bq->bp;
-		g_free(bq);
 		if (bp->bio_cflags & GV_BIO_DONE) 
 			g_std_done(bp);
 		else
@@ -504,15 +492,19 @@
 			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
 			d->freelist_entries = 1;
 
-			TAILQ_INIT(&d->bqueue);
-
 			/* Save it into the main configuration. */
 			LIST_INSERT_HEAD(&sc->drives, d, drive);
 		}
 
 		/*
-		 * Create a bio queue mutex and a worker thread, if necessary.
+		 * Create bio queue, queue mutex and a worker thread, if
+		 * necessary.
 		 */
+		if (d->bqueue == NULL) {
+			d->bqueue = g_malloc(sizeof(struct bio_queue_head),
+			    M_WAITOK | M_ZERO);
+			bioq_init(d->bqueue);
+		}
 		if (mtx_initialized(&d->bqueue_mtx) == 0)
 			mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
 
--- /dev/null
+++ sys/geom/virstor/binstream.h
@@ -0,0 +1,93 @@
+/*-
+ * Copyright (c) 2005 Ivan Voras <ivoras at gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/geom/virstor/binstream.h,v 1.1 2007/09/23 07:34:23 pjd Exp $
+ */
+
+// $Id: binstream.h,v 1.1 2006/07/05 10:47:54 ivoras Exp $
+
+
+#ifndef _BIN_STREAM_
+#define _BIN_STREAM_
+
+#ifndef uint8_t
+#define uint8_t unsigned char
+#endif
+
+typedef struct {
+	unsigned char  *data;
+	int		pos;
+}	bin_stream_t;
+
+
+/* "Open" a binary stream for reading */
+void		bs_open   (bin_stream_t * bs, void *data);
+
+/* "Reset" position in binary stream to zero */
+void		bs_reset  (bin_stream_t * bs);
+
+
+/* Write a zero-terminated string; return next position */
+unsigned	bs_write_str(bin_stream_t * bs, char *data);
+
+/* Write an arbitrary buffer; return next position */
+unsigned	bs_write_buf(bin_stream_t * bs, char *data, unsigned data_size);
+
+/* Write a 8bit uint; return next position. */
+unsigned	bs_write_u8(bin_stream_t * bs, uint8_t data);
+
+/* Write a 16bit uint; return next position. */
+unsigned	bs_write_u16(bin_stream_t * bs, uint16_t data);
+
+/* Write a 32bit uint; return next position. */
+unsigned	bs_write_u32(bin_stream_t * bs, uint32_t data);
+
+/* Write a 64bit uint; return next position. */
+unsigned	bs_write_u64(bin_stream_t * bs, uint64_t data);
+
+
+/*
+ * Read a null-terminated string from stream into a buffer; buf_size is size
+ * of the buffer, including the final \0. Returns buf pointer or NULL if
+ * garbage input.
+ */
+char           *bs_read_str(bin_stream_t * bs, char *buf, unsigned buf_size);
+
+/* Read an arbitrary buffer. */
+void		bs_read_buf(bin_stream_t * bs, char *buf, unsigned buf_size);
+
+/* Read an 8bit uint and return it */
+uint8_t		bs_read_u8(bin_stream_t * bs);
+
+/* Read a 16bit uint and return it */
+uint16_t	bs_read_u16(bin_stream_t * bs);
+
+/* Read a 32bit uint and return it */
+uint32_t	bs_read_u32(bin_stream_t * bs);
+
+/* Read a 64bit uint and return it */
+uint64_t	bs_read_u64(bin_stream_t * bs);
+
+#endif
--- /dev/null
+++ sys/geom/virstor/g_virstor.c
@@ -0,0 +1,1864 @@
+/*-
+ * Copyright (c) 2006-2007 Ivan Voras <ivoras at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Implementation notes:
+ * - "Components" are wrappers around providers that make up the
+ *   virtual storage (i.e. a virstor has "physical" components)
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/geom/virstor/g_virstor.c,v 1.3 2007/09/24 06:14:27 pjd Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sx.h>
+#include <sys/bio.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/kthread.h>
+#include <sys/mutex.h>
+#include <vm/uma.h>
+#include <geom/geom.h>
+
+#include <geom/virstor/g_virstor.h>
+#include <geom/virstor/g_virstor_md.h>
+
+/* Declare the malloc(9) type used for every allocation made by this file */
+static MALLOC_DEFINE(M_GVIRSTOR, "gvirstor", "GEOM_VIRSTOR Data");
+
+/* GEOM class methods (per-class callbacks referenced by g_virstor_class) */
+static g_init_t g_virstor_init;
+static g_fini_t g_virstor_fini;
+static g_taste_t g_virstor_taste;
+static g_ctl_req_t g_virstor_config;
+static g_ctl_destroy_geom_t g_virstor_destroy_geom;
+
+/* Declare & initialize class structure ("geom class") */
+struct g_class g_virstor_class = {
+	.name =		G_VIRSTOR_CLASS_NAME,
+	.version =	G_VERSION,
+	.init =		g_virstor_init,
+	.fini =		g_virstor_fini,
+	.taste =	g_virstor_taste,
+	.ctlreq =	g_virstor_config,
+	.destroy_geom = g_virstor_destroy_geom
+	/* The per-instance methods (.start, .access, .orphan, .spoiled,
+	 * .dumpconf) are set on each geom by create_virstor_geom(). */
+};
+
+/* Declare sysctls and loader tunables under kern.geom.virstor */
+SYSCTL_DECL(_kern_geom);
+SYSCTL_NODE(_kern_geom, OID_AUTO, virstor, CTLFLAG_RW, 0, "GEOM_GVIRSTOR information");
+
+static u_int g_virstor_debug = 2; /* 2 = production level, see description */
+TUNABLE_INT("kern.geom.virstor.debug", &g_virstor_debug);
+SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, debug, CTLFLAG_RW, &g_virstor_debug,
+    0, "Debug level (2=production, 5=normal, 15=excessive)");
+
+static u_int g_virstor_chunk_watermark = 100;
+TUNABLE_INT("kern.geom.virstor.chunk_watermark", &g_virstor_chunk_watermark);
+SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, chunk_watermark, CTLFLAG_RW,
+    &g_virstor_chunk_watermark, 0,
+    "Minimum number of free chunks before issuing administrative warning");
+
+static u_int g_virstor_component_watermark = 1;
+TUNABLE_INT("kern.geom.virstor.component_watermark",
+    &g_virstor_component_watermark);
+SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, component_watermark, CTLFLAG_RW,
+    &g_virstor_component_watermark, 0,
+    "Minimum number of free components before issuing administrative warning");
+
+/* Internal helpers: metadata I/O, geom/component lifecycle, allocation */
+static int read_metadata(struct g_consumer *, struct g_virstor_metadata *);
+static int write_metadata(struct g_consumer *, struct g_virstor_metadata *);
+static int clear_metadata(struct g_virstor_component *);
+static int add_provider_to_geom(struct g_virstor_softc *, struct g_provider *,
+    struct g_virstor_metadata *);
+static struct g_geom *create_virstor_geom(struct g_class *,
+    struct g_virstor_metadata *);
+static void virstor_check_and_run(struct g_virstor_softc *);
+static u_int virstor_valid_components(struct g_virstor_softc *);
+static int virstor_geom_destroy(struct g_virstor_softc *, boolean_t,
+    boolean_t);
+static void remove_component(struct g_virstor_softc *,
+    struct g_virstor_component *, boolean_t);
+static void bioq_dismantle(struct bio_queue_head *);
+static int allocate_chunk(struct g_virstor_softc *,
+    struct g_virstor_component **, u_int *, u_int *);
+static void delay_destroy_consumer(void *, int);
+static void dump_component(struct g_virstor_component *comp);
+#if 0
+static void dump_me(struct virstor_map_entry *me, unsigned int nr);
+#endif
+
+/* Userland control verbs (dispatched by g_virstor_config()) and helpers */
+static void virstor_ctl_stop(struct gctl_req *, struct g_class *);
+static void virstor_ctl_add(struct gctl_req *, struct g_class *);
+static void virstor_ctl_remove(struct gctl_req *, struct g_class *);
+static struct g_virstor_softc * virstor_find_geom(const struct g_class *,
+    const char *);
+static void update_metadata(struct g_virstor_softc *);
+static void fill_metadata(struct g_virstor_softc *, struct g_virstor_metadata *,
+    u_int, u_int);
+
+/* Per-instance GEOM methods (see create_virstor_geom()) and bio handling */
+static void g_virstor_orphan(struct g_consumer *);
+static int g_virstor_access(struct g_provider *, int, int, int);
+static void g_virstor_start(struct bio *);
+static void g_virstor_dumpconf(struct sbuf *, const char *, struct g_geom *,
+    struct g_consumer *, struct g_provider *);
+static void g_virstor_done(struct bio *);
+
+/* Placeholder method installed on the throw-away geom used while tasting */
+static void invalid_call(void);
+/*
+ * Initialise GEOM class (per-class callback).
+ * Currently performs no run-time work; it only hosts a compile-time check
+ * of the allocation-map geometry.
+ */
+static void
+g_virstor_init(struct g_class *mp __unused)
+{
+
+	/* Catch map struct size mismatch at compile time; map entries must
+	 * fit into MAXPHYS exactly, with no wasted space. */
+	CTASSERT(VIRSTOR_MAP_BLOCK_ENTRIES*VIRSTOR_MAP_ENTRY_SIZE == MAXPHYS);
+
+	/* Placeholder: init UMA zones, TAILQs, other global vars here */
+}
+
+/*
+ * Finalise GEOM class (per-class callback).
+ * Intentionally empty: g_virstor_init() sets up no class-wide state.
+ */
+static void
+g_virstor_fini(struct g_class *mp __unused)
+{
+
+	/* Placeholder: deinit UMA zones & global vars here */
+}
+
+/*
+ * Control request entry point (per-class callback).
+ *
+ * Validates the "version" parameter sent by userland and then hands the
+ * request to the handler matching `verb`. Called with the topology lock
+ * held; the lock is released while the handler runs and re-taken before
+ * returning. Errors are reported to the caller through gctl_error().
+ */
+static void
+g_virstor_config(struct gctl_req *req, struct g_class *cp, char const *verb)
+{
+	uint32_t *userver;
+
+	g_topology_assert();
+
+	userver = gctl_get_paraml(req, "version", sizeof(*userver));
+	if (userver == NULL) {
+		gctl_error(req, "Failed to get 'version' argument");
+		return;
+	}
+	if (*userver != G_VIRSTOR_VERSION) {
+		gctl_error(req, "Userland and kernel versions out of sync");
+		return;
+	}
+
+	/* The verb handlers take the topology lock themselves when needed */
+	g_topology_unlock();
+	if (strcmp(verb, "stop") == 0 || strcmp(verb, "destroy") == 0) {
+		virstor_ctl_stop(req, cp);
+	} else if (strcmp(verb, "add") == 0) {
+		virstor_ctl_add(req, cp);
+	} else if (strcmp(verb, "remove") == 0) {
+		virstor_ctl_remove(req, cp);
+	} else {
+		gctl_error(req, "unknown verb: '%s'", verb);
+	}
+	g_topology_lock();
+}
+
+/*
+ * "stop" (and "destroy") verb from userland.
+ *
+ * Expects the request to carry "nargs" (>= 1), "force" and one geom name
+ * per "argN" parameter. For every named geom the on-disk metadata is
+ * refreshed and the geom is destroyed. Processing stops at the first
+ * argument that is missing or does not name a known geom; the problem is
+ * reported through gctl_error().
+ *
+ * NOTE: "force" must be present but its value is not consulted; the
+ * destroy is always requested with force == TRUE.
+ */
+static void
+virstor_ctl_stop(struct gctl_req *req, struct g_class *cp)
+{
+	int *force, *nargs;
+	int i;
+
+	nargs = gctl_get_paraml(req, "nargs", sizeof *nargs);
+	if (nargs == NULL) {
+		gctl_error(req, "Error fetching argument '%s'", "nargs");
+		return;
+	}
+	if (*nargs < 1) {
+		gctl_error(req, "Invalid number of arguments");
+		return;
+	}
+	force = gctl_get_paraml(req, "force", sizeof *force);
+	if (force == NULL) {
+		gctl_error(req, "Error fetching argument '%s'", "force");
+		return;
+	}
+
+	g_topology_lock();
+	for (i = 0; i < *nargs; i++) {
+		char param[16];	/* "arg" + any int + NUL */
+		const char *name;
+		struct g_virstor_softc *sc;
+		int error;
+
+		snprintf(param, sizeof(param), "arg%d", i);
+		name = gctl_get_asciiparam(req, param);
+		if (name == NULL) {
+			gctl_error(req, "Error fetching argument '%s'", param);
+			break;
+		}
+		sc = virstor_find_geom(cp, name);
+		if (sc == NULL) {
+			/* Unknown name, or a geom whose softc is already gone */
+			gctl_error(req, "Don't know anything about '%s'", name);
+			break;
+		}
+		LOG_MSG(LVL_INFO, "Stopping %s by the userland command",
+		    sc->geom->name);
+		update_metadata(sc);
+		if ((error = virstor_geom_destroy(sc, TRUE, TRUE)) != 0) {
+			/* sc is only still valid here because destroy failed */
+			LOG_MSG(LVL_ERROR, "Cannot destroy %s: %d",
+			    sc->geom->name, error);
+		}
+	}
+	g_topology_unlock();
+}
+
+/*
+ * Dispose of a consumer that was created by virstor_ctl_add() but will not
+ * become a component: drop any access counts it holds, detach it if it is
+ * attached, then destroy it. Needs the topology lock.
+ */
+static void
+virstor_ctl_discard_consumer(struct g_consumer *cp)
+{
+
+	g_topology_assert();
+	if (cp->provider != NULL) {
+		if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
+			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
+		g_detach(cp);
+	}
+	g_destroy_consumer(cp);
+}
+
+/*
+ * "add" verb from userland - add new component(s) to the structure.
+ * This will be done all at once in here, without going through the
+ * .taste function for new components.
+ *
+ * Request parameters: "nargs" (>= 2), "hardcode", "arg0" (name of the
+ * virstor geom) and "arg1".."argN" (providers to add, with or without the
+ * _PATH_DEV prefix). Processing stops at the first provider that cannot be
+ * added; components added before that point are kept and the metadata of
+ * all components is updated to match. Errors go through gctl_error().
+ */
+static void
+virstor_ctl_add(struct gctl_req *req, struct g_class *cp)
+{
+	/* Note: while this is going on, I/O is being done on
+	 * the g_up and g_down threads. The idea is to make changes
+	 * to softc members in a way that can atomically activate
+	 * them all at once. */
+	struct g_virstor_softc *sc;
+	int *hardcode, *nargs;
+	const char *geom_name;	/* geom to add a component to */
+	struct g_consumer *fcp;
+	struct g_virstor_bio_q *bq;
+	u_int added;
+	int error;
+	int i;
+
+	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
+	if (nargs == NULL) {
+		gctl_error(req, "Error fetching argument '%s'", "nargs");
+		return;
+	}
+	if (*nargs < 2) {
+		gctl_error(req, "Invalid number of arguments");
+		return;
+	}
+	hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode));
+	if (hardcode == NULL) {
+		gctl_error(req, "Error fetching argument '%s'", "hardcode");
+		return;
+	}
+
+	/* Find "our" geom */
+	geom_name = gctl_get_asciiparam(req, "arg0");
+	if (geom_name == NULL) {
+		gctl_error(req, "Error fetching argument '%s'", "geom_name (arg0)");
+		return;
+	}
+	sc = virstor_find_geom(cp, geom_name);
+	if (sc == NULL) {
+		gctl_error(req, "Don't know anything about '%s'", geom_name);
+		return;
+	}
+
+	if (virstor_valid_components(sc) != sc->n_components) {
+		LOG_MSG(LVL_ERROR, "Cannot add components to incomplete "
+		    "virstor %s", sc->geom->name);
+		gctl_error(req, "Virstor %s is incomplete", sc->geom->name);
+		return;
+	}
+
+	/* New consumers mirror the access counts of the first component */
+	fcp = sc->components[0].gcons;
+	added = 0;
+	g_topology_lock();
+	for (i = 1; i < *nargs; i++) {
+		struct g_virstor_metadata md;
+		char aname[16];	/* "arg" + any int + NUL */
+		const char *prov_name;
+		struct g_provider *pp;
+		struct g_consumer *ncp;
+		u_int nc;
+		u_int j;
+
+		snprintf(aname, sizeof aname, "arg%d", i);
+		prov_name = gctl_get_asciiparam(req, aname);
+		if (prov_name == NULL) {
+			gctl_error(req, "Error fetching argument '%s'", aname);
+			goto fail;
+		}
+		if (strncmp(prov_name, _PATH_DEV, strlen(_PATH_DEV)) == 0)
+			prov_name += strlen(_PATH_DEV);
+
+		pp = g_provider_by_name(prov_name);
+		if (pp == NULL) {
+			/* This is the most common error so be verbose about it */
+			if (added != 0) {
+				gctl_error(req, "Invalid provider: '%s' (added"
+				    " %u components)", prov_name, added);
+			} else {
+				gctl_error(req, "Invalid provider: '%s'",
+				    prov_name);
+			}
+			goto fail;
+		}
+		ncp = g_new_consumer(sc->geom);
+		if (ncp == NULL) {
+			gctl_error(req, "Cannot create consumer");
+			goto fail;
+		}
+		error = g_attach(ncp, pp);
+		if (error != 0) {
+			gctl_error(req, "Cannot attach a consumer to %s",
+			    pp->name);
+			virstor_ctl_discard_consumer(ncp);
+			goto fail;
+		}
+		if (fcp->acr != 0 || fcp->acw != 0 || fcp->ace != 0) {
+			error = g_access(ncp, fcp->acr, fcp->acw, fcp->ace);
+			if (error != 0) {
+				gctl_error(req, "Access request failed for %s",
+				    pp->name);
+				virstor_ctl_discard_consumer(ncp);
+				goto fail;
+			}
+		}
+		if (fcp->provider->sectorsize != pp->sectorsize) {
+			gctl_error(req, "Sector size doesn't fit for %s",
+			    pp->name);
+			virstor_ctl_discard_consumer(ncp);
+			goto fail;
+		}
+		for (j = 0; j < sc->n_components; j++) {
+			if (strcmp(sc->components[j].gcons->provider->name,
+			    pp->name) == 0) {
+				gctl_error(req, "Component %s already in %s",
+				    pp->name, sc->geom->name);
+				virstor_ctl_discard_consumer(ncp);
+				goto fail;
+			}
+		}
+		sc->components = realloc(sc->components,
+		    sizeof(*sc->components) * (sc->n_components + 1),
+		    M_GVIRSTOR, M_WAITOK);
+
+		nc = sc->n_components;
+		/* realloc() does not zero the new slot; make sure fields that
+		 * are not set below (e.g. flags) start out as 0 */
+		bzero(&sc->components[nc], sizeof(sc->components[nc]));
+		sc->components[nc].gcons = ncp;
+		sc->components[nc].sc = sc;
+		sc->components[nc].index = nc;
+		sc->components[nc].chunk_count = ncp->provider->mediasize /
+		    sc->chunk_size;
+		sc->components[nc].chunk_next = 0;
+		sc->components[nc].chunk_reserved = 0;
+
+		if (sc->components[nc].chunk_count < 4) {
+			gctl_error(req, "Provider too small: %s",
+			    ncp->provider->name);
+			/* The slot is not visible yet (n_components is
+			 * unchanged), but do not leave a stale pointer in it */
+			sc->components[nc].gcons = NULL;
+			virstor_ctl_discard_consumer(ncp);
+			goto fail;
+		}
+		fill_metadata(sc, &md, nc, *hardcode);
+		error = write_metadata(ncp, &md);
+		if (error != 0) {
+			LOG_MSG(LVL_WARNING, "Cannot write metadata on %s "
+			    "(error %d)", ncp->provider->name, error);
+		}
+		/* The new component becomes visible when n_components is
+		 * incremented */
+		sc->n_components++;
+		added++;
+
+	}
+	/* This call to update_metadata() is critical. In case there's a
+	 * power failure in the middle of it and some components are updated
+	 * while others are not, there will be trouble on next .taste() iff
+	 * a non-updated component is detected first */
+	update_metadata(sc);
+	g_topology_unlock();
+	LOG_MSG(LVL_INFO, "Added %u component(s) to %s", added,
+	    sc->geom->name);
+	/* Fire off BIOs previously queued because there wasn't any
+	 * physical space left. If the BIOs still can't be satisfied
+	 * they will again be added to the end of the queue (during
+	 * which the mutex will be recursed) */
+	bq = malloc(sizeof(*bq), M_GVIRSTOR, M_WAITOK);
+	bq->bio = NULL;
+	mtx_lock(&sc->delayed_bio_q_mtx);
+	/* First, insert a sentinel to the queue end, so we don't
+	 * end up in an infinite loop if there's still no free
+	 * space available. */
+	STAILQ_INSERT_TAIL(&sc->delayed_bio_q, bq, linkage);
+	while (!STAILQ_EMPTY(&sc->delayed_bio_q)) {
+		bq = STAILQ_FIRST(&sc->delayed_bio_q);
+		if (bq->bio != NULL) {
+			g_virstor_start(bq->bio);
+			STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage);
+			free(bq, M_GVIRSTOR);
+		} else {
+			/* Reached the sentinel: everything queued before this
+			 * call has been retried once */
+			STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage);
+			free(bq, M_GVIRSTOR);
+			break;
+		}
+	}
+	mtx_unlock(&sc->delayed_bio_q_mtx);
+	return;
+
+fail:
+	/* Components added before the failure stay in place; bring the
+	 * on-disk component count in line with the in-memory one */
+	if (added != 0)
+		update_metadata(sc);
+	g_topology_unlock();
+}
+
+/*
+ * Find a geom handled by the class.
+ *
+ * Walks the geoms of class `cp` and returns the softc of the first one
+ * whose name equals `name`. Returns NULL when `name` is NULL or no geom
+ * matches; the result may also be NULL when the matching geom currently
+ * has no softc attached.
+ */
+static struct g_virstor_softc *
+virstor_find_geom(const struct g_class *cp, const char *name)
+{
+	struct g_geom *gp;
+
+	/* Callers pass the result of gctl_get_asciiparam() straight in */
+	if (name == NULL)
+		return (NULL);
+
+	LIST_FOREACH(gp, &cp->geom, geom) {
+		if (strcmp(name, gp->name) == 0)
+			return (gp->softc);
+	}
+	return (NULL);
+}
+
+/*
+ * Update metadata on all components to reflect the current state
+ * of these fields:
+ *    - chunk_next
+ *    - flags
+ *    - md_count
+ * Expects things to be set up so write_metadata() can work, i.e.
+ * the topology lock must be held.
+ *
+ * Does nothing for an incomplete device. A component whose metadata
+ * cannot be read is skipped (and logged) rather than overwritten, because
+ * the remaining fields of its metadata sector would be unknown.
+ */
+static void
+update_metadata(struct g_virstor_softc *sc)
+{
+	struct g_virstor_metadata md;
+	u_int n;
+	int error;
+
+	if (virstor_valid_components(sc) != sc->n_components)
+		return; /* Incomplete device */
+	LOG_MSG(LVL_DEBUG, "Updating metadata on components for %s",
+	    sc->geom->name);
+	/* Update metadata on components */
+	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__,
+	    sc->geom->class->name, sc->geom->name);
+	g_topology_assert();
+	for (n = 0; n < sc->n_components; n++) {
+		error = read_metadata(sc->components[n].gcons, &md);
+		if (error != 0) {
+			/* md is uninitialised here; never write it back */
+			LOG_MSG(LVL_ERROR, "Cannot read metadata from %s "
+			    "(error %d)",
+			    sc->components[n].gcons->provider->name, error);
+			continue;
+		}
+		md.chunk_next = sc->components[n].chunk_next;
+		md.flags = sc->components[n].flags;
+		md.md_count = sc->n_components;
+		error = write_metadata(sc->components[n].gcons, &md);
+		if (error != 0) {
+			LOG_MSG(LVL_ERROR, "Cannot write metadata on %s "
+			    "(error %d)",
+			    sc->components[n].gcons->provider->name, error);
+		}
+	}
+}
+
+/*
+ * Fills metadata (struct md) from information stored in softc and the nc'th
+ * component of virstor.
+ *
+ * `md` is zeroed first, so fields not set here stay 0. The provider name is
+ * recorded only when `hardcode` is non-zero. String fields are copied with
+ * strlcpy() so they are always NUL-terminated; g_virstor_taste() compares
+ * them with strcmp().
+ */
+static void
+fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md,
+    u_int nc, u_int hardcode)
+{
+	struct g_virstor_component *c;
+
+	bzero(md, sizeof *md);
+	c = &sc->components[nc];
+
+	/* Device-wide fields */
+	strlcpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof md->md_magic);
+	md->md_version = G_VIRSTOR_VERSION;
+	strlcpy(md->md_name, sc->geom->name, sizeof md->md_name);
+	md->md_id = sc->id;
+	md->md_virsize = sc->virsize;
+	md->md_chunk_size = sc->chunk_size;
+	md->md_count = sc->n_components;
+
+	/* Per-component fields */
+	if (hardcode) {
+		strlcpy(md->provider, c->gcons->provider->name,
+		    sizeof md->provider);
+	}
+	md->no = nc;
+	md->provsize = c->gcons->provider->mediasize;
+	md->chunk_count = c->chunk_count;
+	md->chunk_next = c->chunk_next;
+	md->chunk_reserved = c->chunk_reserved;
+	md->flags = c->flags;
+}
+
+/*
+ * Remove a component from virstor device.
+ * Can only be done if the component is unallocated.
+ *
+ * Request parameters: "nargs" (>= 2), "arg0" (name of the virstor geom)
+ * and "arg1".."argN" (providers to remove, with or without the _PATH_DEV
+ * prefix). Providers that are not components, or that are already
+ * allocated, are logged and skipped. Metadata of the remaining components
+ * is updated at the end.
+ *
+ * NOTE(review): update_metadata() does not rewrite the on-disk component
+ * number of components that move down in the array -- confirm whether
+ * that needs handling.
+ */
+static void
+virstor_ctl_remove(struct gctl_req *req, struct g_class *cp)
+{
+	/* As this is executed in parallel to I/O, operations on virstor
+	 * structures must be as atomic as possible. */
+	struct g_virstor_softc *sc;
+	int *nargs;
+	const char *geom_name;
+	u_int removed;
+	int i;
+
+	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
+	if (nargs == NULL) {
+		gctl_error(req, "Error fetching argument '%s'", "nargs");
+		return;
+	}
+	if (*nargs < 2) {
+		gctl_error(req, "Invalid number of arguments");
+		return;
+	}
+	/* Find "our" geom */
+	geom_name = gctl_get_asciiparam(req, "arg0");
+	if (geom_name == NULL) {
+		gctl_error(req, "Error fetching argument '%s'",
+		    "geom_name (arg0)");
+		return;
+	}
+	sc = virstor_find_geom(cp, geom_name);
+	if (sc == NULL) {
+		gctl_error(req, "Don't know anything about '%s'", geom_name);
+		return;
+	}
+
+	if (virstor_valid_components(sc) != sc->n_components) {
+		LOG_MSG(LVL_ERROR, "Cannot remove components from incomplete "
+		    "virstor %s", sc->geom->name);
+		gctl_error(req, "Virstor %s is incomplete", sc->geom->name);
+		return;
+	}
+
+	removed = 0;
+	for (i = 1; i < *nargs; i++) {
+		char param[16];	/* "arg" + any int + NUL */
+		const char *prov_name;
+		int j, found;
+		size_t tail;	/* number of components after the removed one */
+		struct g_virstor_component *newcomp, *compbak;
+		struct g_consumer *rcp;
+
+		snprintf(param, sizeof(param), "arg%d", i);
+		prov_name = gctl_get_asciiparam(req, param);
+		if (prov_name == NULL) {
+			gctl_error(req, "Error fetching argument '%s'", param);
+			break;
+		}
+		if (strncmp(prov_name, _PATH_DEV, strlen(_PATH_DEV)) == 0)
+			prov_name += strlen(_PATH_DEV);
+
+		found = -1;
+		for (j = 0; j < sc->n_components; j++) {
+			if (strcmp(sc->components[j].gcons->provider->name,
+			    prov_name) == 0) {
+				found = j;
+				break;
+			}
+		}
+		if (found == -1) {
+			LOG_MSG(LVL_ERROR, "No %s component in %s",
+			    prov_name, sc->geom->name);
+			continue;
+		}
+
+		/* Check before allocating anything */
+		if ((sc->components[found].flags &
+		    VIRSTOR_PROVIDER_ALLOCATED) != 0) {
+			LOG_MSG(LVL_ERROR, "Allocated provider %s cannot be "
+			    "removed from %s",
+			    prov_name, sc->geom->name);
+			/* We'll consider this non-fatal error */
+			continue;
+		}
+
+		/* Build the new array: everything before `found`, then
+		 * everything after it */
+		compbak = sc->components;
+		tail = sc->n_components - found - 1;
+		newcomp = malloc(sc->n_components * sizeof(*sc->components),
+		    M_GVIRSTOR, M_WAITOK | M_ZERO);
+		bcopy(compbak, newcomp, found * sizeof(*compbak));
+		bcopy(&compbak[found + 1], newcomp + found,
+		    tail * sizeof(*compbak));
+		/* Renumerate unallocated components in the new array */
+		for (j = 0; j < sc->n_components-1; j++) {
+			if ((newcomp[j].flags &
+			    VIRSTOR_PROVIDER_ALLOCATED) == 0) {
+				newcomp[j].index = j;
+			}
+		}
+		/* This is the critical section. If a component allocation
+		 * event happens while both variables are not yet set,
+		 * there will be trouble. Something will panic on encountering
+		 * NULL sc->components[x].gcons member.
+		 * Luckily, component allocation happens very rarely and
+		 * removing components is an abnormal action in any case. */
+		sc->components = newcomp;
+		sc->n_components--;
+		/* End critical section */
+
+		rcp = compbak[found].gcons;
+		g_topology_lock();
+		if (clear_metadata(&compbak[found]) != 0) {
+			LOG_MSG(LVL_WARNING, "Trouble ahead: cannot clear "
+			    "metadata on %s", prov_name);
+		}
+		/* Drop whatever access the component still holds before
+		 * detaching, as remove_component() does */
+		if (rcp->acr != 0 || rcp->acw != 0 || rcp->ace != 0)
+			g_access(rcp, -rcp->acr, -rcp->acw, -rcp->ace);
+		g_detach(rcp);
+		g_destroy_consumer(rcp);
+		g_topology_unlock();
+
+		free(compbak, M_GVIRSTOR);
+
+		removed++;
+	}
+
+	/* This call to update_metadata() is critical. In case there's a
+	 * power failure in the middle of it and some components are updated
+	 * while others are not, there will be trouble on next .taste() iff
+	 * a non-updated component is detected first */
+	g_topology_lock();
+	update_metadata(sc);
+	g_topology_unlock();
+	LOG_MSG(LVL_INFO, "Removed %u component(s) from %s", removed,
+	    sc->geom->name);
+}
+
+/*
+ * Wipe the metadata sector of a component.
+ *
+ * Opens the component's consumer for writing, overwrites the last sector
+ * of its provider with zeroes and drops the write access again. Needs the
+ * topology lock. Returns 0 on success, otherwise the error from g_access()
+ * or g_write_data().
+ */
+static int
+clear_metadata(struct g_virstor_component *comp)
+{
+	struct g_consumer *cons;
+	struct g_provider *prov;
+	char *zeroes;
+	int rc;
+
+	LOG_MSG(LVL_INFO, "Clearing metadata on %s",
+	    comp->gcons->provider->name);
+	g_topology_assert();
+
+	cons = comp->gcons;
+	rc = g_access(cons, 0, 1, 0);
+	if (rc != 0)
+		return (rc);
+
+	prov = cons->provider;
+	zeroes = malloc(prov->sectorsize, M_GVIRSTOR, M_WAITOK | M_ZERO);
+	/* Metadata lives in the very last sector of the provider */
+	rc = g_write_data(cons, prov->mediasize - prov->sectorsize, zeroes,
+	    prov->sectorsize);
+	free(zeroes, M_GVIRSTOR);
+
+	g_access(cons, 0, -1, 0);
+	return (rc);
+}
+
+/*
+ * Destroy geom forcibly (per-class destroy_geom callback).
+ *
+ * Updates component metadata and tears the geom down without forcing.
+ * With INVARIANTS the in-memory allocation table is first compared against
+ * the copy stored on the first component and any mismatch is logged.
+ * Returns EAGAIN after the teardown has been requested, 0 if the geom has
+ * no softc.
+ */
+static int
+g_virstor_destroy_geom(struct gctl_req *req __unused, struct g_class *mp,
+    struct g_geom *gp)
+{
+	struct g_virstor_softc *sc;
+	int exitval;
+
+	sc = gp->softc;
+	KASSERT(sc != NULL, ("%s: NULL sc", __func__));
+	
+	exitval = 0;
+	LOG_MSG(LVL_DEBUG, "%s called for %s, sc=%p", __func__, gp->name,
+	    gp->softc);
+
+	if (sc != NULL) {
+#ifdef INVARIANTS
+		char *buf;
+		int error;
+		off_t off;
+		int isclean, count;
+		int n;
+
+		LOG_MSG(LVL_INFO, "INVARIANTS detected");
+		LOG_MSG(LVL_INFO, "Verifying allocation "
+		    "table for %s", sc->geom->name);
+		count = 0;
+		for (n = 0; n < sc->chunk_count; n++) {
+			/* Test the flag bit; a logical OR here would count
+			 * every chunk as allocated */
+			if ((sc->map[n].flags & VIRSTOR_MAP_ALLOCATED) != 0)
+				count++;
+		}
+		LOG_MSG(LVL_INFO, "Device %s has %d allocated chunks",
+		    sc->geom->name, count);
+		n = off = count = 0;
+		isclean = 1;
+		if (virstor_valid_components(sc) != sc->n_components) {
+			/* This is a incomplete virstor device (not all
+			 * components have been found) */
+			LOG_MSG(LVL_ERROR, "Device %s is incomplete",
+			    sc->geom->name);
+			goto bailout;
+		}
+		error = g_access(sc->components[0].gcons, 1, 0, 0);
+		KASSERT(error == 0, ("%s: g_access failed (%d)", __func__,
+		    error));
+		/* Compare the whole on-disk allocation table with what's
+		 * currently in memory */
+		while (n < sc->chunk_count) {
+			buf = g_read_data(sc->components[0].gcons, off,
+			    sc->sectorsize, &error);
+			KASSERT(buf != NULL, ("g_read_data returned NULL (%d) "
+			    "for read at %jd", error, (intmax_t)off));
+			if (bcmp(buf, &sc->map[n], sc->sectorsize) != 0) {
+				LOG_MSG(LVL_ERROR, "ERROR in allocation table, "
+				    "entry %d, offset %jd", n, (intmax_t)off);
+				isclean = 0;
+				count++;
+			}
+			n += sc->me_per_sector;
+			off += sc->sectorsize;
+			g_free(buf);
+		}
+		error = g_access(sc->components[0].gcons, -1, 0, 0);
+		KASSERT(error == 0, ("%s: g_access failed (%d) on exit",
+		    __func__, error));
+		if (isclean != 1) {
+			LOG_MSG(LVL_ERROR, "ALLOCATION TABLE CORRUPTED FOR %s "
+			    "(%d sectors don't match, max %zu allocations)",
+			    sc->geom->name, count,
+			    count * sc->me_per_sector);
+		} else {
+			LOG_MSG(LVL_INFO, "Allocation table ok for %s",
+			    sc->geom->name);
+		}
+bailout:
+#endif
+		update_metadata(sc);
+		virstor_geom_destroy(sc, FALSE, FALSE);
+		exitval = EAGAIN;
+	} else
+		exitval = 0;
+	return (exitval);
+}
+
+/*
+ * Taste event (per-class callback)
+ * Examines a provider and creates geom instances if needed.
+ *
+ * Reads the metadata sector of `pp` through a temporary helper geom. The
+ * provider is ignored (NULL is returned) when the metadata cannot be read,
+ * the magic or version do not match, the recorded provider size differs
+ * from the real one, or a hardcoded provider name does not match. Otherwise
+ * the provider is added to the existing virstor geom with the same name and
+ * id, or to a newly created one. Returns that geom, or NULL on failure.
+ */
+static struct g_geom *
+g_virstor_taste(struct g_class *mp, struct g_provider *pp, int flags)
+{
+	struct g_virstor_metadata md;
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	struct g_virstor_softc *sc;
+	int error;
+
+	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
+	g_topology_assert();
+	LOG_MSG(LVL_DEBUG, "Tasting %s", pp->name);
+
+	/* We need a dummy geom to attach a consumer to the given provider */
+	gp = g_new_geomf(mp, "virstor:taste.helper");
+	gp->start = (void *)invalid_call;	/* XXX: hacked up so the        */
+	gp->access = (void *)invalid_call;	/* compiler doesn't complain.   */
+	gp->orphan = (void *)invalid_call;	/* I really want these to fail. */
+
+	cp = g_new_consumer(gp);
+	error = g_attach(cp, pp);
+	if (error == 0) {
+		/* Only touch the provider if the attach really succeeded */
+		error = read_metadata(cp, &md);
+		g_detach(cp);
+	}
+	g_destroy_consumer(cp);
+	g_destroy_geom(gp);
+
+	if (error != 0)
+		return (NULL);
+
+	if (strcmp(md.md_magic, G_VIRSTOR_MAGIC) != 0)
+		return (NULL);
+	if (md.md_version != G_VIRSTOR_VERSION) {
+		LOG_MSG(LVL_ERROR, "Kernel module version invalid "
+		    "to handle %s (%s) : %d should be %d",
+		    md.md_name, pp->name, md.md_version, G_VIRSTOR_VERSION);
+		return (NULL);
+	}
+	if (md.provsize != pp->mediasize)
+		return (NULL);
+
+	/* If the provider name is hardcoded, use the offered provider only
+	 * if it's been offered with its proper name (the one used in
+	 * the label command). */
+	if (md.provider[0] != '\0') {
+		if (strcmp(md.provider, pp->name) != 0)
+			return (NULL);
+	}
+
+	/* Iterate all geoms this class already knows about to see if a new
+	 * geom instance of this class needs to be created (in case the provider
+	 * is first from a (possibly) multi-consumer geom) or it just needs
+	 * to be added to an existing instance. */
+	sc = NULL;
+	gp = NULL;
+	LIST_FOREACH(gp, &mp->geom, geom) {
+		sc = gp->softc;
+		if (sc == NULL)
+			continue;
+		if (strcmp(md.md_name, sc->geom->name) != 0)
+			continue;
+		if (md.md_id != sc->id)
+			continue;
+		break;
+	}
+	/* gp is NULL here when the loop ran to the end without a match */
+	if (gp != NULL) { /* We found an existing geom instance; add to it */
+		LOG_MSG(LVL_INFO, "Adding %s to %s", pp->name, md.md_name);
+		error = add_provider_to_geom(sc, pp, &md);
+		if (error != 0) {
+			LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)",
+			    pp->name, md.md_name, error);
+			return (NULL);
+		}
+	} else { /* New geom instance needs to be created */
+		gp = create_virstor_geom(mp, &md);
+		if (gp == NULL) {
+			LOG_MSG(LVL_ERROR, "Error creating new instance of "
+			    "class %s: %s", mp->name, md.md_name);
+			LOG_MSG(LVL_DEBUG, "Error creating %s at %s",
+			    md.md_name, pp->name);
+			return (NULL);
+		}
+		sc = gp->softc;
+		LOG_MSG(LVL_INFO, "Adding %s to %s (first found)", pp->name,
+		    md.md_name);
+		error = add_provider_to_geom(sc, pp, &md);
+		if (error != 0) {
+			LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)",
+			    pp->name, md.md_name, error);
+			virstor_geom_destroy(sc, TRUE, FALSE);
+			return (NULL);
+		}
+	}
+
+	return (gp);
+}
+
+/*
+ * Event callback: detach and destroy the consumer handed over in `arg`.
+ * Posted on the g_event queue by remove_component() when the destruction
+ * has to be deferred.
+ */
+static void
+delay_destroy_consumer(void *arg, int flags __unused)
+{
+	struct g_consumer *cons;
+
+	cons = arg;
+	KASSERT(cons != NULL, ("%s: invalid consumer", __func__));
+	LOG_MSG(LVL_DEBUG, "Consumer %s destroyed with delay",
+	    cons->provider->name);
+
+	g_detach(cons);
+	g_destroy_consumer(cons);
+}
+
+/*
+ * Detach one component (consumer) from a geom instance.
+ *
+ * If the geom still has its provider, that provider is marked withering
+ * and orphaned first, announcing that the geom is being dismantled. Any
+ * access the consumer holds is released; the consumer is then destroyed
+ * either right away or, when `delay` is set, from the g_event queue.
+ */
+static void
+remove_component(struct g_virstor_softc *sc, struct g_virstor_component *comp,
+    boolean_t delay)
+{
+	struct g_consumer *cons;
+
+	KASSERT(comp->gcons != NULL, ("Component with no consumer in %s",
+	    sc->geom->name));
+	cons = comp->gcons;
+	comp->gcons = NULL;	/* the component slot is empty from now on */
+
+	KASSERT(cons->provider != NULL, ("%s: no provider", __func__));
+	LOG_MSG(LVL_DEBUG, "Component %s removed from %s",
+	    cons->provider->name, sc->geom->name);
+
+	if (sc->provider != NULL) {
+		/* First component to go: take the virstor provider down */
+		sc->provider->flags |= G_PF_WITHER;
+		g_orphan_provider(sc->provider, ENXIO);
+		sc->provider = NULL;
+		LOG_MSG(LVL_INFO, "Removing provider %s", sc->geom->name);
+	}
+
+	if (cons->acr > 0 || cons->acw > 0 || cons->ace > 0)
+		g_access(cons, -cons->acr, -cons->acw, -cons->ace);
+
+	if (!delay) {
+		g_detach(cons);
+		g_destroy_consumer(cons);
+		return;
+	}
+	/* Destroy consumer after it's tasted */
+	g_post_event(delay_destroy_consumer, cons, M_WAITOK, NULL);
+}
+
+/*
+ * Destroy geom - called internally
+ * See g_virstor_destroy_geom for the other one
+ *
+ * Returns ENXIO for a NULL softc and EBUSY when the provider is open and
+ * `force` is not set. Otherwise removes all components (`delay` is passed
+ * on to remove_component()), fails every delayed BIO, frees the softc and
+ * withers the geom, returning 0. Needs the topology lock.
+ */
+static int
+virstor_geom_destroy(struct g_virstor_softc *sc, boolean_t force,
+    boolean_t delay)
+{
+	struct g_virstor_bio_q *pending;
+	struct g_provider *prov;
+	struct g_geom *geom;
+	int idx;
+
+	g_topology_assert();
+
+	if (sc == NULL)
+		return (ENXIO);
+
+	prov = sc->provider;
+	if (prov != NULL &&
+	    (prov->acr != 0 || prov->acw != 0 || prov->ace != 0)) {
+		LOG_MSG(force ? LVL_WARNING : LVL_ERROR,
+		    "Device %s is still open.", prov->name);
+		if (!force)
+			return (EBUSY);
+	}
+
+	for (idx = 0; idx < sc->n_components; idx++) {
+		if (sc->components[idx].gcons == NULL)
+			continue;
+		remove_component(sc, &sc->components[idx], delay);
+	}
+
+	geom = sc->geom;
+	geom->softc = NULL;
+
+	KASSERT(sc->provider == NULL, ("Provider still exists for %s",
+	    geom->name));
+
+	/* XXX: This might or might not work, since we're called with
+	 * the topology lock held. Also, it might panic the kernel if
+	 * the error'd BIO is in softupdates code. */
+	mtx_lock(&sc->delayed_bio_q_mtx);
+	while ((pending = STAILQ_FIRST(&sc->delayed_bio_q)) != NULL) {
+		pending->bio->bio_error = ENOSPC;
+		g_io_deliver(pending->bio, EIO);
+		STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage);
+		free(pending, M_GVIRSTOR);
+	}
+	mtx_unlock(&sc->delayed_bio_q_mtx);
+	mtx_destroy(&sc->delayed_bio_q_mtx);
+
+	/* Release everything owned by the softc, then the softc itself */
+	free(sc->map, M_GVIRSTOR);
+	free(sc->components, M_GVIRSTOR);
+	bzero(sc, sizeof *sc);
+	free(sc, M_GVIRSTOR);
+
+	prov = LIST_FIRST(&geom->provider); /* We only offer one provider */
+	if (!(prov != NULL &&
+	    (prov->acr != 0 || prov->acw != 0 || prov->ace != 0))) {
+		LOG_MSG(LVL_DEBUG, "Device %s destroyed", geom->name);
+	}
+
+	g_wither_geom(geom, ENXIO);
+
+	return (0);
+}
+
+/*
+ * Utility function: read the last sector of the consumer's provider and
+ * decode it into `md`. Wants topology lock to be held; the lock is dropped
+ * for the duration of the read and re-taken afterwards. Returns 0 on
+ * success, otherwise the error from g_access() or g_read_data().
+ */
+static int
+read_metadata(struct g_consumer *cp, struct g_virstor_metadata *md)
+{
+	struct g_provider *prov;
+	char *sector;
+	int rc;
+
+	g_topology_assert();
+	rc = g_access(cp, 1, 0, 0);
+	if (rc != 0)
+		return (rc);
+
+	prov = cp->provider;
+	g_topology_unlock();
+	sector = g_read_data(cp, prov->mediasize - prov->sectorsize,
+	    prov->sectorsize, &rc);
+	g_topology_lock();
+	g_access(cp, -1, 0, 0);
+
+	if (sector != NULL) {
+		virstor_metadata_decode(sector, md);
+		g_free(sector);
+		return (0);
+	}
+	return (rc);
+}
+
+/**
+ * Utility function: encode & write metadata. Assumes topology lock is
+ * held; the lock is dropped for the duration of the write and re-taken
+ * afterwards.
+ *
+ * The encoded metadata is written to the last sector of the consumer's
+ * provider. Returns 0 on success, otherwise the error from g_access() or
+ * g_write_data().
+ */
+static int
+write_metadata(struct g_consumer *cp, struct g_virstor_metadata *md)
+{
+	struct g_provider *pp;
+	char *buf;
+	int error;
+
+	KASSERT(cp != NULL && md != NULL && cp->provider != NULL,
+	    ("Something's fishy in %s", __func__));
+	LOG_MSG(LVL_DEBUG, "Writing metadata on %s", cp->provider->name);
+	g_topology_assert();
+	error = g_access(cp, 0, 1, 0);
+	if (error != 0)
+		return (error);
+	pp = cp->provider;
+
+	/* Zeroed so that bytes of the sector not covered by the encoded
+	 * metadata never carry stale kernel memory to disk */
+	buf = malloc(pp->sectorsize, M_GVIRSTOR, M_WAITOK | M_ZERO);
+	virstor_metadata_encode(md, buf);
+	g_topology_unlock();
+	error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf,
+	    pp->sectorsize);
+	g_topology_lock();
+	g_access(cp, 0, -1, 0);
+
+	free(buf, M_GVIRSTOR);
+	/* Report write failures instead of always claiming success */
+	return (error);
+}
+
+/*
+ * Create a new geom of this class from on-disk metadata and set up its
+ * softc.
+ *
+ * Returns NULL when the metadata is implausible or a geom with the same
+ * name already exists. The new geom has no provider yet and an all-zero
+ * component array sized for md_count components.
+ */
+static struct g_geom *
+create_virstor_geom(struct g_class *mp, struct g_virstor_metadata *md)
+{
+	struct g_virstor_softc *sc;
+	struct g_geom *gp;
+
+	LOG_MSG(LVL_DEBUG, "Creating geom instance for %s (id=%u)",
+	    md->md_name, md->md_id);
+
+	if (md->md_count < 1 || md->md_chunk_size < 1 ||
+	    md->md_virsize < md->md_chunk_size) {
+		/* This is bogus configuration, and probably means data is
+		 * somehow corrupted. Panic, maybe? */
+		LOG_MSG(LVL_ERROR, "Nonsensical metadata information for %s",
+		    md->md_name);
+		return (NULL);
+	}
+
+	/* Refuse to create a second geom with the same name */
+	LIST_FOREACH(gp, &mp->geom, geom) {
+		sc = gp->softc;
+		if (sc == NULL || strcmp(sc->geom->name, md->md_name) != 0)
+			continue;
+
+		LOG_MSG(LVL_WARNING, "Geom %s already exists",
+		    md->md_name);
+		if (sc->id != md->md_id) {
+			LOG_MSG(LVL_ERROR,
+			    "Some stale or invalid components "
+			    "exist for virstor device named %s. "
+			    "You will need to <CLEAR> all stale "
+			    "components and maybe reconfigure "
+			    "the virstor device. Tune "
+			    "kern.geom.virstor.debug sysctl up "
+			    "for more information.",
+			    sc->geom->name);
+		}
+		return (NULL);
+	}
+
+	gp = g_new_geomf(mp, "%s", md->md_name);
+	gp->softc = NULL; /* to circumvent races that test softc */
+
+	/* Per-instance GEOM methods */
+	gp->start = g_virstor_start;
+	gp->spoiled = g_virstor_orphan;
+	gp->orphan = g_virstor_orphan;
+	gp->access = g_virstor_access;
+	gp->dumpconf = g_virstor_dumpconf;
+
+	/* Softc, filled in from the metadata */
+	sc = malloc(sizeof(*sc), M_GVIRSTOR, M_WAITOK | M_ZERO);
+	sc->id = md->md_id;
+	sc->virsize = md->md_virsize;
+	sc->chunk_size = md->md_chunk_size;
+	sc->n_components = md->md_count;
+	sc->components = malloc(sizeof(struct g_virstor_component) * md->md_count,
+	    M_GVIRSTOR, M_WAITOK | M_ZERO);
+	mtx_init(&sc->delayed_bio_q_mtx, "gvirstor_delayed_bio_q_mtx",
+	    "gvirstor", MTX_DEF | MTX_RECURSE);
+	STAILQ_INIT(&sc->delayed_bio_q);
+
+	sc->provider = NULL; /* virstor_check_and_run will create it */
+	sc->geom = gp;
+	gp->softc = sc;
+
+	LOG_MSG(LVL_ANNOUNCE, "Device %s created", sc->geom->name);
+
+	return (gp);
+}
+
+/*
+ * Add provider to a GEOM class instance
+ *
+ * Attaches a new consumer of the geom to pp and installs it as component
+ * md->no of sc, copying the per-provider allocation state from md. If the
+ * geom already has a consumer, pp must have the same sector size and the
+ * new consumer is opened with that consumer's access counts. Ends by
+ * calling virstor_check_and_run().
+ *
+ * Returns 0 on success, EINVAL for an out-of-range index or a sector size
+ * mismatch, EEXIST if the slot is already taken, or the error from
+ * g_attach() / g_access(). The new consumer is released again on every
+ * failure path.
+ */
+static int
+add_provider_to_geom(struct g_virstor_softc *sc, struct g_provider *pp,
+    struct g_virstor_metadata *md)
+{
+	struct g_virstor_component *component;
+	struct g_consumer *cp, *fcp;
+	struct g_geom *gp;
+	int error;
+
+	if (md->no >= sc->n_components)
+		return (EINVAL);
+
+	/* "Current" component */
+	component = &(sc->components[md->no]);
+	if (component->gcons != NULL)
+		return (EEXIST);
+
+	gp = sc->geom;
+	/* Fetched before the new consumer is created, so this is a
+	 * consumer that already existed (or NULL) */
+	fcp = LIST_FIRST(&gp->consumer);
+
+	cp = g_new_consumer(gp);
+	error = g_attach(cp, pp);
+
+	if (error != 0) {
+		g_destroy_consumer(cp);
+		return (error);
+	}
+
+	if (fcp != NULL) {
+		if (fcp->provider->sectorsize != pp->sectorsize) {
+			/* TODO: this can be made to work */
+			LOG_MSG(LVL_ERROR, "Provider %s of %s has invalid "
+			    "sector size (%d)", pp->name, sc->geom->name,
+			    pp->sectorsize);
+			/* Don't leave the rejected consumer attached */
+			g_detach(cp);
+			g_destroy_consumer(cp);
+			return (EINVAL);
+		}
+		if (fcp->acr > 0 || fcp->acw || fcp->ace > 0) {
+			/* Replicate access permissions from first "live" consumer
+			 * to the new one */
+			error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
+			if (error != 0) {
+				g_detach(cp);
+				g_destroy_consumer(cp);
+				return (error);
+			}
+		}
+	}
+
+	/* Bring up a new component */
+	cp->private = component;
+	component->gcons = cp;
+	component->sc = sc;
+	component->index = md->no;
+	component->chunk_count = md->chunk_count;
+	component->chunk_next = md->chunk_next;
+	component->chunk_reserved = md->chunk_reserved;
+	component->flags = md->flags;
+
+	LOG_MSG(LVL_DEBUG, "%s attached to %s", pp->name, sc->geom->name);
+
+	virstor_check_and_run(sc);
+	return (0);
+}
+
+/*
+ * Check if everything's ready to create the geom provider & device entry,
+ * create and start provider.
+ * Called ultimately by .taste, from g_event thread
+ *
+ * Nothing happens until every component slot has a consumer. Then the
+ * allocation map is read from the start of component 0, the per-component
+ * allocation counters are recomputed from it and the "virstor/<name>"
+ * provider is created. On failure the function logs and returns without
+ * creating the provider.
+ */
+static void
+virstor_check_and_run(struct g_virstor_softc *sc)
+{
+	off_t off;
+	size_t n, count;
+	int index;
+	int error;
+
+	if (virstor_valid_components(sc) != sc->n_components)
+		return;
+
+	/* sc->provider is only assigned at the end of this function
+	 * (create_virstor_geom() leaves it NULL), so messages before that
+	 * point name the geom instead of the provider. */
+	if (virstor_valid_components(sc) == 0) {
+		/* This is actually a candidate for panic() */
+		LOG_MSG(LVL_ERROR, "No valid components for %s?",
+		    sc->geom->name);
+		return;
+	}
+
+	sc->sectorsize = sc->components[0].gcons->provider->sectorsize;
+
+	/* Initialise allocation map from the first consumer */
+	sc->chunk_count = sc->virsize / sc->chunk_size;
+	if (sc->chunk_count * (off_t)sc->chunk_size != sc->virsize) {
+		LOG_MSG(LVL_WARNING, "Device %s truncated to %ju bytes",
+		    sc->geom->name,
+		    (uintmax_t)(sc->chunk_count * (off_t)sc->chunk_size));
+	}
+	sc->map_size = sc->chunk_count * sizeof *(sc->map);
+	/* The following allocation is in order of 4MB - 8MB */
+	sc->map = malloc(sc->map_size, M_GVIRSTOR, M_WAITOK);
+	KASSERT(sc->map != NULL, ("%s: Memory allocation error (%zu bytes) for %s",
+	    __func__, sc->map_size, sc->geom->name));
+	sc->map_sectors = sc->map_size / sc->sectorsize;
+
+	count = 0;
+	for (n = 0; n < sc->n_components; n++)
+		count += sc->components[n].chunk_count;
+	LOG_MSG(LVL_INFO, "Device %s has %zu physical chunks and %zu virtual "
+	    "(%zu KB chunks)",
+	    sc->geom->name, count, sc->chunk_count, sc->chunk_size / 1024);
+
+	error = g_access(sc->components[0].gcons, 1, 0, 0);
+	if (error != 0) {
+		LOG_MSG(LVL_ERROR, "Cannot acquire read access for %s to "
+		    "read allocation map for %s",
+		    sc->components[0].gcons->provider->name,
+		    sc->geom->name);
+		/* The map cannot be filled in; don't keep it around */
+		free(sc->map, M_GVIRSTOR);
+		sc->map = NULL;
+		return;
+	}
+	/* Read in the allocation map */
+	LOG_MSG(LVL_DEBUG, "Reading map for %s from %s", sc->geom->name,
+	    sc->components[0].gcons->provider->name);
+	off = count = n = 0;
+	while (count < sc->map_size) {
+		struct virstor_map_entry *mapbuf;
+		size_t bs;
+
+		bs = MIN(MAXPHYS, sc->map_size - count);
+		if (bs % sc->sectorsize != 0) {
+			/* Check for alignment errors */
+			bs = (bs / sc->sectorsize) * sc->sectorsize;
+			if (bs == 0)
+				break;
+			LOG_MSG(LVL_ERROR, "Trouble: map is not sector-aligned "
+			    "for %s on %s", sc->geom->name,
+			    sc->components[0].gcons->provider->name);
+		}
+		mapbuf = g_read_data(sc->components[0].gcons, off, bs, &error);
+		if (mapbuf == NULL) {
+			/* Drop the map and clear the pointer so nothing can
+			 * use or free it again, and give back the read
+			 * access acquired above. */
+			free(sc->map, M_GVIRSTOR);
+			sc->map = NULL;
+			LOG_MSG(LVL_ERROR, "Error reading allocation map "
+			    "for %s from %s (offset %ju) (error %d)",
+			    sc->geom->name,
+			    sc->components[0].gcons->provider->name,
+			    (uintmax_t)off, error);
+			g_access(sc->components[0].gcons, -1, 0, 0);
+			return;
+		}
+
+		bcopy(mapbuf, &sc->map[n], bs);
+		off += bs;
+		count += bs;
+		n += bs / sizeof *(sc->map);
+		g_free(mapbuf);
+	}
+	g_access(sc->components[0].gcons, -1, 0, 0);
+	LOG_MSG(LVL_DEBUG, "Read map for %s", sc->geom->name);
+
+	/* find first component with allocatable chunks */
+	index = -1;
+	for (n = 0; n < sc->n_components; n++) {
+		if (sc->components[n].chunk_next <
+		    sc->components[n].chunk_count) {
+			index = n;
+			break;
+		}
+	}
+	if (index == -1)
+		/* not found? set it to the last component and handle it
+		 * later */
+		index = sc->n_components - 1;
+
+	if (index >= sc->n_components - g_virstor_component_watermark - 1) {
+		LOG_MSG(LVL_WARNING, "Device %s running out of components "
+		    "(%d/%u: %s)", sc->geom->name,
+		    index+1,
+		    sc->n_components,
+		    sc->components[index].gcons->provider->name);
+	}
+	sc->curr_component = index;
+
+	if (sc->components[index].chunk_next >=
+	    sc->components[index].chunk_count - g_virstor_chunk_watermark) {
+		LOG_MSG(LVL_WARNING,
+		    "Component %s of %s is running out of free space "
+		    "(%u chunks left)",
+		    sc->components[index].gcons->provider->name,
+		    sc->geom->name, sc->components[index].chunk_count -
+		    sc->components[index].chunk_next);
+	}
+
+	sc->me_per_sector = sc->sectorsize / sizeof *(sc->map);
+	if (sc->sectorsize % sizeof *(sc->map) != 0) {
+		LOG_MSG(LVL_ERROR,
+		    "%s: Map entries don't fit exactly in a sector (%s)",
+		    __func__, sc->geom->name);
+		return;
+	}
+
+	/* Recalculate allocated chunks in components & at the same time
+	 * verify map data is sane. We could trust metadata on this, but
+	 * we want to make sure. */
+	for (n = 0; n < sc->n_components; n++)
+		sc->components[n].chunk_next = sc->components[n].chunk_reserved;
+
+	for (n = 0; n < sc->chunk_count; n++) {
+		u_int prov_no = sc->map[n].provider_no;
+
+		if (prov_no >= sc->n_components) {
+			/* components[prov_no] does not exist, so only the
+			 * index itself can be reported */
+			LOG_MSG(LVL_ERROR, "%s: Invalid entry %u in map for %s",
+			    __func__, (u_int)n, sc->geom->name);
+			LOG_MSG(LVL_ERROR, "%s: provider_no: %u, n_components: %u",
+			    __func__, prov_no, sc->n_components);
+			return;
+		}
+		if (sc->map[n].provider_chunk >=
+		    sc->components[prov_no].chunk_count) {
+			LOG_MSG(LVL_ERROR, "%s: Invalid entry %u in map for %s",
+			    __func__, (u_int)n, sc->geom->name);
+			LOG_MSG(LVL_ERROR, "%s: provider_no: %u, n_components: %u"
+			    " provider_chunk: %u, chunk_count: %u", __func__,
+			    prov_no, sc->n_components,
+			    sc->map[n].provider_chunk,
+			    sc->components[prov_no].chunk_count);
+			return;
+		}
+		if (sc->map[n].flags & VIRSTOR_MAP_ALLOCATED)
+			sc->components[prov_no].chunk_next++;
+	}
+
+	sc->provider = g_new_providerf(sc->geom, "virstor/%s",
+	    sc->geom->name);
+
+	sc->provider->sectorsize = sc->sectorsize;
+	sc->provider->mediasize = sc->virsize;
+	g_error_provider(sc->provider, 0);
+
+	LOG_MSG(LVL_INFO, "%s activated", sc->provider->name);
+	LOG_MSG(LVL_DEBUG, "%s starting with current component %u, starting "
+	    "chunk %u", sc->provider->name, sc->curr_component,
+	    sc->components[sc->curr_component].chunk_next);
+}
+
+/*
+ * Returns count of active providers in this geom instance, i.e. the
+ * number of component slots that currently have a consumer.
+ */
+static u_int
+virstor_valid_components(struct g_virstor_softc *sc)
+{
+	struct g_virstor_component *comp;
+	u_int left, online;
+
+	KASSERT(sc != NULL, ("%s: softc is NULL", __func__));
+	KASSERT(sc->components != NULL, ("%s: sc->components is NULL", __func__));
+	online = 0;
+	comp = sc->components;
+	for (left = sc->n_components; left > 0; left--, comp++) {
+		if (comp->gcons != NULL)
+			online++;
+	}
+	return (online);
+}
+
+/*
+ * Orphan/spoil handler: drops the component behind the consumer and
+ * destroys the geom once no component is left.
+ */
+static void
+g_virstor_orphan(struct g_consumer *cp)
+{
+	struct g_virstor_component *victim;
+	struct g_virstor_softc *sc;
+
+	g_topology_assert();
+	sc = cp->geom->softc;
+	if (sc != NULL) {
+		victim = cp->private;
+		KASSERT(victim != NULL,
+		    ("%s: No component in private part of consumer",
+		    __func__));
+		remove_component(sc, victim, FALSE);
+		/* The last component is gone: tear the geom down */
+		if (virstor_valid_components(sc) == 0)
+			virstor_geom_destroy(sc, TRUE, FALSE);
+	}
+}
+
+/*
+ * Called to notify geom when it's been opened, and for what intent
+ *
+ * dr, dw and de are the requested changes to the provider's read, write
+ * and exclusive counts. The request is forwarded to every consumer of the
+ * geom with g_access(); if one of them refuses, the consumers that were
+ * already updated are reverted and that error is returned. ENXIO is
+ * returned when the geom has no consumers at all. When the geom has no
+ * softc the request is only logged and 0 is returned.
+ */
+static int
+g_virstor_access(struct g_provider *pp, int dr, int dw, int de)
+{
+	struct g_consumer *c;
+	struct g_virstor_softc *sc;
+	struct g_geom *gp;
+	int error;
+
+	KASSERT(pp != NULL, ("%s: NULL provider", __func__));
+	gp = pp->geom;
+	KASSERT(gp != NULL, ("%s: NULL geom", __func__));
+	sc = gp->softc;
+
+	if (sc == NULL) {
+		/* It seems that .access can be called with negative dr,dw,de
+		 * in this case but I want to check for myself */
+		LOG_MSG(LVL_WARNING, "access(%d, %d, %d) for %s",
+		    dr, dw, de, pp->name);
+		/* This should only happen when geom is withered so
+		 * allow only negative requests */
+		KASSERT(dr <= 0 && dw <= 0 && de <= 0,
+		    ("%s: Positive access for %s", __func__, pp->name));
+		if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0)
+			LOG_MSG(LVL_DEBUG, "Device %s definitely destroyed",
+			    pp->name);
+		return (0);
+	}
+
+	/* Grab an exclusive bit to propagate on our consumers on first open */
+	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
+		de++;
+	/* ... drop it on close (all three counts would reach zero) */
+	if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) {
+		de--;
+		update_metadata(sc);	/* Writes statistical information */
+	}
+
+	error = ENXIO;	/* result if the consumer list is empty */
+	LIST_FOREACH(c, &gp->consumer, consumer) {
+		KASSERT(c != NULL, ("%s: consumer is NULL", __func__));
+		error = g_access(c, dr, dw, de);
+		if (error != 0) {
+			struct g_consumer *c2;
+
+			/* Backout earlier changes: walk the list from the
+			 * start and undo every consumer before the one that
+			 * failed */
+			LIST_FOREACH(c2, &gp->consumer, consumer) {
+				if (c2 == c) /* all earlier components fixed */
+					return (error);
+				g_access(c2, -dr, -dw, -de);
+			}
+		}
+	}
+
+	return (error);
+}
+
+/*
+ * Generate XML dump of current state
+ *
+ * Emits per-component figures when called for a consumer and device-wide
+ * figures when called for the geom itself (cp == NULL); nothing is emitted
+ * for providers or for a geom without softc. Percentages fall back to a
+ * fixed value instead of dividing by zero while the chunk counts are
+ * still 0 (e.g. before all components are attached).
+ */
+static void
+g_virstor_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
+    struct g_consumer *cp, struct g_provider *pp)
+{
+	struct g_virstor_softc *sc;
+
+	g_topology_assert();
+	sc = gp->softc;
+
+	if (sc == NULL || pp != NULL)
+		return;
+
+	if (cp != NULL) {
+		/* For each component */
+		struct g_virstor_component *comp;
+		u_int free_pct;
+
+		comp = cp->private;
+		if (comp == NULL)
+			return;
+
+		/* Nothing used, or no chunks to relate to: report 100% */
+		free_pct = 100;
+		if (comp->chunk_next > 0 && comp->chunk_count > 0)
+			free_pct = 100 -
+			    ((comp->chunk_next + comp->chunk_reserved) * 100) /
+			    comp->chunk_count;
+
+		sbuf_printf(sb, "%s<ComponentIndex>%u</ComponentIndex>\n",
+		    indent, comp->index);
+		sbuf_printf(sb, "%s<ChunkCount>%u</ChunkCount>\n",
+		    indent, comp->chunk_count);
+		sbuf_printf(sb, "%s<ChunksUsed>%u</ChunksUsed>\n",
+		    indent, comp->chunk_next);
+		sbuf_printf(sb, "%s<ChunksReserved>%u</ChunksReserved>\n",
+		    indent, comp->chunk_reserved);
+		sbuf_printf(sb, "%s<StorageFree>%u%%</StorageFree>\n",
+		    indent, free_pct);
+	} else {
+		/* For the whole thing */
+		u_int count, used, i, phys_free;
+		size_t backing;
+		off_t size;
+
+		count = used = size = 0;
+		for (i = 0; i < sc->n_components; i++) {
+			if (sc->components[i].gcons != NULL) {
+				count += sc->components[i].chunk_count;
+				used += sc->components[i].chunk_next +
+				    sc->components[i].chunk_reserved;
+				size += sc->components[i].gcons->
+				    provider->mediasize;
+			}
+		}
+
+		/* count is 0 while no component is online */
+		phys_free = 100;
+		if (used > 0 && count > 0)
+			phys_free = 100 - (used * 100) / count;
+		/* sc->chunk_count is 0 until virstor_check_and_run() has
+		 * computed it */
+		backing = 0;
+		if (sc->chunk_count > 0)
+			backing = (count * 100) / sc->chunk_count;
+
+		sbuf_printf(sb, "%s<Status>"
+		    "Components=%u, Online=%u</Status>\n", indent,
+		    sc->n_components, virstor_valid_components(sc));
+		sbuf_printf(sb, "%s<State>%u%% physical free</State>\n",
+		    indent, phys_free);
+		sbuf_printf(sb, "%s<ChunkSize>%zu</ChunkSize>\n", indent,
+		    sc->chunk_size);
+		sbuf_printf(sb, "%s<PhysicalFree>%u%%</PhysicalFree>\n",
+		    indent, phys_free);
+		sbuf_printf(sb, "%s<ChunkPhysicalCount>%u</ChunkPhysicalCount>\n",
+		    indent, count);
+		sbuf_printf(sb, "%s<ChunkVirtualCount>%zu</ChunkVirtualCount>\n",
+		    indent, sc->chunk_count);
+		sbuf_printf(sb, "%s<PhysicalBacking>%zu%%</PhysicalBacking>\n",
+		    indent, backing);
+		sbuf_printf(sb, "%s<PhysicalBackingSize>%jd</PhysicalBackingSize>\n",
+		    indent, (intmax_t)size);
+		sbuf_printf(sb, "%s<VirtualSize>%jd</VirtualSize>\n", indent,
+		    (intmax_t)sc->virsize);
+	}
+}
+
+/*
+ * GEOM .done handler
+ * Can't use standard handler because one requested IO may
+ * fork into additional data IOs
+ *
+ * Records the first child error in the parent, counts the child as
+ * finished and, once all children are in, delivers the parent with its
+ * full length marked as completed. The child bio is always destroyed.
+ */
+static void
+g_virstor_done(struct bio *b)
+{
+	struct bio *parent_b;
+
+	parent_b = b->bio_parent;
+
+	if (b->bio_error != 0) {
+		LOG_MSG(LVL_ERROR, "Error %d for offset=%ju, length=%ju, %s",
+		    b->bio_error, (uintmax_t)b->bio_offset,
+		    (uintmax_t)b->bio_length, b->bio_to->name);
+		/* Only the first error is kept */
+		if (parent_b->bio_error == 0)
+			parent_b->bio_error = b->bio_error;
+	}
+
+	parent_b->bio_inbed++;
+	parent_b->bio_completed += b->bio_completed;
+
+	if (parent_b->bio_children == parent_b->bio_inbed) {
+		/* Children may include map updates, so the summed child
+		 * lengths are replaced by the parent's own length */
+		parent_b->bio_completed = parent_b->bio_length;
+		g_io_deliver(parent_b, parent_b->bio_error);
+	}
+	g_destroy_bio(b);
+}
+
+/*
+ * I/O starts here
+ * Called in g_down thread
+ *
+ * Only BIO_READ, BIO_WRITE and BIO_DELETE are handled; anything else is
+ * completed with EOPNOTSUPP. The request is cut at chunk boundaries and
+ * each piece is translated through sc->map into a cloned bio aimed at the
+ * component backing that chunk. A write to an unallocated chunk first
+ * allocates a physical chunk and queues an extra clone that writes the
+ * affected map sector to component 0. Clones are collected in a local
+ * queue and issued together at the end, with g_virstor_done() as their
+ * completion handler.
+ */
+static void
+g_virstor_start(struct bio *b)
+{
+	struct g_virstor_softc *sc;
+	struct g_virstor_component *comp;
+	struct bio *cb;
+	struct g_provider *pp;
+	char *addr;
+	off_t offset, length;
+	struct bio_queue_head bq;
+	size_t chunk_size;	/* cached for convenience */
+	u_int count;
+
+	pp = b->bio_to;
+	sc = pp->geom->softc;
+	KASSERT(sc != NULL, ("%s: no softc (error=%d, device=%s)", __func__,
+	    b->bio_to->error, b->bio_to->name));
+
+	LOG_REQ(LVL_MOREDEBUG, b, "%s", __func__);
+
+	switch (b->bio_cmd) {
+	case BIO_READ:
+	case BIO_WRITE:
+	case BIO_DELETE:
+		break;
+	default:
+		g_io_deliver(b, EOPNOTSUPP);
+		return;
+	}
+
+	LOG_MSG(LVL_DEBUG2, "BIO arrived, size=%ju", b->bio_length);
+	bioq_init(&bq);	/* collects the clones until they are fired below */
+
+	chunk_size = sc->chunk_size;
+	addr = b->bio_data;
+	offset = b->bio_offset;	/* virtual offset and length */
+	length = b->bio_length;
+
+	while (length > 0) {
+		size_t chunk_index, in_chunk_offset, in_chunk_length;
+		struct virstor_map_entry *me;
+
+		chunk_index = offset / chunk_size; /* round downwards */
+		in_chunk_offset = offset % chunk_size;
+		in_chunk_length = min(length, chunk_size - in_chunk_offset);
+		LOG_MSG(LVL_DEBUG, "Mapped %s(%ju, %ju) to (%zu,%zu,%zu)",
+		    b->bio_cmd == BIO_READ ? "R" : "W",
+		    offset, length,
+		    chunk_index, in_chunk_offset, in_chunk_length);
+		me = &sc->map[chunk_index];
+
+		if (b->bio_cmd == BIO_READ || b->bio_cmd == BIO_DELETE) {
+			if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) {
+				/* Reads from unallocated chunks return zeroed
+				 * buffers; deletes of unallocated chunks need
+				 * no I/O at all */
+				if (b->bio_cmd == BIO_READ)
+					bzero(addr, in_chunk_length);
+			} else {
+				comp = &sc->components[me->provider_no];
+
+				cb = g_clone_bio(b);
+				if (cb == NULL) {
+					bioq_dismantle(&bq);
+					if (b->bio_error == 0)
+						b->bio_error = ENOMEM;
+					g_io_deliver(b, b->bio_error);
+					return;
+				}
+				cb->bio_to = comp->gcons->provider;
+				cb->bio_done = g_virstor_done;
+				cb->bio_offset =
+				    (off_t)me->provider_chunk * (off_t)chunk_size
+				    + in_chunk_offset;
+				cb->bio_length = in_chunk_length;
+				cb->bio_data = addr;
+				cb->bio_caller1 = comp;	/* target component, read back when firing */
+				bioq_disksort(&bq, cb);
+			}
+		} else { /* handle BIO_WRITE */
+			KASSERT(b->bio_cmd == BIO_WRITE,
+			    ("%s: Unknown command %d", __func__,
+			    b->bio_cmd));
+
+			if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) {
+				/* We have a virtual chunk, represented by
+				 * the "me" entry, but it's not yet allocated
+				 * (tied to) a physical chunk. So do it now. */
+				struct virstor_map_entry *data_me;
+				u_int phys_chunk, comp_no;
+				off_t s_offset;
+				int error;
+
+				error = allocate_chunk(sc, &comp, &comp_no,
+				    &phys_chunk);
+				if (error != 0) {
+					/* We cannot allocate a physical chunk
+					 * to satisfy this request, so we'll
+					 * delay it to when we can...
+					 * XXX: this will prevent the fs from
+					 * being umounted!
+					 * NOTE(review): clones queued on bq by
+					 * earlier iterations are not dismantled
+					 * before the return below - confirm
+					 * this is intended. */
+					struct g_virstor_bio_q *biq;
+					biq = malloc(sizeof *biq, M_GVIRSTOR,
+					    M_NOWAIT);
+					if (biq == NULL) {
+						bioq_dismantle(&bq);
+						if (b->bio_error == 0)
+							b->bio_error = ENOMEM;
+						g_io_deliver(b, b->bio_error);
+						return;
+					}
+					biq->bio = b;
+					mtx_lock(&sc->delayed_bio_q_mtx);
+					STAILQ_INSERT_TAIL(&sc->delayed_bio_q,
+					    biq, linkage);
+					mtx_unlock(&sc->delayed_bio_q_mtx);
+					LOG_MSG(LVL_WARNING, "Delaying BIO "
+					    "(size=%ju) until free physical "
+					    "space can be found on %s",
+					    b->bio_length,
+					    sc->provider->name);
+					return;
+				}
+				LOG_MSG(LVL_DEBUG, "Allocated chunk %u on %s "
+				    "for %s",
+				    phys_chunk,
+				    comp->gcons->provider->name,
+				    sc->provider->name);
+
+				me->provider_no = comp_no;
+				me->provider_chunk = phys_chunk;
+				me->flags |= VIRSTOR_MAP_ALLOCATED;
+
+				cb = g_clone_bio(b);
+				if (cb == NULL) {
+					me->flags &= ~VIRSTOR_MAP_ALLOCATED;	/* undo the map entry */
+					me->provider_no = 0;
+					me->provider_chunk = 0;
+					bioq_dismantle(&bq);
+					if (b->bio_error == 0)
+						b->bio_error = ENOMEM;
+					g_io_deliver(b, b->bio_error);
+					return;
+				}
+
+				/* The allocation table is stored contiguously
+				 * at the start of the drive. We need to
+				 * calculate the offset of the sector that holds
+				 * this map entry both on the drive and in the
+				 * map array.
+				 * s_offset will end up pointing to the drive
+				 * sector. */
+				s_offset = chunk_index * sizeof *me;
+				s_offset = (s_offset / sc->sectorsize) *
+				    sc->sectorsize;
+
+				/* data_me points to map entry sector
+				 * in memory (analogous to offset) */
+				data_me = &sc->map[(chunk_index /
+				    sc->me_per_sector) * sc->me_per_sector];
+
+				/* Commit sector with map entry to storage */
+				cb->bio_to = sc->components[0].gcons->provider;
+				cb->bio_done = g_virstor_done;
+				cb->bio_offset = s_offset;
+				cb->bio_data = (char *)data_me;
+				cb->bio_length = sc->sectorsize;
+				cb->bio_caller1 = &sc->components[0];
+				bioq_disksort(&bq, cb);
+			}
+
+			comp = &sc->components[me->provider_no];
+			cb = g_clone_bio(b);
+			if (cb == NULL) {
+				bioq_dismantle(&bq);
+				if (b->bio_error == 0)
+					b->bio_error = ENOMEM;
+				g_io_deliver(b, b->bio_error);
+				return;
+			}
+			/* Finally, handle the data */
+			cb->bio_to = comp->gcons->provider;
+			cb->bio_done = g_virstor_done;
+			cb->bio_offset = (off_t)me->provider_chunk*(off_t)chunk_size +
+			    in_chunk_offset;
+			cb->bio_length = in_chunk_length;
+			cb->bio_data = addr;
+			cb->bio_caller1 = comp;
+			bioq_disksort(&bq, cb);
+		}
+		addr += in_chunk_length;	/* advance to the next piece */
+		length -= in_chunk_length;
+		offset += in_chunk_length;
+	}
+
+	/* Fire off bio's here */
+	count = 0;
+	for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) {
+		bioq_remove(&bq, cb);
+		LOG_REQ(LVL_MOREDEBUG, cb, "Firing request");
+		comp = cb->bio_caller1;
+		cb->bio_caller1 = NULL;
+		LOG_MSG(LVL_DEBUG, " firing bio, offset=%ju, length=%ju",
+		    cb->bio_offset, cb->bio_length);
+		g_io_request(cb, comp->gcons);
+		count++;
+	}
+	if (count == 0) { /* We handled everything locally */
+		b->bio_completed = b->bio_length;
+		g_io_deliver(b, 0);
+	}
+
+}
+
+/*
+ * Hand out the next free chunk of the current component, moving on to
+ * the following component when the current one is exhausted.
+ * On success returns 0 and stores the component in *comp, its index in
+ * *comp_no_p and the chunk index within the component in *chunk.
+ * Returns -1 when the last component is full as well.
+ */
+static int
+allocate_chunk(struct g_virstor_softc *sc, struct g_virstor_component **comp,
+    u_int *comp_no_p, u_int *chunk)
+{
+	struct g_virstor_component *cur;
+	u_int idx;
+
+	KASSERT(sc->curr_component < sc->n_components,
+	    ("%s: Invalid curr_component: %u",  __func__, sc->curr_component));
+
+	idx = sc->curr_component;
+	cur = &sc->components[idx];
+	*comp = cur;
+	dump_component(cur);
+
+	if (cur->chunk_next >= cur->chunk_count) {
+		/* This component is full. Allocate next component */
+		if (idx >= sc->n_components-1) {
+			LOG_MSG(LVL_ERROR, "All physical space allocated for %s",
+			    sc->geom->name);
+			return (-1);
+		}
+		cur->flags &= ~VIRSTOR_PROVIDER_CURRENT;
+		idx++;
+		sc->curr_component = idx;
+
+		cur = &sc->components[idx];
+		*comp = cur;
+		if (idx >= sc->n_components - g_virstor_component_watermark-1)
+			LOG_MSG(LVL_WARNING, "Device %s running out of components "
+			    "(switching to %u/%u: %s)", sc->geom->name,
+			    idx+1, sc->n_components,
+			    cur->gcons->provider->name);
+		/* Take care not to overwrite reserved chunks */
+		if (cur->chunk_reserved > 0 &&
+		    cur->chunk_next < cur->chunk_reserved)
+			cur->chunk_next = cur->chunk_reserved;
+
+		cur->flags |=
+		    VIRSTOR_PROVIDER_ALLOCATED | VIRSTOR_PROVIDER_CURRENT;
+		dump_component(cur);
+	}
+
+	/* Common tail: report the component and consume one chunk */
+	*comp_no_p = idx;
+	*chunk = cur->chunk_next++;
+	return (0);
+}
+
+/*
+ * Dump a component
+ * Prints the component's index, provider name, chunk counters and flags,
+ * but only when g_virstor_debug is at least LVL_DEBUG2.
+ */
+static void
+dump_component(struct g_virstor_component *comp)
+{
+
+	if (g_virstor_debug < LVL_DEBUG2)
+		return;
+	/* index is unsigned, so it is printed with %u */
+	printf("Component %u: %s\n", comp->index, comp->gcons->provider->name);
+	printf("  chunk_count: %u\n", comp->chunk_count);
+	printf("   chunk_next: %u\n", comp->chunk_next);
+	printf("        flags: %u\n", comp->flags);
+}
+
+#if 0
+/* Dump a map entry
+ * Compiled out by the surrounding #if 0. Prints virtual chunk number nr
+ * and, for an allocated entry, the provider and provider chunk it maps
+ * to; prints nothing unless g_virstor_debug is at least LVL_DEBUG. */
+static void
+dump_me(struct virstor_map_entry *me, unsigned int nr)
+{
+	if (g_virstor_debug < LVL_DEBUG)
+		return;
+	printf("VIRT. CHUNK #%d: ", nr);
+	if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0)
+		printf("(unallocated)\n");
+	else
+		printf("allocated at provider %u, provider_chunk %u\n",
+		    me->provider_no, me->provider_chunk);
+}
+#endif
+
+/*
+ * Empty a bio queue, destroying every bio that is still on it
+ */
+static void
+bioq_dismantle(struct bio_queue_head *bq)
+{
+	struct bio *head;
+
+	while ((head = bioq_first(bq)) != NULL) {
+		bioq_remove(bq, head);
+		g_destroy_bio(head);
+	}
+}
+
+/*
+ * The function that shouldn't be called.
+ * When this is called, the stack is already garbled because of
+ * argument mismatch. There's nothing to do now but panic, which is
+ * incidentally the whole purpose of this function.
+ * Motivation: to guard from accidentally calling geom methods when
+ * they shouldn't be called. (see g_..._taste)
+ * Takes no arguments and unconditionally calls panic().
+ */
+static void
+invalid_call(void)
+{
+	panic("invalid_call() has just been called. Something's fishy here.");
+}
+
+DECLARE_GEOM_CLASS(g_virstor_class, g_virstor); /* Let there be light: presumably registers the class with GEOM - see geom.h */
--- /dev/null
+++ sys/geom/virstor/g_virstor.h
@@ -0,0 +1,135 @@
+/*-
+ * Copyright (c) 2006-2007 Ivan Voras <ivoras at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/geom/virstor/g_virstor.h,v 1.1 2007/09/23 07:34:23 pjd Exp $
+ */
+
+#ifndef _G_VIRSTOR_H_
+#define _G_VIRSTOR_H_
+
+#define	G_VIRSTOR_CLASS_NAME "VIRSTOR"
+
+
+#define VIRSTOR_MAP_ALLOCATED 1	/* flags bit: entry is tied to a physical chunk */
+struct virstor_map_entry {
+	uint16_t	flags;		/* VIRSTOR_MAP_* bits */
+	uint16_t	provider_no;	/* index of the backing component */
+	uint32_t	provider_chunk;	/* chunk index within that component */
+};
+
+#define	VIRSTOR_MAP_ENTRY_SIZE (sizeof(struct virstor_map_entry))
+#define	VIRSTOR_MAP_BLOCK_ENTRIES (MAXPHYS / VIRSTOR_MAP_ENTRY_SIZE)
+/* Struct size is guarded by CTASSERT in main source */
+
+#ifdef _KERNEL
+
+/*
+ * Print a message tagged with the class name when g_virstor_debug is at
+ * least lvl; levels above 0 are shown in brackets after the tag.
+ * Note that lvl is evaluated more than once.
+ */
+#define	LOG_MSG(lvl, ...)       do {					\
+        if (g_virstor_debug >= (lvl)) {					\
+                printf("GEOM_" G_VIRSTOR_CLASS_NAME);			\
+                if ((lvl) > 0)						\
+                        printf("[%u]", (unsigned int)(lvl));		\
+                printf(": ");						\
+                printf(__VA_ARGS__);					\
+                printf("\n");						\
+        }								\
+} while (0)
+#define	LOG_MESSAGE LOG_MSG
+
+/*
+ * Like LOG_MSG, but the message is followed by a dump of the bio bp
+ * produced by g_print_bio(). Note that lvl is evaluated more than once.
+ */
+#define	LOG_REQ(lvl, bp, ...)  do {					\
+        if (g_virstor_debug >= (lvl)) {					\
+                printf("GEOM_" G_VIRSTOR_CLASS_NAME);			\
+                if ((lvl) > 0)						\
+                        printf("[%u]", (unsigned int)(lvl));		\
+                printf(": ");						\
+                printf(__VA_ARGS__);					\
+                printf(" ");						\
+                g_print_bio(bp);					\
+                printf("\n");						\
+        }								\
+} while (0)
+#define	LOG_REQUEST LOG_REQ
+
+/* "critical" system announcements (e.g. "geom is up") */
+#define	LVL_ANNOUNCE	0
+/* errors */
+#define	LVL_ERROR	1
+/* warnings */
+#define	LVL_WARNING	2
+/* info, noncritical for system operation (user doesn't have to see it) */
+#define	LVL_INFO	5
+/* debug info */
+#define	LVL_DEBUG	10
+/* more debug info */
+#define	LVL_DEBUG2	12
+/* superfluous debug info (large volumes of data) */
+#define	LVL_MOREDEBUG	15
+
+
+/* Component data */
+struct g_virstor_component {
+	struct g_consumer	*gcons;		/* Consumer of the backing provider; NULL if slot is empty */
+	struct g_virstor_softc	*sc;		/* Softc this component belongs to */
+	unsigned int		 index;		/* Component index in array */
+	unsigned int		 chunk_count;	/* Number of chunks on this component */
+	unsigned int		 chunk_next;	/* Next chunk to allocate */
+	unsigned int		 chunk_reserved;	/* Allocation never starts below this chunk */
+	unsigned int		 flags;		/* VIRSTOR_PROVIDER_* bits */
+};
+
+
+/* Internal geom instance data */
+struct g_virstor_softc {
+	struct g_geom		*geom;		/* Geom this softc belongs to */
+	struct g_provider	*provider;	/* Created by virstor_check_and_run() */
+	struct g_virstor_component *components;	/* Array of n_components slots */
+	u_int			 n_components;	/* Taken from metadata md_count */
+	u_int			 curr_component; /* Component currently used */
+	uint32_t		 id;		/* Unique ID of this geom */
+	off_t			 virsize;	/* Total size of virstor */
+	off_t			 sectorsize;	/* Sector size of component 0 */
+	size_t			 chunk_size;	/* Chunk size in bytes */
+	size_t			 chunk_count;	/* governs map_size */
+	struct virstor_map_entry *map;		/* In-memory allocation map */
+	size_t			 map_size;	/* (in bytes) */
+	size_t			 map_sectors;	/* Size of map in sectors */
+	size_t			 me_per_sector;	/* # map entries in a sector */
+	STAILQ_HEAD(, g_virstor_bio_q)	 delayed_bio_q;	/* Queue of delayed BIOs */
+	struct mtx		 delayed_bio_q_mtx;	/* Protects delayed_bio_q */
+};
+
+/* "delayed BIOs" Queue element */
+struct g_virstor_bio_q {
+	struct bio		*bio;		/* The request being held back */
+	STAILQ_ENTRY(g_virstor_bio_q) linkage;	/* Link in softc's delayed_bio_q */
+};
+
+
+#endif	/* _KERNEL */
+
+#ifndef _PATH_DEV
+#define _PATH_DEV "/dev/"
+#endif
+
+#endif	/* !_G_VIRSTOR_H_ */
--- /dev/null
+++ sys/geom/virstor/g_virstor_md.h
@@ -0,0 +1,69 @@
+/*-
+ * Copyright (c) 2005 Ivan Voras <ivoras at gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/geom/virstor/g_virstor_md.h,v 1.1 2007/09/23 07:34:23 pjd Exp $
+ */
+
+
+#ifndef _G_VIRSTOR_MD_H_
+#define _G_VIRSTOR_MD_H_
+
+/*
+ * Metadata declaration
+ */
+
+#define	G_VIRSTOR_MAGIC		"GEOM::VIRSTOR"
+#define	G_VIRSTOR_VERSION	1
+
+/* flag: provider is allocated */
+#define	VIRSTOR_PROVIDER_ALLOCATED	1
+/* flag: provider is currently being filled (usually it's the last
+ * provider with VIRSTOR_PROVIDER_ALLOCATED flag) */
+#define VIRSTOR_PROVIDER_CURRENT	2
+
+struct g_virstor_metadata {
+	/* Data global to the virstor device */
+	char		md_magic[16];		/* Magic value. */
+	uint32_t	md_version;		/* Version number. */
+	char		md_name[16];		/* Device name (e.g. "mydata") */
+	uint32_t	md_id;			/* Unique ID. */
+	uint64_t	md_virsize;		/* Virtual device's size */
+	uint32_t	md_chunk_size;		/* Chunk size in bytes */
+	uint16_t	md_count;		/* Total number of providers */
+
+	/* Data local to this provider */
+	char		provider[16];		/* Hardcoded provider name */
+	uint16_t	no;			/* Provider number/index */
+	uint64_t	provsize;		/* Provider's size */
+	uint32_t	chunk_count;		/* Number of chunks in this provider */
+	uint32_t	chunk_next;		/* Next chunk to allocate */
+	uint16_t	chunk_reserved;		/* Count of "reserved" chunks */
+	uint16_t	flags;			/* Provider's flags (VIRSTOR_PROVIDER_*) */
+};
+
+void virstor_metadata_encode(struct g_virstor_metadata *md, unsigned char *data);
+void virstor_metadata_decode(unsigned char *data, struct g_virstor_metadata *md);
+
+#endif	/* !_G_VIRSTOR_MD_H_ */
--- /dev/null
+++ sys/geom/virstor/g_virstor_md.c
@@ -0,0 +1,91 @@
+/*-
+ * Copyright (c) 2005 Ivan Voras <ivoras at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/geom/virstor/g_virstor_md.c,v 1.1 2007/09/23 07:34:23 pjd Exp $");
+
+#include <sys/param.h>
+#include <sys/endian.h>
+
+#include <geom/virstor/g_virstor_md.h>
+#include <geom/virstor/binstream.h>
+
+/*
+ * Encode data from g_virstor_metadata structure into an endian-independent
+ * byte stream.
+ */
+void
+virstor_metadata_encode(struct g_virstor_metadata *md, unsigned char *data)
+{
+	bin_stream_t bs;
+
+	bs_open(&bs, data);
+
+	bs_write_buf(&bs, md->md_magic, sizeof(md->md_magic));
+	bs_write_u32(&bs, md->md_version);
+	bs_write_buf(&bs, md->md_name, sizeof(md->md_name));
+	bs_write_u64(&bs, md->md_virsize);
+	bs_write_u32(&bs, md->md_chunk_size);
+	bs_write_u32(&bs, md->md_id);
+	bs_write_u16(&bs, md->md_count);
+
+	bs_write_buf(&bs, md->provider, sizeof(md->provider));
+	bs_write_u16(&bs, md->no);
+	bs_write_u64(&bs, md->provsize);
+	bs_write_u32(&bs, md->chunk_count);
+	bs_write_u32(&bs, md->chunk_next);
+	bs_write_u16(&bs, md->chunk_reserved);
+	bs_write_u16(&bs, md->flags);
+}
+
+
+/*
+ * Decode data from an endian-independent byte stream into g_virstor_metadata
+ * structure.
+ */
+void
+virstor_metadata_decode(unsigned char *data, struct g_virstor_metadata *md)
+{
+	bin_stream_t bs;
+
+	bs_open(&bs, (char *)(data));
+
+	bs_read_buf(&bs, md->md_magic, sizeof(md->md_magic));
+	md->md_version = bs_read_u32(&bs);
+	bs_read_buf(&bs, md->md_name, sizeof(md->md_name));
+	md->md_virsize = bs_read_u64(&bs);
+	md->md_chunk_size = bs_read_u32(&bs);
+	md->md_id = bs_read_u32(&bs);
+	md->md_count = bs_read_u16(&bs);
+
+	bs_read_buf(&bs, md->provider, sizeof(md->provider));
+	md->no = bs_read_u16(&bs);
+	md->provsize = bs_read_u64(&bs);
+	md->chunk_count = bs_read_u32(&bs);
+	md->chunk_next = bs_read_u32(&bs);
+	md->chunk_reserved = bs_read_u16(&bs);
+	md->flags = bs_read_u16(&bs);
+}
--- /dev/null
+++ sys/geom/virstor/binstream.c
@@ -0,0 +1,185 @@
+/*-
+ * Copyright (c) 2005 Ivan Voras <ivoras at gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// $Id: binstream.c,v 1.1 2006/07/05 10:47:54 ivoras Exp $
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/geom/virstor/binstream.c,v 1.1 2007/09/23 07:34:23 pjd Exp $");
+
+#include <sys/endian.h>
+#include <sys/param.h>
+
+#include <geom/virstor/binstream.h>
+
+
+/* "Open" a binary stream for reading or writing, starting at position zero */
+void
+bs_open(bin_stream_t * bs, void *data)
+{
+	bs->data = (char *)data;
+	bs->pos = 0;
+}
+
+
+/* "Reset" position in binary stream to zero */
+void
+bs_reset(bin_stream_t * bs)
+{
+	bs->pos = 0;
+}
+
+/* Write a zero-terminated string; return next position */
+unsigned
+bs_write_str(bin_stream_t * bs, char *data)
+{
+	int		len = 0;
+	do {
+		*(bs->data + bs->pos + len) = *data;
+		len++;
+	} while (*(data++) != '\0');
+	bs->pos += len;
+	return bs->pos;
+}
+
+
+/* Write an arbitrary buffer; return next position */
+unsigned
+bs_write_buf(bin_stream_t * bs, char *data, unsigned data_size)
+{
+	unsigned	i;
+	for (i = 0; i < data_size; i++)
+		*(bs->data + bs->pos + i) = *(data + i);
+	bs->pos += data_size;
+	return bs->pos;
+}
+
+
+/* Write an 8bit uint; return next position. */
+unsigned
+bs_write_u8(bin_stream_t * bs, uint8_t data)
+{
+	*((uint8_t *) (bs->data + bs->pos)) = data;
+	return ++(bs->pos);
+}
+
+
+/* Write a 16bit uint; return next position. */
+unsigned
+bs_write_u16(bin_stream_t * bs, uint16_t data)
+{
+	le16enc(bs->data + bs->pos, data);
+	return (bs->pos += 2);
+}
+
+
+/* Write a 32bit uint; return next position. */
+unsigned
+bs_write_u32(bin_stream_t * bs, uint32_t data)
+{
+	le32enc(bs->data + bs->pos, data);
+	return (bs->pos += 4);
+}
+
+
+/* Write a 64bit uint; return next position. */
+unsigned
+bs_write_u64(bin_stream_t * bs, uint64_t data)
+{
+	le64enc(bs->data + bs->pos, data);
+	return (bs->pos += 8);
+}
+
+
+/* Read an 8bit uint & return it */
+uint8_t
+bs_read_u8(bin_stream_t * bs)
+{
+	uint8_t		data = *((uint8_t *) (bs->data + bs->pos));
+	bs->pos++;
+	return data;
+}
+
+
+/*
+ * Read a null-terminated string from stream into a buffer; buf_size is size
+ * of the buffer, including the final \0. Returns buf, or NULL if buf is NULL
+ * or buf_size is zero; the result is truncated to fit and always terminated.
+ */
+char*
+bs_read_str(bin_stream_t * bs, char *buf, unsigned buf_size)
+{
+	unsigned	len = 0;
+	char           *work_buf = buf;
+	if (buf == NULL || buf_size < 1)
+		return NULL;
+	do {
+		*work_buf = *(bs->data + bs->pos + len);
+	} while (len++ < buf_size - 1 && *(work_buf++) != '\0');
+	*(buf + buf_size - 1) = '\0';
+	bs->pos += len;
+	return buf;
+}
+
+
+/* Read an arbitrary buffer. */
+void
+bs_read_buf(bin_stream_t * bs, char *buf, unsigned buf_size)
+{
+	unsigned	i;
+	for (i = 0; i < buf_size; i++)
+		*(buf + i) = *(bs->data + bs->pos + i);
+	bs->pos += buf_size;
+}
+
+
+/* Read a 16bit uint & return it */
+uint16_t
+bs_read_u16(bin_stream_t * bs)
+{
+	uint16_t	data = le16dec(bs->data + bs->pos);
+	bs->pos += 2;
+	return data;
+}
+
+
+/* Read a 32bit uint & return it */
+uint32_t
+bs_read_u32(bin_stream_t * bs)
+{
+	uint32_t	data = le32dec(bs->data + bs->pos);
+	bs->pos += 4;
+	return data;
+}
+
+
+/* Read a 64bit uint & return it */
+uint64_t
+bs_read_u64(bin_stream_t * bs)
+{
+	uint64_t	data = le64dec(bs->data + bs->pos);
+	bs->pos += 8;
+	return data;
+}
Index: g_gate.h
===================================================================
RCS file: /home/cvs/src/sys/geom/gate/g_gate.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/gate/g_gate.h -L sys/geom/gate/g_gate.h -u -r1.1.1.1 -r1.2
--- sys/geom/gate/g_gate.h
+++ sys/geom/gate/g_gate.h
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd at FreeBSD.org>
+ * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd at FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -10,7 +10,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/gate/g_gate.h,v 1.7 2005/07/10 21:10:20 pjd Exp $
+ * $FreeBSD: src/sys/geom/gate/g_gate.h,v 1.10 2006/09/08 10:20:44 pjd Exp $
  */
 
 #ifndef _G_GATE_H_
@@ -68,9 +68,9 @@
  */
 struct g_gate_softc {
 	int			 sc_unit;		/* P: (read-only) */
-	int16_t			 sc_ref;		/* P: g_gate_list_mtx */
+	int			 sc_ref;		/* P: g_gate_list_mtx */
 	struct g_provider	*sc_provider;		/* P: (read-only) */
-	uint32_t		 sc_flags;		/* P: (read-only) */
+	uint32_t		 sc_flags;		/* P: sc_queue_mtx */
 
 	struct bio_queue_head	 sc_inqueue;		/* P: sc_queue_mtx */
 	struct bio_queue_head	 sc_outqueue;		/* P: sc_queue_mtx */
Index: g_gate.c
===================================================================
RCS file: /home/cvs/src/sys/geom/gate/g_gate.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/gate/g_gate.c -L sys/geom/gate/g_gate.c -u -r1.1.1.1 -r1.2
--- sys/geom/gate/g_gate.c
+++ sys/geom/gate/g_gate.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd at FreeBSD.org>
+ * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd at FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -10,7 +10,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -22,10 +22,11 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/geom/gate/g_gate.c,v 1.21 2005/07/10 21:10:20 pjd Exp $
  */
 
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/geom/gate/g_gate.c,v 1.26 2006/09/08 10:20:44 pjd Exp $");
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
@@ -48,7 +49,7 @@
 #include <geom/geom.h>
 #include <geom/gate/g_gate.h>
 
-static MALLOC_DEFINE(M_GATE, "gg data", "GEOM Gate Data");
+static MALLOC_DEFINE(M_GATE, "gg_data", "GEOM Gate Data");
 
 SYSCTL_DECL(_kern_geom);
 SYSCTL_NODE(_kern_geom, OID_AUTO, gate, CTLFLAG_RW, 0, "GEOM_GATE stuff");
@@ -56,12 +57,9 @@
 SYSCTL_UINT(_kern_geom_gate, OID_AUTO, debug, CTLFLAG_RW, &g_gate_debug, 0,
     "Debug level");
 
-static int g_gate_destroy_geom(struct gctl_req *, struct g_class *,
-    struct g_geom *);
 struct g_class g_gate_class = {
 	.name = G_GATE_CLASS_NAME,
 	.version = G_VERSION,
-	.destroy_geom = g_gate_destroy_geom
 };
 
 static struct cdev *status_dev;
@@ -78,17 +76,11 @@
 static struct mtx g_gate_list_mtx;
 
 
-static void
-g_gate_wither(struct g_gate_softc *sc)
-{
-
-	atomic_set_32(&sc->sc_flags, G_GATE_FLAG_DESTROY);
-}
-
 static int
 g_gate_destroy(struct g_gate_softc *sc, boolean_t force)
 {
 	struct g_provider *pp;
+	struct g_geom *gp;
 	struct bio *bp;
 
 	g_topology_assert();
@@ -98,18 +90,15 @@
 		mtx_unlock(&g_gate_list_mtx);
 		return (EBUSY);
 	}
-	if ((sc->sc_flags & G_GATE_FLAG_DESTROY) == 0) {
-		g_gate_wither(sc);
-		LIST_REMOVE(sc, sc_next);
-	}
 	mtx_unlock(&g_gate_list_mtx);
 	mtx_lock(&sc->sc_queue_mtx);
+	if ((sc->sc_flags & G_GATE_FLAG_DESTROY) == 0)
+		sc->sc_flags |= G_GATE_FLAG_DESTROY;
 	wakeup(sc);
 	mtx_unlock(&sc->sc_queue_mtx);
-	if (sc->sc_ref > 0) {
-		G_GATE_DEBUG(1, "Cannot destroy %s yet.", sc->sc_name);
-		return (0);
-	}
+	gp = pp->geom;
+	pp->flags |= G_PF_WITHER;
+	g_orphan_provider(pp, ENXIO);
 	callout_drain(&sc->sc_callout);
 	mtx_lock(&sc->sc_queue_mtx);
 	for (;;) {
@@ -134,35 +123,26 @@
 			break;
 		}
 	}
+	mtx_unlock(&sc->sc_queue_mtx);
+	g_topology_unlock();
+	mtx_lock(&g_gate_list_mtx);
+	/* One reference is ours. */
+	sc->sc_ref--;
+	while (sc->sc_ref > 0) {
+		msleep(&sc->sc_ref, &g_gate_list_mtx, 0, "gg:destroy", 0);
+	}
+	LIST_REMOVE(sc, sc_next);
+	mtx_unlock(&g_gate_list_mtx);
 	mtx_destroy(&sc->sc_queue_mtx);
-	G_GATE_DEBUG(0, "Device %s destroyed.", sc->sc_name);
-	pp->geom->softc = NULL;
-	g_wither_geom(pp->geom, ENXIO);
+	g_topology_lock();
+	G_GATE_DEBUG(0, "Device %s destroyed.", gp->name);
+	gp->softc = NULL;
+	g_wither_geom(gp, ENXIO);
 	sc->sc_provider = NULL;
 	free(sc, M_GATE);
 	return (0);
 }
 
-static void
-g_gate_destroy_it(void *arg, int flag __unused)
-{
-	struct g_gate_softc *sc;
-
-	g_topology_assert();
-	sc = arg;
-	mtx_lock(&g_gate_list_mtx);
-	g_gate_destroy(sc, 1);
-}
-
-static int
-g_gate_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
-{
-
-	g_topology_assert();
-	mtx_lock(&g_gate_list_mtx);
-	return (g_gate_destroy(gp->softc, 0));
-}
-
 static int
 g_gate_access(struct g_provider *pp, int dr, int dw, int de)
 {
@@ -231,30 +211,17 @@
 }
 
 static struct g_gate_softc *
-g_gate_find(u_int unit)
+g_gate_hold(u_int unit)
 {
 	struct g_gate_softc *sc;
 
+	mtx_lock(&g_gate_list_mtx);
 	LIST_FOREACH(sc, &g_gate_list, sc_next) {
 		if (sc->sc_unit == unit)
 			break;
 	}
-	return (sc);
-}
-
-static struct g_gate_softc *
-g_gate_hold(u_int unit)
-{
-	struct g_gate_softc *sc;
-
-	mtx_lock(&g_gate_list_mtx);
-	sc = g_gate_find(unit);
-	if (sc != NULL) {
-		if ((sc->sc_flags & G_GATE_FLAG_DESTROY) != 0)
-			sc = NULL;
-		else
-			sc->sc_ref++;
-	}
+	if (sc != NULL)
+		sc->sc_ref++;
 	mtx_unlock(&g_gate_list_mtx);
 	return (sc);
 }
@@ -268,8 +235,8 @@
 	sc->sc_ref--;
 	KASSERT(sc->sc_ref >= 0, ("Negative sc_ref for %s.", sc->sc_name));
 	if (sc->sc_ref == 0 && (sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) {
+		wakeup(&sc->sc_ref);
 		mtx_unlock(&g_gate_list_mtx);
-		g_waitfor_event(g_gate_destroy_it, sc, M_WAITOK, NULL);
 	} else {
 		mtx_unlock(&g_gate_list_mtx);
 	}
@@ -485,10 +452,9 @@
 		g_topology_lock();
 		mtx_lock(&g_gate_list_mtx);
 		error = g_gate_destroy(sc, ggio->gctl_force);
-		if (error == 0)
-			g_gate_wither(sc);
 		g_topology_unlock();
-		g_gate_release(sc);
+		if (error != 0)
+			g_gate_release(sc);
 		return (error);
 	    }
 	case G_GATE_CMD_CANCEL:
@@ -534,22 +500,24 @@
 		struct g_gate_ctl_io *ggio = (void *)addr;
 
 		G_GATE_CHECK_VERSION(ggio);
-		sc = g_gate_find(ggio->gctl_unit);
+		sc = g_gate_hold(ggio->gctl_unit);
 		if (sc == NULL)
 			return (ENXIO);
+		error = 0;
 		for (;;) {
 			mtx_lock(&sc->sc_queue_mtx);
 			bp = bioq_first(&sc->sc_inqueue);
 			if (bp != NULL)
 				break;
-			if (msleep(sc, &sc->sc_queue_mtx,
-			    PPAUSE | PDROP | PCATCH, "ggwait", 0) != 0) {
+			if ((sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) {
 				ggio->gctl_error = ECANCELED;
-				return (0);
+				mtx_unlock(&sc->sc_queue_mtx);
+				goto start_end;
 			}
-			if ((sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) {
+			if (msleep(sc, &sc->sc_queue_mtx,
+			    PPAUSE | PDROP | PCATCH, "ggwait", 0) != 0) {
 				ggio->gctl_error = ECANCELED;
-				return (0);
+				goto start_end;
 			}
 		}
 		ggio->gctl_cmd = bp->bio_cmd;
@@ -558,7 +526,7 @@
 			mtx_unlock(&sc->sc_queue_mtx);
 			ggio->gctl_length = bp->bio_length;
 			ggio->gctl_error = ENOMEM;
-			return (0);
+			goto start_end;
 		}
 		bioq_remove(&sc->sc_inqueue, bp);
 		bioq_insert_tail(&sc->sc_outqueue, bp);
@@ -580,20 +548,23 @@
 				bioq_remove(&sc->sc_outqueue, bp);
 				bioq_insert_head(&sc->sc_inqueue, bp);
 				mtx_unlock(&sc->sc_queue_mtx);
-				return (error);
+				goto start_end;
 			}
 			break;
 		}
-		return (0);
+start_end:
+		g_gate_release(sc);
+		return (error);
 	    }
 	case G_GATE_CMD_DONE:
 	    {
 		struct g_gate_ctl_io *ggio = (void *)addr;
 
 		G_GATE_CHECK_VERSION(ggio);
-		sc = g_gate_find(ggio->gctl_unit);
+		sc = g_gate_hold(ggio->gctl_unit);
 		if (sc == NULL)
 			return (ENOENT);
+		error = 0;
 		mtx_lock(&sc->sc_queue_mtx);
 		TAILQ_FOREACH(bp, &sc->sc_outqueue.queue, bio_queue) {
 			if (ggio->gctl_seq == (uintptr_t)bp->bio_driver1)
@@ -608,7 +579,7 @@
 			/*
 			 * Request was probably canceled.
 			 */
-			return (0);
+			goto done_end;
 		}
 		if (ggio->gctl_error == EAGAIN) {
 			bp->bio_error = 0;
@@ -637,6 +608,8 @@
 			G_GATE_LOGREQ(2, bp, "Request done.");
 			g_io_deliver(bp, bp->bio_error);
 		}
+done_end:
+		g_gate_release(sc);
 		return (error);
 	    }
 	}
Index: g_label_ufs.c
===================================================================
RCS file: /home/cvs/src/sys/geom/label/g_label_ufs.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/label/g_label_ufs.c -L sys/geom/label/g_label_ufs.c -u -r1.2 -r1.3
--- sys/geom/label/g_label_ufs.c
+++ sys/geom/label/g_label_ufs.c
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/label/g_label_ufs.c,v 1.3.2.3 2006/03/08 14:20:14 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/label/g_label_ufs.c,v 1.11 2006/09/16 11:24:41 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -78,8 +78,8 @@
 		if (fs == NULL)
 			continue;
 		/* Check for magic and make sure things are the right size */
-		if (fs->fs_magic == FS_UFS1_MAGIC &&
-		    fs->fs_old_size * fs->fs_fsize == (int32_t)pp->mediasize) {
+		if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_fsize > 0 &&
+		    pp->mediasize / fs->fs_fsize == fs->fs_old_size) {
 		    	/* Valid UFS1. */
 		} else if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_fsize > 0 &&
 		    pp->mediasize / fs->fs_fsize == fs->fs_size) {
Index: g_label_reiserfs.c
===================================================================
RCS file: /home/cvs/src/sys/geom/label/g_label_reiserfs.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/label/g_label_reiserfs.c -L sys/geom/label/g_label_reiserfs.c -u -r1.1.1.1 -r1.2
--- sys/geom/label/g_label_reiserfs.c
+++ sys/geom/label/g_label_reiserfs.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/label/g_label_reiserfs.c,v 1.1.2.2 2005/08/30 15:18:54 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/label/g_label_reiserfs.c,v 1.2 2005/08/23 18:55:38 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
Index: g_label_ntfs.c
===================================================================
RCS file: /home/cvs/src/sys/geom/label/g_label_ntfs.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/label/g_label_ntfs.c -L sys/geom/label/g_label_ntfs.c -u -r1.1.1.1 -r1.2
--- sys/geom/label/g_label_ntfs.c
+++ sys/geom/label/g_label_ntfs.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/label/g_label_ntfs.c,v 1.5.2.2 2006/01/17 08:59:55 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/label/g_label_ntfs.c,v 1.6 2006/01/18 11:03:20 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
Index: g_label_iso9660.c
===================================================================
RCS file: /home/cvs/src/sys/geom/label/g_label_iso9660.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/label/g_label_iso9660.c -L sys/geom/label/g_label_iso9660.c -u -r1.2 -r1.3
--- sys/geom/label/g_label_iso9660.c
+++ sys/geom/label/g_label_iso9660.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/label/g_label_iso9660.c,v 1.1.8.3 2006/03/01 17:53:57 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/label/g_label_iso9660.c,v 1.5 2006/02/01 12:06:00 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
Index: g_label_msdosfs.c
===================================================================
RCS file: /home/cvs/src/sys/geom/label/g_label_msdosfs.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/label/g_label_msdosfs.c -L sys/geom/label/g_label_msdosfs.c -u -r1.2 -r1.3
--- sys/geom/label/g_label_msdosfs.c
+++ sys/geom/label/g_label_msdosfs.c
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2004 Pawel Jakub Dawidek <pjd at FreeBSD.org>
+ * Copyright (c) 2006 Tobias Reifenberger
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/label/g_label_msdosfs.c,v 1.1.8.2 2006/03/01 17:53:57 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/label/g_label_msdosfs.c,v 1.6 2006/09/30 08:16:49 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -34,57 +35,171 @@
 
 #include <geom/geom.h>
 #include <geom/label/g_label.h>
+#include <geom/label/g_label_msdosfs.h>
 
 #define G_LABEL_MSDOSFS_DIR	"msdosfs"
-
-#define	FAT12	"FAT12   "
-#define	FAT16	"FAT16   "
-#define	FAT32	"FAT32   "
-#define	VOLUME_LEN	11
-#define NO_NAME "NO NAME    "
-
+#define LABEL_NO_NAME		"NO NAME    "
 
 static void
 g_label_msdosfs_taste(struct g_consumer *cp, char *label, size_t size)
 {
 	struct g_provider *pp;
-	char *sector, *volume;
-	int i;
+	FAT_BSBPB *pfat_bsbpb;
+	FAT32_BSBPB *pfat32_bsbpb;
+	FAT_DES *pfat_entry;
+	uint8_t *sector0, *sector;
+	uint32_t i;
 
 	g_topology_assert_not();
 	pp = cp->provider;
-	label[0] = '\0';
+	sector0 = NULL;
+	sector = NULL;
+	bzero(label, size);
 
-	sector = (char *)g_read_data(cp, 0, pp->sectorsize, NULL);
-	if (sector == NULL)
+	/* Check if the sector size of the medium is a valid FAT sector size. */
+	switch(pp->sectorsize) {
+	case 512:
+	case 1024:
+	case 2048:
+	case 4096:
+		break;
+	default:
+		G_LABEL_DEBUG(1, "MSDOSFS: %s: sector size %d not compatible.",
+		    pp->name, pp->sectorsize);
 		return;
-	if (strncmp(sector + 0x36, FAT12, strlen(FAT12)) == 0) {
-		G_LABEL_DEBUG(1, "MSDOS (FAT12) file system detected on %s.",
+	}
+
+	/* Load 1st sector with boot sector and boot parameter block. */
+	sector0 = (uint8_t *)g_read_data(cp, 0, pp->sectorsize, NULL);
+	if (sector0 == NULL)
+		return;
+
+	/* Check for the FAT boot sector signature. */
+	if (sector0[510] != 0x55 || sector0[511] != 0xaa) {
+		G_LABEL_DEBUG(1, "MSDOSFS: %s: no FAT signature found.",
 		    pp->name);
-		volume = sector + 0x2b;
-	} else if (strncmp(sector + 0x36, FAT16, strlen(FAT16)) == 0) {
-		G_LABEL_DEBUG(1, "MSDOS (FAT16) file system detected on %s.",
+		goto error;
+	}
+
+
+	/*
+	 * Test if this is really a FAT volume and determine the FAT type.
+	 */
+
+	pfat_bsbpb = (FAT_BSBPB *)sector0;
+	pfat32_bsbpb = (FAT32_BSBPB *)sector0;
+
+	if (UINT16BYTES(pfat_bsbpb->BPB_FATSz16) != 0) {
+		/*
+		 * If the BPB_FATSz16 field is not zero and the string "FAT" is
+		 * at the right place, this should be a FAT12 or FAT16 volume.
+		 */
+		if (strncmp(pfat_bsbpb->BS_FilSysType, "FAT", 3) != 0) {
+			G_LABEL_DEBUG(1,
+			    "MSDOSFS: %s: FAT12/16 volume not valid.",
+			    pp->name);
+			goto error;
+		}
+		G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT12/FAT16 volume detected.",
 		    pp->name);
-		volume = sector + 0x2b;
-	} else if (strncmp(sector + 0x52, FAT32, strlen(FAT32)) == 0) {
-		G_LABEL_DEBUG(1, "MSDOS (FAT32) file system detected on %s.",
+
+		/* A volume with no name should have "NO NAME    " as label. */
+		if (strncmp(pfat_bsbpb->BS_VolLab, LABEL_NO_NAME,
+		    sizeof(pfat_bsbpb->BS_VolLab)) == 0) {
+			G_LABEL_DEBUG(1,
+			    "MSDOSFS: %s: FAT12/16 volume has no name.",
+			    pp->name);
+			goto error;
+		}
+		strlcpy(label, pfat_bsbpb->BS_VolLab,
+		    MIN(size, sizeof(pfat_bsbpb->BS_VolLab) + 1));
+	} else if (UINT32BYTES(pfat32_bsbpb->BPB_FATSz32) != 0) {
+		uint32_t fat_FirstDataSector, fat_BytesPerSector, offset;
+
+		/*
+		 * If the BPB_FATSz32 field is not zero and the string "FAT" is
+		 * at the right place, this should be a FAT32 volume.
+		 */
+		if (strncmp(pfat32_bsbpb->BS_FilSysType, "FAT", 3) != 0) {
+			G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT32 volume not valid.",
+			    pp->name);
+			goto error;
+		}
+		G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT32 volume detected.",
 		    pp->name);
-		volume = sector + 0x47;
+
+		/*
+		 * If the volume label is not "NO NAME    " we're done.
+		 */
+		if (strncmp(pfat32_bsbpb->BS_VolLab, LABEL_NO_NAME,
+		    sizeof(pfat32_bsbpb->BS_VolLab)) != 0) {
+			strlcpy(label, pfat32_bsbpb->BS_VolLab,
+			    MIN(size, sizeof(pfat32_bsbpb->BS_VolLab) + 1));
+			goto endofchecks;
+		}
+
+		/*
+		 * If the volume label "NO NAME    " is in the boot sector, the
+		 * label of FAT32 volumes may be stored as a special entry in
+		 * the root directory.
+		 */
+		fat_FirstDataSector =
+		    UINT16BYTES(pfat32_bsbpb->BPB_RsvdSecCnt) +
+		    (pfat32_bsbpb->BPB_NumFATs *
+		     UINT32BYTES(pfat32_bsbpb->BPB_FATSz32));
+		fat_BytesPerSector = UINT16BYTES(pfat32_bsbpb->BPB_BytsPerSec);
+
+		G_LABEL_DEBUG(2,
+		    "MSDOSFS: FAT_FirstDataSector=0x%x, FAT_BytesPerSector=%d",
+		    fat_FirstDataSector, fat_BytesPerSector);
+
+		for (offset = fat_BytesPerSector * fat_FirstDataSector;;
+		    offset += fat_BytesPerSector) {
+			sector = (uint8_t *)g_read_data(cp, offset,
+			    fat_BytesPerSector, NULL);
+			if (sector == NULL)
+				goto error;
+
+			pfat_entry = (FAT_DES *)sector;
+			do {
+				/* No more entries available. */
+				if (pfat_entry->DIR_Name[0] == 0) {
+					G_LABEL_DEBUG(1, "MSDOSFS: %s: "
+					    "FAT32 volume has no name.",
+					    pp->name);
+					goto error;
+				}
+
+				/* Skip empty or long name entries. */
+				if (pfat_entry->DIR_Name[0] == 0xe5 ||
+				    (pfat_entry->DIR_Attr &
+				     FAT_DES_ATTR_LONG_NAME) ==
+				    FAT_DES_ATTR_LONG_NAME) {
+					continue;
+				}
+
+				/*
+				 * The name of the entry is the volume label if
+				 * ATTR_VOLUME_ID is set.
+				 */
+				if (pfat_entry->DIR_Attr &
+				    FAT_DES_ATTR_VOLUME_ID) {
+					strlcpy(label, pfat_entry->DIR_Name,
+					    MIN(size,
+					    sizeof(pfat_bsbpb->BS_VolLab) + 1));
+					goto endofchecks;
+				}
+			} while((uint8_t *)(++pfat_entry) <
+			    (uint8_t *)(sector + fat_BytesPerSector));
+			g_free(sector);
+		}
 	} else {
-		g_free(sector);
-		return;
-	}
-	if (strncmp(volume, NO_NAME, VOLUME_LEN) == 0) {
-		g_free(sector);
-		return;
-	}
-	if (volume[0] == '\0') {
-		g_free(sector);
-		return;
+		G_LABEL_DEBUG(1, "MSDOSFS: %s: no FAT volume detected.",
+		    pp->name);
+		goto error;
 	}
-	bzero(label, size);
-	strlcpy(label, volume, MIN(size, VOLUME_LEN));
-	g_free(sector);
+
+endofchecks:
 	for (i = size - 1; i > 0; i--) {
 		if (label[i] == '\0')
 			continue;
@@ -93,6 +208,12 @@
 		else
 			break;
 	}
+
+error:
+	if (sector0 != NULL)
+		g_free(sector0);
+	if (sector != NULL)
+		g_free(sector);
 }
 
 const struct g_label_desc g_label_msdosfs = {
Index: g_label.c
===================================================================
RCS file: /home/cvs/src/sys/geom/label/g_label.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/label/g_label.c -L sys/geom/label/g_label.c -u -r1.2 -r1.3
--- sys/geom/label/g_label.c
+++ sys/geom/label/g_label.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/label/g_label.c,v 1.13.2.4 2006/03/01 17:53:57 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/label/g_label.c,v 1.21 2006/08/12 15:30:24 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -36,6 +36,7 @@
 #include <sys/bio.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
+#include <sys/libkern.h>
 #include <geom/geom.h>
 #include <geom/geom_slice.h>
 #include <geom/label/g_label.h>
@@ -116,6 +117,23 @@
 	g_slice_spoiled(cp);
 }
 
+static int
+g_label_is_name_ok(const char *label)
+{
+	const char *s;
+
+	/* Check if the label starts with ../ */
+	if (strncmp(label, "../", 3) == 0)
+		return (0);
+	/* Check if the label contains /../ */
+	if (strstr(label, "/../") != NULL)
+		return (0);
+	/* Check if the label ends with /.. (first "/.." match only) */
+	if ((s = strstr(label, "/..")) != NULL && s[3] == '\0')
+		return (0);
+	return (1);
+}
+
 static struct g_geom *
 g_label_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp,
     const char *label, const char *dir, off_t mediasize)
@@ -127,6 +145,12 @@
 
 	g_topology_assert();
 
+	if (!g_label_is_name_ok(label)) {
+		G_LABEL_DEBUG(0, "%s contains suspicious label, skipping.",
+		    pp->name);
+		G_LABEL_DEBUG(1, "%s suspicious label is: %s", pp->name, label);
+		return (NULL);
+	}
 	gp = NULL;
 	cp = NULL;
 	snprintf(name, sizeof(name), "%s/%s", dir, label);
Index: g_label.h
===================================================================
RCS file: /home/cvs/src/sys/geom/label/g_label.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/label/g_label.h -L sys/geom/label/g_label.h -u -r1.2 -r1.3
--- sys/geom/label/g_label.h
+++ sys/geom/label/g_label.h
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/label/g_label.h,v 1.4.2.3 2006/03/01 17:53:57 pjd Exp $
+ * $FreeBSD: src/sys/geom/label/g_label.h,v 1.7 2006/02/01 12:06:00 pjd Exp $
  */
 
 #ifndef	_G_LABEL_H_
Index: g_label_ext2fs.c
===================================================================
RCS file: /home/cvs/src/sys/geom/label/g_label_ext2fs.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/geom/label/g_label_ext2fs.c -L sys/geom/label/g_label_ext2fs.c -u -r1.1.1.1 -r1.2
--- sys/geom/label/g_label_ext2fs.c
+++ sys/geom/label/g_label_ext2fs.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/label/g_label_ext2fs.c,v 1.1.2.2 2005/08/30 15:18:54 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/label/g_label_ext2fs.c,v 1.2 2005/08/23 18:55:38 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
Index: g_mirror.c
===================================================================
RCS file: /home/cvs/src/sys/geom/mirror/g_mirror.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/mirror/g_mirror.c -L sys/geom/mirror/g_mirror.c -u -r1.2 -r1.3
--- sys/geom/mirror/g_mirror.c
+++ sys/geom/mirror/g_mirror.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/mirror/g_mirror.c,v 1.66.2.4.2.1 2006/04/26 06:35:10 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/mirror/g_mirror.c,v 1.93 2007/06/05 00:00:52 jeff Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -78,7 +78,7 @@
 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
 } while (0)
 
-static eventhandler_tag g_mirror_pre_sync = NULL, g_mirror_post_sync = NULL;
+static eventhandler_tag g_mirror_pre_sync = NULL;
 
 static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
@@ -539,13 +539,11 @@
 		}
 	}
 	callout_drain(&sc->sc_callout);
-	gp->softc = NULL;
 
 	g_topology_lock();
 	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
 		g_mirror_disconnect_consumer(sc, cp);
 	}
-	sc->sc_sync.ds_geom->softc = NULL;
 	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
 	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
 	g_wither_geom(gp, ENXIO);
@@ -804,6 +802,8 @@
 
 	if (sc->sc_provider == NULL)
 		return (0);
+	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
+		return (0);
 	if (sc->sc_idle)
 		return (0);
 	if (sc->sc_writes > 0)
@@ -833,6 +833,8 @@
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
+	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
+		return;
 	sc->sc_idle = 0;
 	sc->sc_last_write = time_uptime;
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
@@ -876,7 +878,7 @@
 	struct g_mirror_softc *sc;
 
 	sc = bp->bio_from->geom->softc;
-	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
+	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
 	mtx_lock(&sc->sc_queue_mtx);
 	bioq_disksort(&sc->sc_queue, bp);
 	wakeup(sc);
@@ -1004,7 +1006,7 @@
 
 	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
 	sc = bp->bio_from->geom->softc;
-	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
+	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
 	mtx_lock(&sc->sc_queue_mtx);
 	bioq_disksort(&sc->sc_queue, bp);
 	wakeup(sc);
@@ -1044,6 +1046,48 @@
 }
 
 static void
+g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
+{
+	struct bio_queue_head queue;
+	struct g_mirror_disk *disk;
+	struct g_consumer *cp;
+	struct bio *cbp;
+
+	bioq_init(&queue);
+	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
+		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
+			continue;
+		cbp = g_clone_bio(bp);
+		if (cbp == NULL) {
+			for (cbp = bioq_first(&queue); cbp != NULL;
+			    cbp = bioq_first(&queue)) {
+				bioq_remove(&queue, cbp);
+				g_destroy_bio(cbp);
+			}
+			if (bp->bio_error == 0)
+				bp->bio_error = ENOMEM;
+			g_io_deliver(bp, bp->bio_error);
+			return;
+		}
+		bioq_insert_tail(&queue, cbp);
+		cbp->bio_done = g_std_done;
+		cbp->bio_caller1 = disk;
+		cbp->bio_to = disk->d_consumer->provider;
+	}
+	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
+		bioq_remove(&queue, cbp);
+		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
+		disk = cbp->bio_caller1;
+		cbp->bio_caller1 = NULL;
+		cp = disk->d_consumer;
+		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
+		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
+		    cp->acr, cp->acw, cp->ace));
+		g_io_request(cbp, disk->d_consumer);
+	}
+}
+
+static void
 g_mirror_start(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
@@ -1063,6 +1107,9 @@
 	case BIO_WRITE:
 	case BIO_DELETE:
 		break;
+	case BIO_FLUSH:
+		g_mirror_flush(sc, bp);
+		return;
 	case BIO_GETATTR:
 		if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
 			g_mirror_kernel_dump(bp);
@@ -1660,6 +1707,8 @@
 
 	g_topology_assert();
 	gp = sc->sc_geom;
+	if (gp->softc == NULL)
+		return (1);
 	LIST_FOREACH(cp, &gp->consumer, consumer) {
 		if (g_mirror_is_busy(sc, cp))
 			return (0);
@@ -1689,6 +1738,8 @@
 		g_topology_unlock();
 		return (0);
 	}
+	sc->sc_geom->softc = NULL;
+	sc->sc_sync.ds_geom->softc = NULL;
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
 		g_topology_unlock();
 		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
@@ -1717,9 +1768,9 @@
 	int timeout;
 
 	sc = arg;
-	mtx_lock_spin(&sched_lock);
+	thread_lock(curthread);
 	sched_prio(curthread, PRIBIO);
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(curthread);
 
 	sx_xlock(&sc->sc_lock);
 	for (;;) {
@@ -1782,14 +1833,6 @@
 		mtx_lock(&sc->sc_queue_mtx);
 		bp = bioq_first(&sc->sc_queue);
 		if (bp == NULL) {
-			if (ep != NULL) {
-				/*
-				 * We have a pending even, try to serve it
-				 * again.
-				 */
-				mtx_unlock(&sc->sc_queue_mtx);
-				continue;
-			}
 			if ((sc->sc_flags &
 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 				mtx_unlock(&sc->sc_queue_mtx);
@@ -1801,6 +1844,15 @@
 				mtx_lock(&sc->sc_queue_mtx);
 			}
 			sx_xunlock(&sc->sc_lock);
+			/*
+			 * XXX: We can miss an event here, because an event
+			 *      can be added without sx-device-lock and without
+			 *      mtx-queue-lock. Maybe I should just stop using
+			 *      dedicated mutex for events synchronization and
+			 *      stick with the queue lock?
+			 *      The event will hang here until next I/O request
+			 *      or next event is received.
+			 */
 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
 			    timeout * hz);
 			sx_xlock(&sc->sc_lock);
@@ -1810,12 +1862,22 @@
 		bioq_remove(&sc->sc_queue, bp);
 		mtx_unlock(&sc->sc_queue_mtx);
 
-		if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
-			g_mirror_regular_request(bp);
-		else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
-			g_mirror_sync_request(bp);
-		else
+		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
+		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
+			g_mirror_sync_request(bp);	/* READ */
+		} else if (bp->bio_to != sc->sc_provider) {
+			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
+				g_mirror_regular_request(bp);
+			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
+				g_mirror_sync_request(bp);	/* WRITE */
+			else {
+				KASSERT(0,
+				    ("Invalid request cflags=0x%hhx to=%s.",
+				    bp->bio_cflags, bp->bio_to->name));
+			}
+		} else {
 			g_mirror_register_request(bp);
+		}
 		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
 	}
 }
@@ -1826,6 +1888,8 @@
 
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
+	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
+		return;
 	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
 		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
 		    g_mirror_get_diskname(disk), sc->sc_name);
@@ -1870,7 +1934,8 @@
 
 	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
 	    g_mirror_get_diskname(disk));
-	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
+	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
+		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
 	KASSERT(disk->d_sync.ds_consumer == NULL,
 	    ("Sync consumer already exists (device=%s, disk=%s).",
 	    sc->sc_name, g_mirror_get_diskname(disk)));
@@ -1978,8 +2043,8 @@
 	sc->sc_provider = pp;
 	g_error_provider(pp, 0);
 	g_topology_unlock();
-	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
-	    pp->name);
+	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
+	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			g_mirror_sync_start(disk);
@@ -2074,9 +2139,9 @@
 		 * Not good, NOT GOOD!
 		 * It means that mirror was started on stale disks
 		 * and more fresh disk just arrive.
-		 * If there were writes, mirror is fucked up, sorry.
+		 * If there were writes, mirror is broken, sorry.
 		 * I think the best choice here is don't touch
-		 * this disk and inform the user laudly.
+		 * this disk and inform the user loudly.
 		 */
 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
 		    "disk (%s) arrives!! It will not be connected to the "
@@ -2381,7 +2446,7 @@
 			if (dp != NULL)
 				LIST_INSERT_AFTER(dp, disk, d_next);
 		}
-		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
+		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
 			break;
@@ -2422,7 +2487,7 @@
 		disk->d_sync.ds_offset_done = 0;
 		g_mirror_update_idle(sc, disk);
 		g_mirror_update_metadata(disk);
-		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
+		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 		break;
 	case G_MIRROR_DISK_STATE_STALE:
@@ -2697,36 +2762,75 @@
 	return (0);
 }
 
+static void
+g_mirror_destroy_delayed(void *arg, int flag)
+{
+	struct g_mirror_softc *sc;
+	int error;
+
+	if (flag == EV_CANCEL) {
+		G_MIRROR_DEBUG(1, "Destroying canceled.");
+		return;
+	}
+	sc = arg;
+	g_topology_unlock();
+	sx_xlock(&sc->sc_lock);
+	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
+	    ("DESTROY flag set on %s.", sc->sc_name));
+	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0,
+	    ("DESTROYING flag not set on %s.", sc->sc_name));
+	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
+	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
+	if (error != 0) {
+		G_MIRROR_DEBUG(0, "Cannot destroy %s.", sc->sc_name);
+		sx_xunlock(&sc->sc_lock);
+	}
+	g_topology_lock();
+}
+
 static int
 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
 {
 	struct g_mirror_softc *sc;
-	int dcr, dcw, dce;
+	int dcr, dcw, dce, error = 0;
 
 	g_topology_assert();
 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
 	    acw, ace);
 
+	sc = pp->geom->softc;
+	if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0)
+		return (0);
+	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
+
 	dcr = pp->acr + acr;
 	dcw = pp->acw + acw;
 	dce = pp->ace + ace;
 
-	sc = pp->geom->softc;
-	if (sc == NULL || LIST_EMPTY(&sc->sc_disks) ||
-	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
-		if (acr <= 0 && acw <= 0 && ace <= 0)
-			return (0);
-		else
-			return (ENXIO);
+	g_topology_unlock();
+	sx_xlock(&sc->sc_lock);
+	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
+	    LIST_EMPTY(&sc->sc_disks)) {
+		if (acr > 0 || acw > 0 || ace > 0)
+			error = ENXIO;
+		goto end;
 	}
-	if (dcw == 0 && !sc->sc_idle) {
-		g_topology_unlock();
-		sx_xlock(&sc->sc_lock);
+	if (dcw == 0 && !sc->sc_idle)
 		g_mirror_idle(sc, dcw);
-		sx_xunlock(&sc->sc_lock);
-		g_topology_lock();
+	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0) {
+		if (acr > 0 || acw > 0 || ace > 0) {
+			error = ENXIO;
+			goto end;
+		}
+		if (dcr == 0 && dcw == 0 && dce == 0) {
+			g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK,
+			    sc, NULL);
+		}
 	}
-	return (0);
+end:
+	sx_xunlock(&sc->sc_lock);
+	g_topology_lock();
+	return (error);
 }
 
 static struct g_geom *
@@ -2800,7 +2904,8 @@
 		return (NULL);
 	}
 
-	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
+	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
+	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
 
 	sc->sc_rootmount = root_mount_hold("GMIRROR");
 	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
@@ -2813,8 +2918,9 @@
 }
 
 int
-g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
+g_mirror_destroy(struct g_mirror_softc *sc, int how)
 {
+	struct g_mirror_disk *disk;
 	struct g_provider *pp;
 
 	g_topology_assert_not();
@@ -2824,17 +2930,39 @@
 
 	pp = sc->sc_provider;
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
-		if (force) {
-			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
-			    "can't be definitely removed.", pp->name);
-		} else {
+		switch (how) {
+		case G_MIRROR_DESTROY_SOFT:
 			G_MIRROR_DEBUG(1,
 			    "Device %s is still open (r%dw%de%d).", pp->name,
 			    pp->acr, pp->acw, pp->ace);
 			return (EBUSY);
+		case G_MIRROR_DESTROY_DELAYED:
+			G_MIRROR_DEBUG(1,
+			    "Device %s will be destroyed on last close.",
+			    pp->name);
+			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
+				if (disk->d_state ==
+				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
+					g_mirror_sync_stop(disk, 1);
+				}
+			}
+			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROYING;
+			return (EBUSY);
+		case G_MIRROR_DESTROY_HARD:
+			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
+			    "can't be definitely removed.", pp->name);
 		}
 	}
 
+	g_topology_lock();
+	if (sc->sc_geom->softc == NULL) {
+		g_topology_unlock();
+		return (0);
+	}
+	sc->sc_geom->softc = NULL;
+	sc->sc_sync.ds_geom->softc = NULL;
+	g_topology_unlock();
+
 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
@@ -2937,7 +3065,8 @@
 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
 		    pp->name, gp->name, error);
 		if (LIST_EMPTY(&sc->sc_disks)) {
-			g_mirror_destroy(sc, 1);
+			g_cancel_event(sc);
+			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
 			g_topology_lock();
 			return (NULL);
 		}
@@ -2958,7 +3087,8 @@
 	g_topology_unlock();
 	sc = gp->softc;
 	sx_xlock(&sc->sc_lock);
-	error = g_mirror_destroy(gp->softc, 0);
+	g_cancel_event(sc);
+	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
 	if (error != 0)
 		sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
@@ -3057,6 +3187,7 @@
 		sbuf_printf(sb, name);					\
 	}								\
 } while (0)
+			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
 #undef	ADD_FLAG
 		}
@@ -3087,36 +3218,7 @@
 	struct g_class *mp;
 	struct g_geom *gp, *gp2;
 	struct g_mirror_softc *sc;
-	struct g_mirror_disk *disk;
-
-	mp = arg;
-	DROP_GIANT();
-	g_topology_lock();
-	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
-		if ((sc = gp->softc) == NULL)
-			continue;
-		/* Skip synchronization geom. */
-		if (gp == sc->sc_sync.ds_geom)
-			continue;
-		g_topology_unlock();
-		sx_xlock(&sc->sc_lock);
-		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
-			if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
-				g_mirror_sync_stop(disk, 1);
-		}
-		sx_xunlock(&sc->sc_lock);
-		g_topology_lock();
-	}
-	g_topology_unlock();
-	PICKUP_GIANT();
-}
-
-static void
-g_mirror_shutdown_post_sync(void *arg, int howto)
-{
-	struct g_class *mp;
-	struct g_geom *gp, *gp2;
-	struct g_mirror_softc *sc;
+	int error;
 
 	mp = arg;
 	DROP_GIANT();
@@ -3129,14 +3231,14 @@
 			continue;
 		g_topology_unlock();
 		sx_xlock(&sc->sc_lock);
-		g_mirror_destroy(sc, 1);
+		g_cancel_event(sc);
+		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
+		if (error != 0)
+			sx_xunlock(&sc->sc_lock);
 		g_topology_lock();
 	}
 	g_topology_unlock();
 	PICKUP_GIANT();
-#if 0
-	tsleep(&gp, PRIBIO, "m:shutdown", hz * 20);
-#endif
 }
 
 static void
@@ -3145,9 +3247,7 @@
 
 	g_mirror_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
 	    g_mirror_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
-	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
-	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
-	if (g_mirror_pre_sync == NULL || g_mirror_post_sync == NULL)
+	if (g_mirror_pre_sync == NULL)
 		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
 }
 
@@ -3157,8 +3257,6 @@
 
 	if (g_mirror_pre_sync != NULL)
 		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_mirror_pre_sync);
-	if (g_mirror_post_sync != NULL)
-		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
 }
 
 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
Index: g_mirror_ctl.c
===================================================================
RCS file: /home/cvs/src/sys/geom/mirror/g_mirror_ctl.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/mirror/g_mirror_ctl.c -L sys/geom/mirror/g_mirror_ctl.c -u -r1.2 -r1.3
--- sys/geom/mirror/g_mirror_ctl.c
+++ sys/geom/mirror/g_mirror_ctl.c
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/mirror/g_mirror_ctl.c,v 1.11.2.1 2006/03/20 15:48:55 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/mirror/g_mirror_ctl.c,v 1.18 2006/11/01 22:51:49 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -75,6 +75,8 @@
 	struct g_mirror_disk *disk;
 
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
+	if (strncmp(name, "/dev/", 5) == 0)
+		name += 5;
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_consumer == NULL)
 			continue;
@@ -95,7 +97,8 @@
 	intmax_t *slicep;
 	uint32_t slice;
 	uint8_t balance;
-	int *nargs, *autosync, *noautosync, *hardcode, *dynamic, do_sync = 0;
+	int *autosync, *noautosync, *failsync, *nofailsync, *hardcode, *dynamic;
+	int *nargs, do_sync = 0, dirty = 1;
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
@@ -126,6 +129,16 @@
 		gctl_error(req, "No '%s' argument.", "noautosync");
 		return;
 	}
+	failsync = gctl_get_paraml(req, "failsync", sizeof(*failsync));
+	if (failsync == NULL) {
+		gctl_error(req, "No '%s' argument.", "failsync");
+		return;
+	}
+	nofailsync = gctl_get_paraml(req, "nofailsync", sizeof(*nofailsync));
+	if (nofailsync == NULL) {
+		gctl_error(req, "No '%s' argument.", "nofailsync");
+		return;
+	}
 	hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode));
 	if (hardcode == NULL) {
 		gctl_error(req, "No '%s' argument.", "hardcode");
@@ -141,6 +154,11 @@
 		    "noautosync");
 		return;
 	}
+	if (*failsync && *nofailsync) {
+		gctl_error(req, "'%s' and '%s' specified.", "failsync",
+		    "nofailsync");
+		return;
+	}
 	if (*hardcode && *dynamic) {
 		gctl_error(req, "'%s' and '%s' specified.", "hardcode",
 		    "dynamic");
@@ -178,7 +196,8 @@
 		return;
 	}
 	if (sc->sc_balance == balance && sc->sc_slice == slice && !*autosync &&
-	    !*noautosync && !*hardcode && !*dynamic) {
+	    !*noautosync && !*failsync && !*nofailsync && !*hardcode &&
+	    !*dynamic) {
 		sx_xunlock(&sc->sc_lock);
 		gctl_error(req, "Nothing has changed.");
 		return;
@@ -194,6 +213,15 @@
 		if (*noautosync)
 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_NOAUTOSYNC;
 	}
+	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) {
+		if (*failsync)
+			sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_NOFAILSYNC;
+	} else {
+		if (*nofailsync) {
+			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_NOFAILSYNC;
+			dirty = 0;
+		}
+	}
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (do_sync) {
 			if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
@@ -203,6 +231,8 @@
 			disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
 		else if (*dynamic)
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_HARDCODED;
+		if (!dirty)
+			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		g_mirror_update_metadata(disk);
 		if (do_sync) {
 			if (disk->d_state == G_MIRROR_DISK_STATE_STALE) {
@@ -366,12 +396,12 @@
 			gctl_error(req, "No 'arg%u' argument.", i);
 			continue;
 		}
-		if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
-			name += strlen("/dev/");
 		if (g_mirror_find_disk(sc, name) != NULL) {
 			gctl_error(req, "Provider %s already inserted.", name);
 			continue;
 		}
+		if (strncmp(name, "/dev/", 5) == 0)
+			name += 5;
 		pp = g_provider_by_name(name);
 		if (pp == NULL) {
 			gctl_error(req, "Unknown provider %s.", name);
@@ -628,6 +658,7 @@
 	const char *name;
 	char param[16];
 	u_int i;
+	int how;
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
@@ -643,6 +674,10 @@
 		gctl_error(req, "No '%s' argument.", "force");
 		return;
 	}
+	if (*force)
+		how = G_MIRROR_DESTROY_HARD;
+	else
+		how = G_MIRROR_DESTROY_SOFT;
 
 	for (i = 0; i < (u_int)*nargs; i++) {
 		snprintf(param, sizeof(param), "arg%u", i);
@@ -656,7 +691,8 @@
 			gctl_error(req, "No such device: %s.", name);
 			return;
 		}
-		error = g_mirror_destroy(sc, *force);
+		g_cancel_event(sc);
+		error = g_mirror_destroy(sc, how);
 		if (error != 0) {
 			gctl_error(req, "Cannot destroy device %s (error=%d).",
 			    sc->sc_geom->name, error);
Index: g_mirror.h
===================================================================
RCS file: /home/cvs/src/sys/geom/mirror/g_mirror.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/geom/mirror/g_mirror.h -L sys/geom/mirror/g_mirror.h -u -r1.2 -r1.3
--- sys/geom/mirror/g_mirror.h
+++ sys/geom/mirror/g_mirror.h
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/geom/mirror/g_mirror.h,v 1.17.2.1 2006/03/20 15:48:55 pjd Exp $
+ * $FreeBSD: src/sys/geom/mirror/g_mirror.h,v 1.24 2006/11/01 22:51:49 pjd Exp $
  */
 
 #ifndef	_G_MIRROR_H_
@@ -41,8 +41,9 @@
  * 1 - Added 'prefer' balance algorithm.
  * 2 - Added md_genid field to metadata.
  * 3 - Added md_provsize field to metadata.
+ * 4 - Added 'no failure synchronization' flag.
  */
-#define	G_MIRROR_VERSION	3
+#define	G_MIRROR_VERSION	4
 
 #define	G_MIRROR_BALANCE_NONE		0
 #define	G_MIRROR_BALANCE_ROUND_ROBIN	1
@@ -64,7 +65,9 @@
 					 G_MIRROR_DISK_FLAG_INACTIVE)
 
 #define	G_MIRROR_DEVICE_FLAG_NOAUTOSYNC	0x0000000000000001ULL
-#define	G_MIRROR_DEVICE_FLAG_MASK	(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC)
+#define	G_MIRROR_DEVICE_FLAG_NOFAILSYNC	0x0000000000000002ULL
+#define	G_MIRROR_DEVICE_FLAG_MASK	(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC | \
+					 G_MIRROR_DEVICE_FLAG_NOFAILSYNC)
 
 #ifdef _KERNEL
 extern u_int g_mirror_debug;
@@ -153,6 +156,7 @@
 
 #define	G_MIRROR_DEVICE_FLAG_DESTROY	0x0100000000000000ULL
 #define	G_MIRROR_DEVICE_FLAG_WAIT	0x0200000000000000ULL
+#define	G_MIRROR_DEVICE_FLAG_DESTROYING	0x0400000000000000ULL
 
 #define	G_MIRROR_DEVICE_STATE_STARTING		0
 #define	G_MIRROR_DEVICE_STATE_RUNNING		1
@@ -209,7 +213,10 @@
 #define	sc_name	sc_geom->name
 
 u_int g_mirror_ndisks(struct g_mirror_softc *sc, int state);
-int g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force);
+#define	G_MIRROR_DESTROY_SOFT		0
+#define	G_MIRROR_DESTROY_DELAYED	1
+#define	G_MIRROR_DESTROY_HARD		2
+int g_mirror_destroy(struct g_mirror_softc *sc, int how);
 int g_mirror_event_send(void *arg, int state, int flags);
 struct g_mirror_metadata;
 int g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
@@ -337,7 +344,7 @@
 	return (0);
 }
 static __inline int
-mirror_metadata_decode_v3(const u_char *data, struct g_mirror_metadata *md)
+mirror_metadata_decode_v3v4(const u_char *data, struct g_mirror_metadata *md)
 {
 	MD5_CTX ctx;
 
@@ -381,7 +388,8 @@
 		error = mirror_metadata_decode_v2(data, md);
 		break;
 	case 3:
-		error = mirror_metadata_decode_v3(data, md);
+	case 4:
+		error = mirror_metadata_decode_v3v4(data, md);
 		break;
 	default:
 		error = EINVAL;
@@ -452,6 +460,8 @@
 	if (md->md_mflags == 0)
 		printf(" NONE");
 	else {
+		if ((md->md_mflags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
+			printf(" NOFAILSYNC");
 		if ((md->md_mflags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0)
 			printf(" NOAUTOSYNC");
 	}
Index: g_zero.c
===================================================================
RCS file: /home/cvs/src/sys/geom/zero/g_zero.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/geom/zero/g_zero.c -L sys/geom/zero/g_zero.c -u -r1.1.1.2 -r1.2
--- sys/geom/zero/g_zero.c
+++ sys/geom/zero/g_zero.c
@@ -10,7 +10,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/zero/g_zero.c,v 1.1.2.2 2006/01/17 11:49:09 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/zero/g_zero.c,v 1.4 2006/02/01 12:06:01 pjd Exp $");
 
 #include <sys/param.h>
 #include <sys/bio.h>


More information about the Midnightbsd-cvs mailing list