[Midnightbsd-cvs] src [10228] trunk/cddl/contrib/opensolaris/cmd: sync

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Sat Jun 2 12:07:18 EDT 2018


Revision: 10228
          http://svnweb.midnightbsd.org/src/?rev=10228
Author:   laffer1
Date:     2018-06-02 12:07:17 -0400 (Sat, 02 Jun 2018)
Log Message:
-----------
sync

Modified Paths:
--------------
    trunk/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c
    trunk/cddl/contrib/opensolaris/cmd/lockstat/sym.c
    trunk/cddl/contrib/opensolaris/cmd/mdb/tools/common/die.c
    trunk/cddl/contrib/opensolaris/cmd/mdb/tools/common/util.h
    trunk/cddl/contrib/opensolaris/cmd/plockstat/plockstat.c
    trunk/cddl/contrib/opensolaris/cmd/sgs/include/_string_table.h
    trunk/cddl/contrib/opensolaris/cmd/sgs/include/alist.h
    trunk/cddl/contrib/opensolaris/cmd/sgs/include/debug.h
    trunk/cddl/contrib/opensolaris/cmd/sgs/include/sgs.h
    trunk/cddl/contrib/opensolaris/cmd/sgs/include/string_table.h
    trunk/cddl/contrib/opensolaris/cmd/sgs/tools/common/findprime.c
    trunk/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c
    trunk/cddl/contrib/opensolaris/cmd/sgs/tools/common/string_table.c
    trunk/cddl/contrib/opensolaris/cmd/stat/common/statcommon.h
    trunk/cddl/contrib/opensolaris/cmd/stat/common/timestamp.c
    trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.8
    trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.c
    trunk/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
    trunk/cddl/contrib/opensolaris/cmd/zhack/zhack.c
    trunk/cddl/contrib/opensolaris/cmd/zinject/translate.c
    trunk/cddl/contrib/opensolaris/cmd/zinject/zinject.c
    trunk/cddl/contrib/opensolaris/cmd/zinject/zinject.h
    trunk/cddl/contrib/opensolaris/cmd/zlook/zlook.c
    trunk/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7
    trunk/cddl/contrib/opensolaris/cmd/zpool/zpool.8
    trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c
    trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
    trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
    trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
    trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
    trunk/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1
    trunk/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
    trunk/cddl/contrib/opensolaris/cmd/ztest/ztest.c

Property Changed:
----------------
    trunk/cddl/contrib/opensolaris/cmd/lockstat/lockstat.1
    trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.8
    trunk/cddl/contrib/opensolaris/cmd/zpool/zpool.8
    trunk/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1

Index: trunk/cddl/contrib/opensolaris/cmd/lockstat/lockstat.1
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/lockstat/lockstat.1	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/lockstat/lockstat.1	2018-06-02 16:07:17 UTC (rev 10228)

Property changes on: trunk/cddl/contrib/opensolaris/cmd/lockstat/lockstat.1
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -46,10 +47,9 @@
 #include <signal.h>
 #include <assert.h>
 
-#if defined(sun)
+#ifdef illumos
 #define	GETOPT_EOF	EOF
 #else
-/* FreeBSD */ 
 #include <sys/time.h>
 #include <sys/resource.h>
 
@@ -57,7 +57,7 @@
 #define	GETOPT_EOF		(-1)
 
 typedef	uintptr_t	pc_t;
-#endif /* defined(sun) */
+#endif
 
 #define	LOCKSTAT_OPTSTR	"x:bths:n:d:i:l:f:e:ckwWgCHEATID:RpPo:V"
 
@@ -158,14 +158,22 @@
 	    "lockstat:::rw-block", "arg2 != 0 && arg3 == 1" },
 	{ 'C',	"Lock",	"R/W reader blocked by write wanted",	"nsec",
 	    "lockstat:::rw-block", "arg2 != 0 && arg3 == 0 && arg4" },
-	{ 'C',	"Lock",	"Unknown event (type 8)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 9)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 10)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 11)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 12)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 13)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 14)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 15)",		"units"	},
+	{ 'C',	"Lock",	"R/W writer spin on writer",		"nsec",
+	    "lockstat:::rw-spin", "arg2 == 0 && arg3 == 1" },
+	{ 'C',	"Lock",	"R/W writer spin on readers",		"nsec",
+	    "lockstat:::rw-spin", "arg2 == 0 && arg3 == 0 && arg4" },
+	{ 'C',	"Lock",	"R/W reader spin on writer",		"nsec",
+	    "lockstat:::rw-spin", "arg2 != 0 && arg3 == 1" },
+	{ 'C',	"Lock",	"R/W reader spin on write wanted",	"nsec",
+	    "lockstat:::rw-spin", "arg2 != 0 && arg3 == 0 && arg4" },
+	{ 'C',	"Lock",	"SX exclusive block",			"nsec",
+	    "lockstat:::sx-block", "arg2 == 0" },
+	{ 'C',	"Lock",	"SX shared block",			"nsec",
+	    "lockstat:::sx-block", "arg2 != 0" },
+	{ 'C',	"Lock",	"SX exclusive spin",			"nsec",
+	    "lockstat:::sx-spin", "arg2 == 0" },
+	{ 'C',	"Lock",	"SX shared spin",			"nsec",
+	    "lockstat:::sx-spin", "arg2 != 0" },
 	{ 'C',	"Lock",	"Unknown event (type 16)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 17)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 18)",		"units"	},
@@ -189,13 +197,17 @@
 	    "lockstat:::spin-release", NULL,
 	    "lockstat:::spin-acquire" },
 	{ 'H',	"Lock",	"R/W writer hold",			"nsec",
-	    "lockstat:::rw-release", "arg1 == 0",
-	    "lockstat:::rw-acquire" },
+	    "lockstat::rw_wunlock:rw-release", NULL,
+	    "lockstat::rw_wlock:rw-acquire" },
 	{ 'H',	"Lock",	"R/W reader hold",			"nsec",
-	    "lockstat:::rw-release", "arg1 != 0",
-	    "lockstat:::rw-acquire" },
-	{ 'H',	"Lock",	"Unknown event (type 36)",		"units"	},
-	{ 'H',	"Lock",	"Unknown event (type 37)",		"units"	},
+	    "lockstat::rw_runlock:rw-release", NULL,
+	    "lockstat::rw_rlock:rw-acquire" },
+	{ 'H',	"Lock",	"SX shared hold",			"nsec",
+	    "lockstat::sx_sunlock:sx-release", NULL,
+	    "lockstat::sx_slock:sx-acquire" },
+	{ 'H',	"Lock",	"SX exclusive hold",			"nsec",
+	    "lockstat::sx_xunlock:sx-release", NULL,
+	    "lockstat::sx_xlock:sx-acquire" },
 	{ 'H',	"Lock",	"Unknown event (type 38)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 39)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 40)",		"units"	},
@@ -214,10 +226,9 @@
 	{ 'H',	"Lock",	"Unknown event (type 53)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 54)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 55)",		"units"	},
-#if defined(sun)
+#ifdef illumos
 	{ 'I',	"CPU+PIL", "Profiling interrupt",		"nsec",
 #else
-	/* FreeBSD */
 	{ 'I',	"CPU+Pri_Class", "Profiling interrupt",		"nsec",
 #endif
 	    "profile:::profile-97", NULL },
@@ -231,7 +242,7 @@
 	{ 'E',	"Lock",	"Lockstat record failure",		"(N/A)"	},
 };
 
-#if !defined(sun)
+#ifndef illumos
 static char *g_pri_class[] = {
 	"",
 	"Intr",
@@ -598,7 +609,7 @@
 		*filt[0] = '\0';
 	}
 
-#if defined(sun)
+#ifdef illumos
 	(void) sprintf(c, "%s(%s >= 0x%p && %s < 0x%p)", *filt[0] != '\0' ?
 	    " || " : "", what, (void *)base, what, (void *)(base + size));
 #else
@@ -676,7 +687,7 @@
 		 * the number of nanoseconds) is the number of nanoseconds
 		 * late -- and it's stored in arg2.
 		 */
-#if defined(sun)
+#ifdef illumos
 		arg0 = "(uintptr_t)curthread->t_cpu + \n"
 		    "\t    curthread->t_cpu->cpu_profile_pil";
 #else
@@ -824,7 +835,7 @@
 }
 
 static void
-#if defined(sun)
+#ifdef illumos
 status_fire(void)
 #else
 status_fire(int i)
@@ -1423,7 +1434,7 @@
 		exit(127);
 	}
 
-#if defined(sun)
+#ifdef illumos
 	while (waitpid(child, &status, WEXITED) != child)
 #else
 	while (waitpid(child, &status, 0) != child)
@@ -1468,7 +1479,7 @@
 			dfail("failed to walk aggregate");
 	}
 
-#if defined(sun)
+#ifdef illumos
 	if ((data_buf = memalign(sizeof (uint64_t),
 	    (g_nrecs + 1) * g_recsize)) == NULL)
 #else
@@ -1500,7 +1511,7 @@
 	if (g_gflag) {
 		lsrec_t *newlsp, *oldlsp;
 
-#if defined(sun)
+#ifdef illumos
 		newlsp = memalign(sizeof (uint64_t),
 		    g_nrecs_used * LS_TIME * (g_stkdepth + 1));
 #else
@@ -1664,7 +1675,7 @@
 	else if (symoff == 0)
 		(void) sprintf(buf, "%s", symname);
 	else if (symoff < 16 && bcmp(symname, "cpu[", 4) == 0)	/* CPU+PIL */
-#if defined(sun)
+#ifdef illumos
 		(void) sprintf(buf, "%s+%ld", symname, (long)symoff);
 #else
 		(void) sprintf(buf, "%s+%s", symname, g_pri_class[(int)symoff]);

Modified: trunk/cddl/contrib/opensolaris/cmd/lockstat/sym.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/lockstat/sym.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/lockstat/sym.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -42,12 +43,11 @@
 #include <libelf.h>
 #include <link.h>
 #include <elf.h>
-#if defined(sun)
+#ifdef illumos
 #include <sys/machelf.h>
 
 #include <kstat.h>
 #else
-/* FreeBSD */
 #include <sys/elf.h>
 #include <sys/ksyms.h>
 #endif
@@ -63,7 +63,7 @@
 static int nsyms, maxsyms;
 static char maxsymname[64];
 
-#if defined(sun)
+#ifdef illumos
 #ifdef _ELF64
 #define	elf_getshdr elf64_getshdr
 #else
@@ -102,7 +102,7 @@
 			sep->addr = 0;
 }
 
-#if defined(sun)
+#ifdef illumos
 static void
 fake_up_certain_popular_kernel_symbols(void)
 {
@@ -130,8 +130,7 @@
 	}
 	(void) kstat_close(kc);
 }
-#else
-/* FreeBSD */
+#else /* !illumos */
 static void
 fake_up_certain_popular_kernel_symbols(void)
 {
@@ -148,7 +147,7 @@
 		add_symbol(name, addr, sizeof (uintptr_t));
 	}
 }
-#endif /* !defined(sun) */
+#endif /* illumos */
 
 static int
 symcmp(const void *p1, const void *p2)
@@ -174,7 +173,7 @@
 	int		fd;
 	int		i;
 	int		strindex = -1;
-#if !defined(sun)
+#ifndef illumos
 	void		*ksyms;
 	size_t		sz;
 #endif
@@ -182,12 +181,11 @@
 	if ((fd = open("/dev/ksyms", O_RDONLY)) == -1)
 		return (-1);
 
-#if defined(sun)
+#ifdef illumos
 	(void) elf_version(EV_CURRENT);
 
 	elf = elf_begin(fd, ELF_C_READ, NULL);
 #else
-	/* FreeBSD */
 	/* 
 	 * XXX - libelf needs to be fixed so it will work with
 	 * non 'ordinary' files like /dev/ksyms.  The following

Modified: trunk/cddl/contrib/opensolaris/cmd/mdb/tools/common/die.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/mdb/tools/common/die.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/mdb/tools/common/die.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -40,7 +41,7 @@
 {
 	va_list ap;
 	int err = errno;
-#if !defined(sun)
+#ifndef illumos
 	const char *progname = getprogname();
 #endif
 
@@ -54,7 +55,7 @@
 	if (format[strlen(format) - 1] != '\n')
 		(void) fprintf(stderr, ": %s\n", strerror(err));
 
-#if defined(__FreeBSD__)
+#ifndef illumos
 	exit(0);
 #else
 	exit(1);
@@ -65,7 +66,7 @@
 elfdie(char *format, ...)
 {
 	va_list ap;
-#if !defined(sun)
+#ifndef illumos
 	const char *progname = getprogname();
 #endif
 
@@ -79,7 +80,7 @@
 	if (format[strlen(format) - 1] != '\n')
 		(void) fprintf(stderr, ": %s\n", elf_errmsg(elf_errno()));
 
-#if defined(__FreeBSD__)
+#ifndef illumos
 	exit(0);
 #else
 	exit(1);

Modified: trunk/cddl/contrib/opensolaris/cmd/mdb/tools/common/util.h
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/mdb/tools/common/util.h	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/mdb/tools/common/util.h	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -40,7 +41,7 @@
 extern void die(char *, ...);
 extern void elfdie(char *, ...);
 
-#if defined(sun)
+#ifdef illumos
 extern const char *progname;
 #endif
 

Modified: trunk/cddl/contrib/opensolaris/cmd/plockstat/plockstat.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/plockstat/plockstat.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/plockstat/plockstat.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -24,7 +25,7 @@
  * Use is subject to license terms.
  */
 
-#if defined(sun)
+#ifdef illumos
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 #endif
 
@@ -505,7 +506,7 @@
 {
 	char name[256];
 	GElf_Sym sym;
-#if defined(sun)
+#ifdef illumos
 	prsyminfo_t info;
 #else
 	prmap_t *map;
@@ -515,10 +516,10 @@
 
 	if (P == NULL || Pxlookup_by_addr(P, addr, name, sizeof (name),
 	    &sym, &info) != 0) {
-		(void) snprintf(buf, size, "%#lx", addr);
+		(void) snprintf(buf, size, "%#lx", (unsigned long)addr);
 		return (0);
 	}
-#if defined(sun)
+#ifdef illumos
 	if (info.prs_object == NULL)
 		info.prs_object = "<unknown>";
 
@@ -537,7 +538,7 @@
 	size -= len;
 
 	if (sym.st_value != addr)
-		len = snprintf(buf, size, "+%#lx", addr - sym.st_value);
+		len = snprintf(buf, size, "+%#lx", (unsigned long)(addr - sym.st_value));
 
 	if (nolocks && strcmp("libc.so.1", map->pr_mapname) == 0 &&
 	    (strstr("mutex", name) == 0 ||
@@ -668,7 +669,7 @@
 static void
 prochandler(struct ps_prochandle *P, const char *msg, void *arg)
 {
-#if defined(sun)
+#ifdef illumos
 	const psinfo_t *prp = Ppsinfo(P);
 	int pid = Pstatus(P)->pr_pid;
 #else
@@ -773,18 +774,19 @@
 int
 main(int argc, char **argv)
 {
-#if defined(sun)
+#ifdef illumos
 	ucred_t *ucp;
 #endif
 	int err;
 	int opt_C = 0, opt_H = 0, opt_p = 0, opt_v = 0;
-	char c, *p, *end;
+	int c;
+	char *p, *end;
 	struct sigaction act;
 	int done = 0;
 
 	g_pname = basename(argv[0]);
 	argv[0] = g_pname; /* rewrite argv[0] for getopt errors */
-#if defined(sun)
+#ifdef illumos
 	/*
 	 * Make sure we have the required dtrace_proc privilege.
 	 */
@@ -987,7 +989,7 @@
 
 	if (opt_v)
 		(void) printf("%s: tracing enabled for pid %d\n", g_pname,
-#if defined(sun)
+#ifdef illumos
 		    (int)Pstatus(g_pr)->pr_pid);
 #else
 		    (int)proc_getpid(g_pr));

Modified: trunk/cddl/contrib/opensolaris/cmd/sgs/include/_string_table.h
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/sgs/include/_string_table.h	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/sgs/include/_string_table.h	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/sgs/include/alist.h
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/sgs/include/alist.h	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/sgs/include/alist.h	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/sgs/include/debug.h
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/sgs/include/debug.h	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/sgs/include/debug.h	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/sgs/include/sgs.h
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/sgs/include/sgs.h	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/sgs/include/sgs.h	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/sgs/include/string_table.h
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/sgs/include/string_table.h	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/sgs/include/string_table.h	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/sgs/tools/common/findprime.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/sgs/tools/common/findprime.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/sgs/tools/common/findprime.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/sgs/tools/common/string_table.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/sgs/tools/common/string_table.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/sgs/tools/common/string_table.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/stat/common/statcommon.h
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/stat/common/statcommon.h	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/stat/common/statcommon.h	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/stat/common/timestamp.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/stat/common/timestamp.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/stat/common/timestamp.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.8
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.8	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.8	2018-06-02 16:07:17 UTC (rev 10228)
@@ -14,11 +14,14 @@
 .\"
 .\" Copyright 2012, Richard Lowe.
 .\" Copyright (c) 2012, Marcelo Araujo <araujo at FreeBSD.org>.
+.\" Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+.\" Copyright 2016 Nexenta Systems, Inc.
 .\" All Rights Reserved.
 .\"
-.\" $FreeBSD: release/9.2.0/cddl/contrib/opensolaris/cmd/zdb/zdb.8 243674 2012-11-29 14:05:04Z mm $
+.\" $FreeBSD: stable/10/cddl/contrib/opensolaris/cmd/zdb/zdb.8 326336 2017-11-28 18:44:58Z asomers $
+.\" $MidnightBSD$
 .\"
-.Dd May 10, 2012
+.Dd October 1, 2017
 .Dt ZDB 8
 .Os
 .Sh NAME
@@ -26,34 +29,44 @@
 .Nd Display zpool debugging and consistency information
 .Sh SYNOPSIS
 .Nm
-.Op Fl CumdibcsDvhLXFPA
+.Op Fl CmdibcsDvhLMXFPA
 .Op Fl e Op Fl p Ar path...
 .Op Fl t Ar txg
+.Op Fl U Ar cache
+.Op Fl I Ar inflight I/Os
+.Op Fl x Ar dumpdir
+.Op Fl o Ar var=value
 .Ar poolname
 .Op Ar object ...
 .Nm
 .Op Fl divPA
 .Op Fl e Op Fl p Ar path...
+.Op Fl U Ar cache
 .Ar dataset
 .Op Ar object ...
 .Nm
-.Fl m Op Fl LXFPA
+.Fl m Op Fl MLXFPA
 .Op Fl t Ar txg
 .Op Fl e Op Fl p Ar path...
+.Op Fl U Ar cache
 .Ar poolname
 .Nm
 .Fl R Op Fl A
 .Op Fl e Op Fl p Ar path...
+.Op Fl U Ar cache
 .Ar poolname
+.Ar poolname
 .Ar vdev Ns : Ns Ar offset Ns : Ns Ar size Ns Op Ns : Ns Ar flags
 .Nm
 .Fl S
 .Op Fl AP
 .Op Fl e Op Fl p Ar path...
+.Op Fl U Ar cache
 .Ar poolname
+.Ar poolname
 .Nm
 .Fl l
-.Op Fl uA
+.Op Fl Aqu
 .Ar device
 .Nm
 .Fl C
@@ -76,6 +89,17 @@
 The precise output of most invocations is not documented, a knowledge of ZFS
 internals is assumed.
 .Pp
+If the
+.Ar dataset
+argument does not contain any
+.Sy /
+or
+.Sy @
+characters, it is interpreted as a pool name.
+The root dataset can be specified as
+.Pa pool Ns Sy /
+(pool name followed by a slash).
+.Pp
 When operating on an imported and active pool it is possible, though unlikely,
 that zdb may interpret inconsistent pool data and behave erratically.
 .Sh OPTIONS
@@ -118,6 +142,12 @@
 If specified twice, display a histogram of deduplication statistics, showing
 the allocated (physically present on disk) and referenced (logically
 referenced in the pool) block counts and sizes by reference count.
+.Pp
+If specified a third time, display the statistics independently for each deduplication table.
+.Pp
+If specified a fourth time, dump the contents of the deduplication tables describing duplicate blocks.
+.Pp
+If specified a fifth time, also dump the contents of the deduplication tables describing unique blocks.
 .It Fl h
 Display pool history similar to
 .Cm zpool history ,
@@ -128,10 +158,15 @@
 If specified multiple times, display counts of each intent log transaction
 type.
 .It Fl l Ar device
-Display the vdev labels from the specified device.
+Read the vdev labels from the specified device.
+Return 0 if a valid label was found, 1 if an error occurred, and 2 if no valid
+labels were found.
 If the
 .Fl u
 option is also specified, also display the uberblocks on this device.
+If the
+.Fl q
+option is also specified, don't print the labels.
 .It Fl L
 Disable leak tracing and the loading of space maps.
 By default,
@@ -139,6 +174,13 @@
 verifies that all non-free blocks are referenced, which can be very expensive.
 .It Fl m
 Display the offset, spacemap, and free space of each metaslab.
+When specified twice, also display information about the on-disk free
+space histogram associated with each metaslab. When specified three times,
+display the maximum contiguous free space, the in-core free space histogram,
+and the percentage of free space in each space map.  When specified
+four times display every spacemap record.
+.It Fl M
+Display the offset, spacemap, and free space of each metaslab.
 When specified twice, also display information about the maximum contiguous
 free space and the percentage of free space in each space map.
 When specified three times display every spacemap record.
@@ -202,9 +244,26 @@
 The
 .Fl p
 flag specifies the path under which devices are to be searched.
+.It Fl x Ar dumpdir
+All blocks accessed will be copied to files in the specified directory.
+The blocks will be placed in sparse files whose name is the same as
+that of the file or device read.  zdb can then be run on the generated files.
+Note that the
+.Fl bbc
+flags are sufficient to access (and thus copy)
+all metadata on the pool.
 .It Fl F
 Attempt to make an unreadable pool readable by trying progressively older
 transactions.
+.It Fl I Ar inflight I/Os
+Limit the number of outstanding checksum I/Os to the specified value.
+The default value is 1000. This option affects the performance of the
+.Fl c
+option.
+.It Fl o Ar var=value
+Set the given global libzpool variable to the provided value. The value must be
+an unsigned 32-bit integer. Currently only little-endian systems are supported
+to avoid accidentally setting the high 32 bits of 64-bit variables.
 .It Fl P
 Print numbers in an unscaled form more amenable to parsing, eg. 1000000 rather
 than 1M.
@@ -218,9 +277,7 @@
 transaction numbers.
 .It Fl U Ar cachefile
 Use a cache file other than
-.Pa /etc/zfs/zpool.cache .
-This option is only valid with
-.Fl C
+.Pa /boot/zfs/zpool.cache .
 .It Fl v
 Enable verbosity.
 Specify multiple times for increased verbosity.


Property changes on: trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.8
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -21,7 +22,9 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2016 Nexenta Systems, Inc.
  */
 
 #include <stdio.h>
@@ -59,7 +62,7 @@
 #include <sys/ddt.h>
 #include <sys/zfeature.h>
 #include <zfs_comutil.h>
-#undef ZFS_MAXNAMELEN
+#include <libcmdutils.h>
 #undef verify
 #include <libzfs.h>
 
@@ -75,9 +78,13 @@
 	DMU_OT_ZAP_OTHER : DMU_OT_NUMTYPES))
 
 #ifndef lint
-extern int zfs_recover;
+extern boolean_t zfs_recover;
+extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
+extern int zfs_vdev_async_read_max_active;
 #else
-int zfs_recover;
+boolean_t zfs_recover;
+uint64_t zfs_arc_max, zfs_arc_meta_limit;
+int zfs_vdev_async_read_max_active;
 #endif
 
 const char cmdname[] = "zdb";
@@ -89,7 +96,10 @@
 uint64_t *zopt_object = NULL;
 int zopt_objects = 0;
 libzfs_handle_t *g_zfs;
+uint64_t max_inflight = 1000;
 
+static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *);
+
 /*
  * These libumem hooks provide a reasonable set of defaults for the allocator's
  * debugging facilities.
@@ -110,16 +120,18 @@
 usage(void)
 {
 	(void) fprintf(stderr,
-            "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]]"
-            "poolname [object...]\n"
-            "       %s [-divPA] [-e -p path...] dataset [object...]\n"
-            "       %s -m [-LXFPA] [-t txg] [-e [-p path...]]"
-            "poolname [vdev [metaslab...]]\n"
-            "       %s -R [-A] [-e [-p path...]] poolname "
-            "vdev:offset:size[:flags]\n"
-            "       %s -S [-PA] [-e [-p path...]] poolname\n"
-            "       %s -l [-uA] device\n"
-            "       %s -C [-A] [-U config]\n\n",
+	    "Usage: %s [-CmMdibcsDvhLXFPAG] [-t txg] [-e [-p path...]] "
+	    "[-U config] [-I inflight I/Os] [-x dumpdir] [-o var=value] "
+	    "poolname [object...]\n"
+	    "       %s [-divPA] [-e -p path...] [-U config] dataset "
+	    "[object...]\n"
+	    "       %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
+	    "poolname [vdev [metaslab...]]\n"
+	    "       %s -R [-A] [-e [-p path...]] poolname "
+	    "vdev:offset:size[:flags]\n"
+	    "       %s -S [-PA] [-e [-p path...]] [-U config] poolname\n"
+	    "       %s -l [-Aqu] device\n"
+	    "       %s -C [-A] [-U config]\n\n",
 	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
 
 	(void) fprintf(stderr, "    Dataset name must include at least one "
@@ -129,7 +141,6 @@
 	(void) fprintf(stderr, "    If object numbers are specified, only "
 	    "those objects are dumped\n\n");
 	(void) fprintf(stderr, "    Options to control amount of output:\n");
-	(void) fprintf(stderr, "        -u uberblock\n");
 	(void) fprintf(stderr, "        -d dataset(s)\n");
 	(void) fprintf(stderr, "        -i intent logs\n");
 	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
@@ -136,6 +147,7 @@
 	(void) fprintf(stderr, "        -h pool history\n");
 	(void) fprintf(stderr, "        -b block statistics\n");
 	(void) fprintf(stderr, "        -m metaslabs\n");
+	(void) fprintf(stderr, "        -M metaslab groups\n");
 	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
 	    "all data) blocks\n");
 	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
@@ -142,13 +154,13 @@
 	(void) fprintf(stderr, "        -D dedup statistics\n");
 	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
 	(void) fprintf(stderr, "        -v verbose (applies to all others)\n");
-	(void) fprintf(stderr, "        -l dump label contents\n");
+	(void) fprintf(stderr, "        -l read label contents\n");
 	(void) fprintf(stderr, "        -L disable leak tracking (do not "
 	    "load spacemaps)\n");
 	(void) fprintf(stderr, "        -R read and display block from a "
 	    "device\n\n");
 	(void) fprintf(stderr, "    Below options are intended for use "
-	    "with other options (except -l):\n");
+	    "with other options:\n");
 	(void) fprintf(stderr, "        -A ignore assertions (-A), enable "
 	    "panic recovery (-AA) or both (-AAA)\n");
 	(void) fprintf(stderr, "        -F attempt automatic rewind within "
@@ -161,9 +173,20 @@
 	    "has altroot/not in a cachefile\n");
 	(void) fprintf(stderr, "        -p <path> -- use one or more with "
 	    "-e to specify path to vdev dir\n");
-	(void) fprintf(stderr, "	-P print numbers in parseable form\n");
+	(void) fprintf(stderr, "        -x <dumpdir> -- "
+	    "dump all read blocks into specified directory\n");
+	(void) fprintf(stderr, "        -P print numbers in parseable form\n");
 	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
 	    "searching for uberblocks\n");
+	(void) fprintf(stderr, "        -I <number of inflight I/Os> -- "
+	    "specify the maximum number of "
+	    "checksumming I/Os [default is 200]\n");
+	(void) fprintf(stderr, "        -G dump zfs_dbgmsg buffer before "
+	    "exiting\n");
+	(void) fprintf(stderr, "        -o <variable>=<value> set global "
+	    "variable to an unsigned 32-bit integer value\n");
+	(void) fprintf(stderr, "        -q don't print label contents\n");
+	(void) fprintf(stderr, "        -u uberblock\n");
 	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
 	    "to make only that option verbose\n");
 	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
@@ -170,6 +193,15 @@
 	exit(1);
 }
 
+static void
+dump_debug_buffer()
+{
+	if (dump_opt['G']) {
+		(void) printf("\n");
+		zfs_dbgmsg_print("zdb");
+	}
+}
+
 /*
  * Called for usage errors that are discovered after a call to spa_open(),
  * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
@@ -186,6 +218,8 @@
 	va_end(ap);
 	(void) fprintf(stderr, "\n");
 
+	dump_debug_buffer();
+
 	exit(1);
 }
 
@@ -230,12 +264,12 @@
 }
 
 static void
-zdb_nicenum(uint64_t num, char *buf)
+zdb_nicenum(uint64_t num, char *buf, size_t buflen)
 {
 	if (dump_opt['P'])
-		(void) sprintf(buf, "%llu", (longlong_t)num);
+		(void) snprintf(buf, buflen, "%llu", (longlong_t)num);
 	else
-		nicenum(num, buf);
+		nicenum(num, buf, sizeof (buf));
 }
 
 const char histo_stars[] = "****************************************";
@@ -242,7 +276,7 @@
 const int histo_width = sizeof (histo_stars) - 1;
 
 static void
-dump_histogram(const uint64_t *histo, int size)
+dump_histogram(const uint64_t *histo, int size, int offset)
 {
 	int i;
 	int minidx = size - 1;
@@ -263,7 +297,7 @@
 
 	for (i = minidx; i <= maxidx; i++) {
 		(void) printf("\t\t\t%3u: %6llu %s\n",
-		    i, (u_longlong_t)histo[i],
+		    i + offset, (u_longlong_t)histo[i],
 		    &histo_stars[(max - histo[i]) * histo_width / max]);
 	}
 }
@@ -316,19 +350,19 @@
 	    (u_longlong_t)zs.zs_salt);
 
 	(void) printf("\t\tLeafs with 2^n pointers:\n");
-	dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE);
+	dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);
 
 	(void) printf("\t\tBlocks with n*5 entries:\n");
-	dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE);
+	dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);
 
 	(void) printf("\t\tBlocks n/10 full:\n");
-	dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE);
+	dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);
 
 	(void) printf("\t\tEntries with n chunks:\n");
-	dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE);
+	dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);
 
 	(void) printf("\t\tBuckets with n entries:\n");
-	dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE);
+	dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
 }
 
 /*ARGSUSED*/
@@ -406,6 +440,84 @@
 	zap_cursor_fini(&zc);
 }
 
+static void
+dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	bpobj_phys_t *bpop = data;
+	char bytes[32], comp[32], uncomp[32];
+	/* Print the bpobj header found in data; optionally list its blkptrs. */
+	/* nicenum needs NN_NUMBUF_SZ bytes; make sure output won't get truncated */
+	CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
+
+	if (bpop == NULL)	/* nothing to print */
+		return;
+
+	zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes));
+	zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp));
+	zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp));
+
+	(void) printf("\t\tnum_blkptrs = %llu\n",
+	    (u_longlong_t)bpop->bpo_num_blkptrs);
+	(void) printf("\t\tbytes = %s\n", bytes);
+	if (size >= BPOBJ_SIZE_V1) {	/* comp/uncomp shown only from this size up */
+		(void) printf("\t\tcomp = %s\n", comp);
+		(void) printf("\t\tuncomp = %s\n", uncomp);
+	}
+	if (size >= sizeof (*bpop)) {	/* subobj fields need the full struct */
+		(void) printf("\t\tsubobjs = %llu\n",
+		    (u_longlong_t)bpop->bpo_subobjs);
+		(void) printf("\t\tnum_subobjs = %llu\n",
+		    (u_longlong_t)bpop->bpo_num_subobjs);
+	}
+
+	if (dump_opt['d'] < 5)	/* blkptr listing needs dump_opt['d'] >= 5 */
+		return;
+
+	for (uint64_t i = 0; i < bpop->bpo_num_blkptrs; i++) {
+		char blkbuf[BP_SPRINTF_LEN];
+		blkptr_t bp;
+
+		int err = dmu_read(os, object,
+		    i * sizeof (bp), sizeof (bp), &bp, 0);	/* i-th blkptr_t */
+		if (err != 0) {
+			(void) printf("got error %d from dmu_read\n", err);
+			break;
+		}
+		snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp);
+		(void) printf("\t%s\n", blkbuf);
+	}
+}
+
+/* ARGSUSED */
+static void
+dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	dmu_object_info_t doi;
+	/* Print the object's uint64_t entries up to the last nonzero one. */
+	VERIFY0(dmu_object_info(os, object, &doi));
+	uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);
+
+	int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
+	if (err != 0) {
+		(void) printf("got error %d from dmu_read\n", err);
+		kmem_free(subobjs, doi.doi_max_offset);
+		return;
+	}
+	/* Find the last nonzero entry so trailing zeros are not printed. */
+	int64_t last_nonzero = -1;
+	for (uint64_t i = 0; i < doi.doi_max_offset / sizeof (uint64_t); i++) {
+		if (subobjs[i] != 0)
+			last_nonzero = i;
+	}
+
+	for (int64_t i = 0; i <= last_nonzero; i++) {
+		(void) printf("\t%llu\n", (u_longlong_t)subobjs[i]);
+	}
+	kmem_free(subobjs, doi.doi_max_offset);
+}
+
 /*ARGSUSED*/
 static void
 dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
@@ -517,16 +629,75 @@
 	zap_cursor_fini(&zc);
 }
 
+int
+get_dtl_refcount(vdev_t *vd)
+{
+	int refcount = 0;
+	/* Leaf: 1 if its DTL space map dbuf is sizeof (space_map_phys_t), else 0. */
+	if (vd->vdev_ops->vdev_op_leaf) {
+		space_map_t *sm = vd->vdev_dtl_sm;
+
+		if (sm != NULL &&
+		    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
+			return (1);
+		return (0);
+	}
+	/* Non-leaf: the result is the sum over all children. */
+	for (int c = 0; c < vd->vdev_children; c++)
+		refcount += get_dtl_refcount(vd->vdev_child[c]);
+	return (refcount);
+}
+
+int
+get_metaslab_refcount(vdev_t *vd)
+{
+	int refcount = 0;
+	/* Count metaslab space maps whose dbuf is sizeof (space_map_phys_t). */
+	if (vd->vdev_top == vd && !vd->vdev_removing) {	/* top-level, not removing */
+		for (int m = 0; m < vd->vdev_ms_count; m++) {
+			space_map_t *sm = vd->vdev_ms[m]->ms_sm;
+
+			if (sm != NULL &&
+			    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
+				refcount++;
+		}
+	}
+	for (int c = 0; c < vd->vdev_children; c++)	/* add every child's count */
+		refcount += get_metaslab_refcount(vd->vdev_child[c]);
+
+	return (refcount);
+}
+
+static int
+verify_spacemap_refcounts(spa_t *spa)
+{
+	uint64_t expected_refcount = 0;
+	uint64_t actual_refcount;
+	/* Returns 0 if the counts agree; prints both and returns 2 otherwise. */
+	(void) feature_get_refcount(spa,
+	    &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
+	    &expected_refcount);	/* result ignored; stays 0 unless the call sets it */
+	actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
+	actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
+
+	if (expected_refcount != actual_refcount) {
+		(void) printf("space map refcount mismatch: expected %lld != "
+		    "actual %lld\n",
+		    (longlong_t)expected_refcount,
+		    (longlong_t)actual_refcount);
+		return (2);
+	}
+	return (0);
+}
+
 static void
-dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
+dump_spacemap(objset_t *os, space_map_t *sm)
 {
 	uint64_t alloc, offset, entry;
-	uint8_t mapshift = sm->sm_shift;
-	uint64_t mapstart = sm->sm_start;
 	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
 			    "INVALID", "INVALID", "INVALID", "INVALID" };
 
-	if (smo->smo_object == 0)
+	if (sm == NULL)
 		return;
 
 	/*
@@ -533,10 +704,14 @@
 	 * Print out the freelist entries in both encoded and decoded form.
 	 */
 	alloc = 0;
-	for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
-		VERIFY3U(0, ==, dmu_read(os, smo->smo_object, offset,
+	for (offset = 0; offset < space_map_length(sm);
+	    offset += sizeof (entry)) {
+		uint8_t mapshift = sm->sm_shift;
+
+		VERIFY0(dmu_read(os, space_map_object(sm), offset,
 		    sizeof (entry), &entry, DMU_READ_PREFETCH));
 		if (SM_DEBUG_DECODE(entry)) {
+
 			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
 			    (u_longlong_t)(offset / sizeof (entry)),
 			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
@@ -548,10 +723,10 @@
 			    (u_longlong_t)(offset / sizeof (entry)),
 			    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
 			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
-			    mapshift) + mapstart),
+			    mapshift) + sm->sm_start),
 			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
-			    mapshift) + mapstart + (SM_RUN_DECODE(entry) <<
-			    mapshift)),
+			    mapshift) + sm->sm_start +
+			    (SM_RUN_DECODE(entry) << mapshift)),
 			    (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
 			if (SM_TYPE_DECODE(entry) == SM_ALLOC)
 				alloc += SM_RUN_DECODE(entry) << mapshift;
@@ -559,10 +734,10 @@
 				alloc -= SM_RUN_DECODE(entry) << mapshift;
 		}
 	}
-	if (alloc != smo->smo_alloc) {
+	if (alloc != space_map_allocated(sm)) {
 		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
 		    "with space map summary (%llu)\n",
-		    (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc);
+		    (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc);
 	}
 }
 
@@ -570,15 +745,20 @@
 dump_metaslab_stats(metaslab_t *msp)
 {
 	char maxbuf[32];
-	space_map_t *sm = msp->ms_map;
-	avl_tree_t *t = sm->sm_pp_root;
-	int free_pct = sm->sm_space * 100 / sm->sm_size;
+	range_tree_t *rt = msp->ms_tree;
+	avl_tree_t *t = &msp->ms_size_tree;
+	int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
 
-	zdb_nicenum(space_map_maxsize(sm), maxbuf);
+	/* make sure nicenum has enough space */
+	CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);
 
+	zdb_nicenum(metaslab_block_maxsize(msp), maxbuf, sizeof (maxbuf));
+
 	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
 	    "segments", avl_numnodes(t), "maxsize", maxbuf,
 	    "freepct", free_pct);
+	(void) printf("\tIn-memory histogram:\n");
+	dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
 }
 
 static void
@@ -586,33 +766,46 @@
 {
 	vdev_t *vd = msp->ms_group->mg_vd;
 	spa_t *spa = vd->vdev_spa;
-	space_map_t *sm = msp->ms_map;
-	space_map_obj_t *smo = &msp->ms_smo;
+	space_map_t *sm = msp->ms_sm;
 	char freebuf[32];
 
-	zdb_nicenum(sm->sm_size - smo->smo_alloc, freebuf);
+	zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf,
+	    sizeof (freebuf));
 
 	(void) printf(
 	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
-	    (u_longlong_t)(sm->sm_start / sm->sm_size),
-	    (u_longlong_t)sm->sm_start, (u_longlong_t)smo->smo_object, freebuf);
+	    (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
+	    (u_longlong_t)space_map_object(sm), freebuf);
 
-	if (dump_opt['m'] > 1 && !dump_opt['L']) {
+	if (dump_opt['m'] > 2 && !dump_opt['L']) {
 		mutex_enter(&msp->ms_lock);
-		space_map_load_wait(sm);
-		if (!sm->sm_loaded)
-			VERIFY(space_map_load(sm, zfs_metaslab_ops,
-			    SM_FREE, smo, spa->spa_meta_objset) == 0);
+		metaslab_load_wait(msp);
+		if (!msp->ms_loaded) {
+			VERIFY0(metaslab_load(msp));
+			range_tree_stat_verify(msp->ms_tree);
+		}
 		dump_metaslab_stats(msp);
-		space_map_unload(sm);
+		metaslab_unload(msp);
 		mutex_exit(&msp->ms_lock);
 	}
 
-	if (dump_opt['d'] > 5 || dump_opt['m'] > 2) {
-		ASSERT(sm->sm_size == (1ULL << vd->vdev_ms_shift));
+	if (dump_opt['m'] > 1 && sm != NULL &&
+	    spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
+		/*
+		 * The space map histogram represents free space in chunks
+		 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
+		 */
+		(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
+		    (u_longlong_t)msp->ms_fragmentation);
+		dump_histogram(sm->sm_phys->smp_histogram,
+		    SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
+	}
 
+	if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
+		ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
+
 		mutex_enter(&msp->ms_lock);
-		dump_spacemap(spa->spa_meta_objset, smo, sm);
+		dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
 		mutex_exit(&msp->ms_lock);
 	}
 }
@@ -630,6 +823,47 @@
 }
 
 static void
+dump_metaslab_groups(spa_t *spa)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+	metaslab_class_t *mc = spa_normal_class(spa);
+	uint64_t fragmentation;
+	/* Print fragmentation and a histogram per top-level vdev, then for the pool. */
+	metaslab_class_histogram_verify(mc);
+
+	for (int c = 0; c < rvd->vdev_children; c++) {
+		vdev_t *tvd = rvd->vdev_child[c];
+		metaslab_group_t *mg = tvd->vdev_mg;
+
+		if (mg->mg_class != mc)	/* only groups of the normal class */
+			continue;
+
+		metaslab_group_histogram_verify(mg);
+		mg->mg_fragmentation = metaslab_group_fragmentation(mg);
+
+		(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
+		    "fragmentation",
+		    (u_longlong_t)tvd->vdev_id,
+		    (u_longlong_t)tvd->vdev_ms_count);
+		if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {	/* shown as "-" */
+			(void) printf("%3s\n", "-");
+		} else {
+			(void) printf("%3llu%%\n",
+			    (u_longlong_t)mg->mg_fragmentation);
+		}
+		dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
+	}
+
+	(void) printf("\tpool %s\tfragmentation", spa_name(spa));
+	fragmentation = metaslab_class_fragmentation(mc);	/* class-wide value */
+	if (fragmentation == ZFS_FRAG_INVALID)
+		(void) printf("\t%3s\n", "-");
+	else
+		(void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
+	dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
+}
+
+static void
 dump_metaslabs(spa_t *spa)
 {
 	vdev_t *vd, *rvd = spa->spa_root_vdev;
@@ -684,7 +918,7 @@
 		if (ddp->ddp_phys_birth == 0)
 			continue;
 		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
-		sprintf_blkptr(blkbuf, &blk);
+		snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
 		(void) printf("index %llx refcnt %llu %s %s\n",
 		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
 		    types[p], blkbuf);
@@ -801,9 +1035,9 @@
 }
 
 static void
-dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
+dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
 {
-	char *prefix = (void *)sm;
+	char *prefix = arg;
 
 	(void) printf("%s [%llu,%llu) length %llu\n",
 	    prefix,
@@ -833,17 +1067,16 @@
 	    required ? "DTL-required" : "DTL-expendable");
 
 	for (int t = 0; t < DTL_TYPES; t++) {
-		space_map_t *sm = &vd->vdev_dtl[t];
-		if (sm->sm_space == 0)
+		range_tree_t *rt = vd->vdev_dtl[t];
+		if (range_tree_space(rt) == 0)
 			continue;
 		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
 		    indent + 2, "", name[t]);
-		mutex_enter(sm->sm_lock);
-		space_map_walk(sm, dump_dtl_seg, (void *)prefix);
-		mutex_exit(sm->sm_lock);
+		mutex_enter(rt->rt_lock);
+		range_tree_walk(rt, dump_dtl_seg, prefix);
+		mutex_exit(rt->rt_lock);
 		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
-			dump_spacemap(spa->spa_meta_objset,
-			    &vd->vdev_dtl_smo, sm);
+			dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm);
 	}
 
 	for (int c = 0; c < vd->vdev_children; c++)
@@ -850,11 +1083,16 @@
 		dump_dtl(vd->vdev_child[c], indent + 4);
 }
 
+/* from spa_history.c: spa_history_create_obj() */
+#define	HIS_BUF_LEN_DEF	(128 << 10)
+#define	HIS_BUF_LEN_MAX	(1 << 30)
+
 static void
 dump_history(spa_t *spa)
 {
 	nvlist_t **events = NULL;
-	char buf[SPA_MAXBLOCKSIZE];
+	char *buf = NULL;
+	uint64_t bufsize = HIS_BUF_LEN_DEF;
 	uint64_t resid, len, off = 0;
 	uint_t num = 0;
 	int error;
@@ -863,8 +1101,11 @@
 	char tbuf[30];
 	char internalstr[MAXPATHLEN];
 
+	if ((buf = malloc(bufsize)) == NULL)
+		(void) fprintf(stderr, "Unable to read history: "
+		    "out of memory\n");
 	do {
-		len = sizeof (buf);
+		len = bufsize;
 
 		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
 			(void) fprintf(stderr, "Unable to read history: "
@@ -874,9 +1115,26 @@
 
 		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
 			break;
+		off -= resid;
 
-		off -= resid;
+		/*
+		 * If the history block is too big, double the buffer
+		 * size and try again.
+		 */
+		if (resid == len) {
+			free(buf);
+			buf = NULL;
+
+			bufsize <<= 1;
+			if ((bufsize >= HIS_BUF_LEN_MAX) ||
+			    ((buf = malloc(bufsize)) == NULL)) {
+				(void) fprintf(stderr, "Unable to read history: "
+				    "out of memory\n");
+				return;
+			}
+		}
 	} while (len != 0);
+	free(buf);
 
 	(void) printf("\nHistory:\n");
 	for (int i = 0; i < num; i++) {
@@ -928,7 +1186,8 @@
 }
 
 static uint64_t
-blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
+blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
+    const zbookmark_phys_t *zb)
 {
 	if (dnp == NULL) {
 		ASSERT(zb->zb_level < 0);
@@ -945,42 +1204,63 @@
 }
 
 static void
-sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp)
+snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
 {
 	const dva_t *dva = bp->blk_dva;
 	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
 
 	if (dump_opt['b'] >= 6) {
-		sprintf_blkptr(blkbuf, bp);
+		snprintf_blkptr(blkbuf, buflen, bp);	/* full form instead */
 		return;
 	}
 
+	if (BP_IS_EMBEDDED(bp)) {	/* embedded bp: printed without DVAs */
+		(void) snprintf(blkbuf, buflen,
+		    "EMBEDDED et=%u %llxL/%llxP B=%llu",
+		    (uint_t)BPE_GET_ETYPE(bp),
+		    (u_longlong_t)BPE_GET_LSIZE(bp),
+		    (u_longlong_t)BPE_GET_PSIZE(bp),
+		    (u_longlong_t)bp->blk_birth);
+		return;
+	}
+	/* Otherwise append "vdev:offset:asize " per DVA, then sizes and birth. */
 	blkbuf[0] = '\0';
-
 	for (int i = 0; i < ndvas; i++)
-		(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
+		(void) snprintf(blkbuf + strlen(blkbuf),
+		    buflen - strlen(blkbuf), "%llu:%llx:%llx ",
 		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
 		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
 		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
 
-	(void) sprintf(blkbuf + strlen(blkbuf),
-	    "%llxL/%llxP F=%llu B=%llu/%llu",
-	    (u_longlong_t)BP_GET_LSIZE(bp),
-	    (u_longlong_t)BP_GET_PSIZE(bp),
-	    (u_longlong_t)bp->blk_fill,
-	    (u_longlong_t)bp->blk_birth,
-	    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
+	if (BP_IS_HOLE(bp)) {	/* hole: logical size and birth only */
+		(void) snprintf(blkbuf + strlen(blkbuf),
+		    buflen - strlen(blkbuf),
+		    "%llxL B=%llu",
+		    (u_longlong_t)BP_GET_LSIZE(bp),
+		    (u_longlong_t)bp->blk_birth);
+	} else {
+		(void) snprintf(blkbuf + strlen(blkbuf),
+		    buflen - strlen(blkbuf),
+		    "%llxL/%llxP F=%llu B=%llu/%llu",
+		    (u_longlong_t)BP_GET_LSIZE(bp),
+		    (u_longlong_t)BP_GET_PSIZE(bp),
+		    (u_longlong_t)BP_GET_FILL(bp),
+		    (u_longlong_t)bp->blk_birth,
+		    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
+	}
 }
 
 static void
-print_indirect(blkptr_t *bp, const zbookmark_t *zb,
+print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
     const dnode_phys_t *dnp)
 {
 	char blkbuf[BP_SPRINTF_LEN];
 	int l;
 
-	ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
-	ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
+	if (!BP_IS_EMBEDDED(bp)) {
+		ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
+		ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
+	}
 
 	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
 
@@ -994,13 +1274,13 @@
 		}
 	}
 
-	sprintf_blkptr_compact(blkbuf, bp);
+	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
 	(void) printf("%s\n", blkbuf);
 }
 
 static int
 visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
-    blkptr_t *bp, const zbookmark_t *zb)
+    blkptr_t *bp, const zbookmark_phys_t *zb)
 {
 	int err = 0;
 
@@ -1009,8 +1289,8 @@
 
 	print_indirect(bp, zb, dnp);
 
-	if (BP_GET_LEVEL(bp) > 0) {
-		uint32_t flags = ARC_WAIT;
+	if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
+		arc_flags_t flags = ARC_FLAG_WAIT;
 		int i;
 		blkptr_t *cbp;
 		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
@@ -1026,7 +1306,7 @@
 		/* recursively visit blocks below this */
 		cbp = buf->b_data;
 		for (i = 0; i < epb; i++, cbp++) {
-			zbookmark_t czb;
+			zbookmark_phys_t czb;
 
 			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
 			    zb->zb_level - 1,
@@ -1034,11 +1314,11 @@
 			err = visit_indirect(spa, dnp, cbp, &czb);
 			if (err)
 				break;
-			fill += cbp->blk_fill;
+			fill += BP_GET_FILL(cbp);
 		}
 		if (!err)
-			ASSERT3U(fill, ==, bp->blk_fill);
-		(void) arc_buf_remove_ref(buf, &buf);
+			ASSERT3U(fill, ==, BP_GET_FILL(bp));
+		arc_buf_destroy(buf, &buf);
 	}
 
 	return (err);
@@ -1050,7 +1330,7 @@
 {
 	dnode_phys_t *dnp = dn->dn_phys;
 	int j;
-	zbookmark_t czb;
+	zbookmark_phys_t czb;
 
 	(void) printf("Indirect blocks:\n");
 
@@ -1073,6 +1353,9 @@
 	time_t crtime;
 	char nice[32];
 
+	/* make sure nicenum has enough space */
+	CTASSERT(sizeof (nice) >= NN_NUMBUF_SZ);
+
 	if (dd == NULL)
 		return;
 
@@ -1088,15 +1371,15 @@
 	    (u_longlong_t)dd->dd_origin_obj);
 	(void) printf("\t\tchild_dir_zapobj = %llu\n",
 	    (u_longlong_t)dd->dd_child_dir_zapobj);
-	zdb_nicenum(dd->dd_used_bytes, nice);
+	zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice));
 	(void) printf("\t\tused_bytes = %s\n", nice);
-	zdb_nicenum(dd->dd_compressed_bytes, nice);
+	zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice));
 	(void) printf("\t\tcompressed_bytes = %s\n", nice);
-	zdb_nicenum(dd->dd_uncompressed_bytes, nice);
+	zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice));
 	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
-	zdb_nicenum(dd->dd_quota, nice);
+	zdb_nicenum(dd->dd_quota, nice, sizeof (nice));
 	(void) printf("\t\tquota = %s\n", nice);
-	zdb_nicenum(dd->dd_reserved, nice);
+	zdb_nicenum(dd->dd_reserved, nice, sizeof (nice));
 	(void) printf("\t\treserved = %s\n", nice);
 	(void) printf("\t\tprops_zapobj = %llu\n",
 	    (u_longlong_t)dd->dd_props_zapobj);
@@ -1106,7 +1389,8 @@
 	    (u_longlong_t)dd->dd_flags);
 
 #define	DO(which) \
-	zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
+	zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \
+	    sizeof (nice)); \
 	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
 	DO(HEAD);
 	DO(SNAP);
@@ -1125,16 +1409,23 @@
 	char used[32], compressed[32], uncompressed[32], unique[32];
 	char blkbuf[BP_SPRINTF_LEN];
 
+	/* make sure nicenum has enough space */
+	CTASSERT(sizeof (used) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (compressed) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (uncompressed) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (unique) >= NN_NUMBUF_SZ);
+
 	if (ds == NULL)
 		return;
 
 	ASSERT(size == sizeof (*ds));
 	crtime = ds->ds_creation_time;
-	zdb_nicenum(ds->ds_referenced_bytes, used);
-	zdb_nicenum(ds->ds_compressed_bytes, compressed);
-	zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
-	zdb_nicenum(ds->ds_unique_bytes, unique);
-	sprintf_blkptr(blkbuf, &ds->ds_bp);
+	zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used));
+	zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed));
+	zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed,
+	    sizeof (uncompressed));
+	zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique));
+	snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
 
 	(void) printf("\t\tdir_obj = %llu\n",
 	    (u_longlong_t)ds->ds_dir_obj);
@@ -1179,7 +1470,7 @@
 	char blkbuf[BP_SPRINTF_LEN];
 
 	if (bp->blk_birth != 0) {
-		sprintf_blkptr(blkbuf, bp);
+		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
 		(void) printf("\t%s\n", blkbuf);
 	}
 	return (0);
@@ -1192,12 +1483,15 @@
 	bptree_phys_t *bt;
 	dmu_buf_t *db;
 
+	/* make sure nicenum has enough space */
+	CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
+
 	if (dump_opt['d'] < 3)
 		return;
 
 	VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
 	bt = db->db_data;
-	zdb_nicenum(bt->bt_bytes, bytes);
+	zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes));
 	(void) printf("\n    %s: %llu datasets, %s\n",
 	    name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
 	dmu_buf_rele(db, FTAG);
@@ -1217,31 +1511,37 @@
 	char blkbuf[BP_SPRINTF_LEN];
 
 	ASSERT(bp->blk_birth != 0);
-	sprintf_blkptr_compact(blkbuf, bp);
+	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
 	(void) printf("\t%s\n", blkbuf);
 	return (0);
 }
 
 static void
-dump_bpobj(bpobj_t *bpo, char *name, int indent)
+dump_full_bpobj(bpobj_t *bpo, char *name, int indent)
 {
 	char bytes[32];
 	char comp[32];
 	char uncomp[32];
 
+	/* make sure nicenum has enough space */
+	CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
+
 	if (dump_opt['d'] < 3)
 		return;
 
-	zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes);
+	zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes));
 	if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
-		zdb_nicenum(bpo->bpo_phys->bpo_comp, comp);
-		zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp);
+		zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp));
+		zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp));
 		(void) printf("    %*s: object %llu, %llu local blkptrs, "
-		    "%llu subobjs, %s (%s/%s comp)\n",
+		    "%llu subobjs in object %llu, %s (%s/%s comp)\n",
 		    indent * 8, name,
 		    (u_longlong_t)bpo->bpo_object,
 		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
 		    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
+		    (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
 		    bytes, comp, uncomp);
 
 		for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
@@ -1258,7 +1558,7 @@
 				    error, (u_longlong_t)subobj);
 				continue;
 			}
-			dump_bpobj(&subbpo, "subobj", indent + 1);
+			dump_full_bpobj(&subbpo, "subobj", indent + 1);
 			bpobj_close(&subbpo);
 		}
 	} else {
@@ -1288,12 +1588,22 @@
 	char comp[32];
 	char uncomp[32];
 
+	/* make sure nicenum has enough space */
+	CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
+
 	if (dump_opt['d'] < 3)
 		return;
 
-	zdb_nicenum(dl->dl_phys->dl_used, bytes);
-	zdb_nicenum(dl->dl_phys->dl_comp, comp);
-	zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp);
+	if (dl->dl_oldfmt) {
+		dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0);
+		return;
+	}
+
+	zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes));
+	zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp));
+	zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp));
 	(void) printf("\n    Deadlist: %s (%s/%s comp)\n",
 	    bytes, comp, uncomp);
 
@@ -1312,8 +1622,7 @@
 			(void) snprintf(buf, sizeof (buf), "mintxg %llu -> ",
 			    (longlong_t)dle->dle_mintxg,
 			    (longlong_t)dle->dle_bpobj.bpo_object);
-
-			dump_bpobj(&dle->dle_bpobj, buf, 0);
+			dump_full_bpobj(&dle->dle_bpobj, buf, 0);
 		} else {
 			(void) printf("mintxg %llu -> obj %llu\n",
 			    (longlong_t)dle->dle_mintxg,
@@ -1450,23 +1759,19 @@
 		return;
 	}
 
-	error = zfs_obj_to_path(os, object, path, sizeof (path));
-	if (error != 0) {
-		(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
-		    (u_longlong_t)object);
-	}
-	if (dump_opt['d'] < 3) {
-		(void) printf("\t%s\n", path);
-		(void) sa_handle_destroy(hdl);
-		return;
-	}
-
 	z_crtime = (time_t)crtm[0];
 	z_atime = (time_t)acctm[0];
 	z_mtime = (time_t)modtm[0];
 	z_ctime = (time_t)chgtm[0];
 
-	(void) printf("\tpath	%s\n", path);
+	if (dump_opt['d'] > 4) {
+		error = zfs_obj_to_path(os, object, path, sizeof (path));
+		if (error != 0) {
+			(void) snprintf(path, sizeof (path),
+			    "\?\?\?<object#%llu>", (u_longlong_t)object);
+		}
+		(void) printf("\tpath	%s\n", path);
+	}
 	dump_uidgid(os, uid, gid);
 	(void) printf("\tatime	%s", ctime(&z_atime));
 	(void) printf("\tmtime	%s", ctime(&z_mtime));
@@ -1505,8 +1810,8 @@
 	dump_uint64,		/* object array			*/
 	dump_none,		/* packed nvlist		*/
 	dump_packed_nvlist,	/* packed nvlist size		*/
-	dump_none,		/* bplist			*/
-	dump_none,		/* bplist header		*/
+	dump_none,		/* bpobj			*/
+	dump_bpobj,		/* bpobj header			*/
 	dump_none,		/* SPA space map header		*/
 	dump_none,		/* SPA space map		*/
 	dump_none,		/* ZIL intent log		*/
@@ -1553,7 +1858,7 @@
 	dump_zap,		/* deadlist			*/
 	dump_none,		/* deadlist hdr			*/
 	dump_zap,		/* dsl clones			*/
-	dump_none,		/* bpobj subobjs		*/
+	dump_bpobj_subobjs,	/* bpobj subobjs		*/
 	dump_unknown,		/* Unknown type, must be last	*/
 };
 
@@ -1570,6 +1875,13 @@
 	char aux[50];
 	int error;
 
+	/* make sure nicenum has enough space */
+	CTASSERT(sizeof (iblk) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (dblk) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ);
+
 	if (*print_header) {
 		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
 		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
@@ -1590,11 +1902,11 @@
 	}
 	dmu_object_info_from_dnode(dn, &doi);
 
-	zdb_nicenum(doi.doi_metadata_block_size, iblk);
-	zdb_nicenum(doi.doi_data_block_size, dblk);
-	zdb_nicenum(doi.doi_max_offset, lsize);
-	zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize);
-	zdb_nicenum(doi.doi_bonus_size, bonus_size);
+	zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk));
+	zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk));
+	zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize));
+	zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize));
+	zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size));
 	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
 	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
 	    doi.doi_max_offset);
@@ -1657,6 +1969,8 @@
 
 		for (;;) {
 			char segsize[32];
+			/* make sure nicenum has enough space */
+			CTASSERT(sizeof (segsize) >= NN_NUMBUF_SZ);
 			error = dnode_next_offset(dn,
 			    0, &start, minlvl, blkfill, 0);
 			if (error)
@@ -1664,7 +1978,7 @@
 			end = start;
 			error = dnode_next_offset(dn,
 			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
-			zdb_nicenum(end - start, segsize);
+			zdb_nicenum(end - start, segsize, sizeof (segsize));
 			(void) printf("\t\tsegment [%016llx, %016llx)"
 			    " size %5s\n", (u_longlong_t)start,
 			    (u_longlong_t)end, segsize);
@@ -1689,12 +2003,15 @@
 	uint64_t refdbytes, usedobjs, scratch;
 	char numbuf[32];
 	char blkbuf[BP_SPRINTF_LEN + 20];
-	char osname[MAXNAMELEN];
+	char osname[ZFS_MAX_DATASET_NAME_LEN];
 	char *type = "UNKNOWN";
 	int verbosity = dump_opt['d'];
 	int print_header = 1;
 	int i, error;
 
+	/* make sure nicenum has enough space */
+	CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ);
+
 	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
 	dmu_objset_fast_stat(os, &dds);
 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
@@ -1704,20 +2021,21 @@
 
 	if (dds.dds_type == DMU_OST_META) {
 		dds.dds_creation_txg = TXG_INITIAL;
-		usedobjs = os->os_rootbp->blk_fill;
-		refdbytes = os->os_spa->spa_dsl_pool->
-		    dp_mos_dir->dd_phys->dd_used_bytes;
+		usedobjs = BP_GET_FILL(os->os_rootbp);
+		refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)->
+		    dd_used_bytes;
 	} else {
 		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
 	}
 
-	ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
+	ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));
 
-	zdb_nicenum(refdbytes, numbuf);
+	zdb_nicenum(refdbytes, numbuf, sizeof (numbuf));
 
 	if (verbosity >= 4) {
-		(void) sprintf(blkbuf, ", rootbp ");
-		(void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
+		(void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
+		(void) snprintf_blkptr(blkbuf + strlen(blkbuf),
+		    sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
 	} else {
 		blkbuf[0] = '\0';
 	}
@@ -1747,7 +2065,7 @@
 	if (verbosity < 2)
 		return;
 
-	if (os->os_rootbp->blk_birth == 0)
+	if (BP_IS_HOLE(os->os_rootbp))
 		return;
 
 	dump_object(os, 0, verbosity, &print_header);
@@ -1788,7 +2106,7 @@
 	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
 	if (dump_opt['u'] >= 3) {
 		char blkbuf[BP_SPRINTF_LEN];
-		sprintf_blkptr(blkbuf, &ub->ub_rootbp);
+		snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
 		(void) printf("\trootbp = %s\n", blkbuf);
 	}
 	(void) printf(footer ? footer : "");
@@ -1888,36 +2206,37 @@
 	}
 }
 
-static void
+static int
 dump_label(const char *dev)
 {
 	int fd;
 	vdev_label_t label;
-	char *path, *buf = label.vl_vdev_phys.vp_nvlist;
+	char path[MAXPATHLEN];
+	char *buf = label.vl_vdev_phys.vp_nvlist;
 	size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
 	struct stat64 statbuf;
 	uint64_t psize, ashift;
-	int len = strlen(dev) + 1;
+	boolean_t label_found = B_FALSE;	/* set once any label unpacks */
 
-	if (strncmp(dev, "/dev/dsk/", 9) == 0) {
-		len++;
-		path = malloc(len);
-		(void) snprintf(path, len, "%s%s", "/dev/rdsk/", dev + 9);
-	} else {
-		path = strdup(dev);
-	}
+	(void) strlcpy(path, dev, sizeof (path));	/* default: dev as given */
+	if (dev[0] == '/') {
+		if (strncmp(dev, ZFS_DISK_ROOTD,
+		    strlen(ZFS_DISK_ROOTD)) == 0) {
+			(void) snprintf(path, sizeof (path), "%s%s",
+			    ZFS_RDISK_ROOTD, dev + strlen(ZFS_DISK_ROOTD));
+		}
+	} else if (stat64(path, &statbuf) != 0) {	/* relative and not found */
+		char *s;
 
-	if ((fd = open64(path, O_RDONLY)) < 0) {
-		(void) printf("cannot open '%s': %s\n", path, strerror(errno));
-		free(path);
-		exit(1);
+		(void) snprintf(path, sizeof (path), "%s%s", ZFS_RDISK_ROOTD,
+		    dev);
+		if ((s = strrchr(dev, 's')) == NULL || !isdigit((unsigned char)s[1]))
+			(void) strlcat(path, "s0", sizeof (path));
 	}
 
-	if (fstat64(fd, &statbuf) != 0) {
+	if (stat64(path, &statbuf) != 0) {
 		(void) printf("failed to stat '%s': %s\n", path,
 		    strerror(errno));
-		free(path);
-		(void) close(fd);
 		exit(1);
 	}
 
@@ -1924,11 +2243,14 @@
 	if (S_ISBLK(statbuf.st_mode)) {
 		(void) printf("cannot use '%s': character device required\n",
 		    path);
-		free(path);
-		(void) close(fd);
 		exit(1);
 	}
 
+	if ((fd = open64(path, O_RDONLY)) < 0) {	/* opened only after the checks */
+		(void) printf("cannot open '%s': %s\n", path, strerror(errno));
+		exit(1);
+	}
+
 	psize = statbuf.st_size;
 	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
 
@@ -1935,23 +2257,28 @@
 	for (int l = 0; l < VDEV_LABELS; l++) {
 		nvlist_t *config = NULL;
 
-		(void) printf("--------------------------------------------\n");
-		(void) printf("LABEL %d\n", l);
-		(void) printf("--------------------------------------------\n");
+		if (!dump_opt['q']) {	/* dump_opt['q'] suppresses label output */
+			(void) printf("------------------------------------\n");
+			(void) printf("LABEL %d\n", l);
+			(void) printf("------------------------------------\n");
+		}
 
 		if (pread64(fd, &label, sizeof (label),
 		    vdev_label_offset(psize, l, 0)) != sizeof (label)) {
-			(void) printf("failed to read label %d\n", l);
+			if (!dump_opt['q'])
+				(void) printf("failed to read label %d\n", l);
 			continue;
 		}
 
 		if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
-			(void) printf("failed to unpack label %d\n", l);
+			if (!dump_opt['q'])
+				(void) printf("failed to unpack label %d\n", l);
 			ashift = SPA_MINBLOCKSHIFT;
 		} else {
 			nvlist_t *vdev_tree = NULL;
 
-			dump_nvlist(config, 4);
+			if (!dump_opt['q'])
+				dump_nvlist(config, 4);
 			if ((nvlist_lookup_nvlist(config,
 			    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
 			    (nvlist_lookup_uint64(vdev_tree,
@@ -1958,15 +2285,19 @@
 			    ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
 				ashift = SPA_MINBLOCKSHIFT;
 			nvlist_free(config);
+			label_found = B_TRUE;
 		}
 		if (dump_opt['u'])
 			dump_label_uberblocks(&label, ashift);
 	}
 
-	free(path);
 	(void) close(fd);
+
+	return (label_found ? 0 : 2);	/* 2: no label could be unpacked */
 }
 
+static uint64_t dataset_feature_count[SPA_FEATURES];
+
 /*ARGSUSED*/
 static int
 dump_one_dir(const char *dsname, void *arg)
@@ -1979,6 +2310,15 @@
 		(void) printf("Could not open %s, error %d\n", dsname, error);
 		return (0);
 	}
+
+	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+		if (!dmu_objset_ds(os)->ds_feature_inuse[f])
+			continue;
+		ASSERT(spa_feature_table[f].fi_flags &
+		    ZFEATURE_FLAG_PER_DATASET);
+		dataset_feature_count[f]++;
+	}
+
 	dump_dir(os);
 	dmu_objset_disown(os, FTAG);
 	fuid_table_destroy();
@@ -1989,12 +2329,14 @@
 /*
  * Block statistics.
  */
-#define	PSIZE_HISTO_SIZE (SPA_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1)
+#define	PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2)
 typedef struct zdb_blkstats {
 	uint64_t zb_asize;
 	uint64_t zb_lsize;
 	uint64_t zb_psize;
 	uint64_t zb_count;
+	uint64_t zb_gangs;
+	uint64_t zb_ditto_samevdev;
 	uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
 } zdb_blkstats_t;
 
@@ -2019,6 +2361,9 @@
 	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
 	uint64_t	zcb_dedup_asize;
 	uint64_t	zcb_dedup_blocks;
+	uint64_t	zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
+	uint64_t	zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
+	    [BPE_PAYLOAD_SIZE + 1];	/* psize index may equal the max */
 	uint64_t	zcb_start;
 	uint64_t	zcb_lastprint;
 	uint64_t	zcb_totalasize;
@@ -2042,6 +2387,7 @@
 	for (int i = 0; i < 4; i++) {
 		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
 		int t = (i & 1) ? type : ZDB_OT_TOTAL;
+		int equal;
 		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
 
 		zb->zb_asize += BP_GET_ASIZE(bp);
@@ -2048,9 +2394,45 @@
 		zb->zb_lsize += BP_GET_LSIZE(bp);
 		zb->zb_psize += BP_GET_PSIZE(bp);
 		zb->zb_count++;
-		zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++;
+
+		/*
+		 * The histogram is only big enough to record blocks up to
+		 * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last,
+		 * "other", bucket.
+		 */
+		int idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT;
+		idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1);
+		zb->zb_psize_histogram[idx]++;
+
+		zb->zb_gangs += BP_COUNT_GANG(bp);
+
+		switch (BP_GET_NDVAS(bp)) {
+		case 2:
+			if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
+			    DVA_GET_VDEV(&bp->blk_dva[1]))
+				zb->zb_ditto_samevdev++;
+			break;
+		case 3:
+			equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
+			    DVA_GET_VDEV(&bp->blk_dva[1])) +
+			    (DVA_GET_VDEV(&bp->blk_dva[0]) ==
+			    DVA_GET_VDEV(&bp->blk_dva[2])) +
+			    (DVA_GET_VDEV(&bp->blk_dva[1]) ==
+			    DVA_GET_VDEV(&bp->blk_dva[2]));
+			if (equal != 0)
+				zb->zb_ditto_samevdev++;
+			break;
+		}
+
 	}
 
+	if (BP_IS_EMBEDDED(bp)) {
+		zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
+		zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
+		    [BPE_GET_PSIZE(bp)]++;
+		return;
+	}
+
 	if (dump_opt['L'])
 		return;
 
@@ -2079,12 +2461,52 @@
 	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
 }
 
+/* ARGSUSED */
+static void
+zdb_blkptr_done(zio_t *zio)
+{
+	spa_t *spa = zio->io_spa;
+	blkptr_t *bp = zio->io_bp;
+	int ioerr = zio->io_error;
+	zdb_cb_t *zcb = zio->io_private;
+	zbookmark_phys_t *zb = &zio->io_bookmark;
+
+	zio_data_buf_free(zio->io_data, zio->io_size);
+
+	mutex_enter(&spa->spa_scrub_lock);
+	spa->spa_scrub_inflight--;		/* this read is finished */
+	cv_broadcast(&spa->spa_scrub_io_cv);	/* wake throttled issuers */
+
+	if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
+		char blkbuf[BP_SPRINTF_LEN];
+
+		zcb->zcb_haderrors = 1;
+		zcb->zcb_errors[ioerr]++;
+
+		if (dump_opt['b'] >= 2)
+			snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
+		else
+			blkbuf[0] = '\0';
+
+		(void) printf("zdb_blkptr_cb: "
+		    "Got error %d reading "
+		    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
+		    ioerr,
+		    (u_longlong_t)zb->zb_objset,
+		    (u_longlong_t)zb->zb_object,
+		    (longlong_t)zb->zb_level,	/* signed: matches %lld */
+		    (u_longlong_t)zb->zb_blkid,
+		    blkbuf);
+	}
+	mutex_exit(&spa->spa_scrub_lock);
+}
+
+/* ARGSUSED */
 static int
 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
-    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
+    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 {
 	zdb_cb_t *zcb = arg;
-	char blkbuf[BP_SPRINTF_LEN];
 	dmu_object_type_t type;
 	boolean_t is_metadata;
 
@@ -2091,6 +2513,21 @@
 	if (bp == NULL)
 		return (0);
 
+	if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
+		char blkbuf[BP_SPRINTF_LEN];
+		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
+		(void) printf("objset %llu object %llu "
+		    "level %lld offset 0x%llx %s\n",
+		    (u_longlong_t)zb->zb_objset,
+		    (u_longlong_t)zb->zb_object,
+		    (longlong_t)zb->zb_level,
+		    (u_longlong_t)blkid2offset(dnp, bp, zb),
+		    blkbuf);
+	}
+
+	if (BP_IS_HOLE(bp))
+		return (0);
+
 	type = BP_GET_TYPE(bp);
 
 	zdb_count_block(zcb, zilog, bp,
@@ -2098,10 +2535,10 @@
 
 	is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
 
-	if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
-		int ioerr;
+	if (!BP_IS_EMBEDDED(bp) &&
+	    (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
 		size_t size = BP_GET_PSIZE(bp);
-		void *data = malloc(size);
+		void *data = zio_data_buf_alloc(size);
 		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
 
 		/* If it's an intent log block, failure is expected. */
@@ -2108,46 +2545,26 @@
 		if (zb->zb_level == ZB_ZIL_LEVEL)
 			flags |= ZIO_FLAG_SPECULATIVE;
 
-		ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
-		    NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
+		mutex_enter(&spa->spa_scrub_lock);
+		while (spa->spa_scrub_inflight > max_inflight)
+			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
+		spa->spa_scrub_inflight++;
+		mutex_exit(&spa->spa_scrub_lock);
 
-		free(data);
-		if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
-			zcb->zcb_haderrors = 1;
-			zcb->zcb_errors[ioerr]++;
-
-			if (dump_opt['b'] >= 2)
-				sprintf_blkptr(blkbuf, bp);
-			else
-				blkbuf[0] = '\0';
-
-			(void) printf("zdb_blkptr_cb: "
-			    "Got error %d reading "
-			    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
-			    ioerr,
-			    (u_longlong_t)zb->zb_objset,
-			    (u_longlong_t)zb->zb_object,
-			    (u_longlong_t)zb->zb_level,
-			    (u_longlong_t)zb->zb_blkid,
-			    blkbuf);
-		}
+		zio_nowait(zio_read(NULL, spa, bp, data, size,
+		    zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
 	}
 
 	zcb->zcb_readfails = 0;
 
-	if (dump_opt['b'] >= 5) {
-		sprintf_blkptr(blkbuf, bp);
-		(void) printf("objset %llu object %llu "
-		    "level %lld offset 0x%llx %s\n",
-		    (u_longlong_t)zb->zb_objset,
-		    (u_longlong_t)zb->zb_object,
-		    (longlong_t)zb->zb_level,
-		    (u_longlong_t)blkid2offset(dnp, bp, zb),
-		    blkbuf);
-	}
+	/* only call gethrtime() every 100 blocks */
+	static int iters;
+	if (++iters > 100)
+		iters = 0;
+	else
+		return (0);
 
-	if (dump_opt['b'] < 5 && isatty(STDERR_FILENO) &&
-	    gethrtime() > zcb->zcb_lastprint + NANOSEC) {
+	if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) {
 		uint64_t now = gethrtime();
 		char buf[10];
 		uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
@@ -2156,6 +2573,9 @@
 		int sec_remaining =
 		    (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
 
+		/* make sure nicenum has enough space */
+		CTASSERT(sizeof (buf) >= NN_NUMBUF_SZ);
+
 		zfs_nicenum(bytes, buf, sizeof (buf));
 		(void) fprintf(stderr,
 		    "\r%5s completed (%4dMB/s) "
@@ -2172,39 +2592,16 @@
 }
 
 static void
-zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
+zdb_leak(void *arg, uint64_t start, uint64_t size)
 {
-	vdev_t *vd = sm->sm_ppd;
+	vdev_t *vd = arg;
 
 	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
 	    (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
 }
 
-/* ARGSUSED */
-static void
-zdb_space_map_load(space_map_t *sm)
-{
-}
-
-static void
-zdb_space_map_unload(space_map_t *sm)
-{
-	space_map_vacate(sm, zdb_leak, sm);
-}
-
-/* ARGSUSED */
-static void
-zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
-{
-}
-
-static space_map_ops_t zdb_space_map_ops = {
-	zdb_space_map_load,
-	zdb_space_map_unload,
-	NULL,	/* alloc */
-	zdb_space_map_claim,
-	NULL,	/* free */
-	NULL	/* maxsize */
+static metaslab_ops_t zdb_metaslab_ops = {
+	NULL	/* alloc */
 };
 
 static void
@@ -2254,19 +2651,47 @@
 
 	if (!dump_opt['L']) {
 		vdev_t *rvd = spa->spa_root_vdev;
-		for (int c = 0; c < rvd->vdev_children; c++) {
+		for (uint64_t c = 0; c < rvd->vdev_children; c++) {
 			vdev_t *vd = rvd->vdev_child[c];
-			for (int m = 0; m < vd->vdev_ms_count; m++) {
+			for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
 				metaslab_t *msp = vd->vdev_ms[m];
 				mutex_enter(&msp->ms_lock);
-				space_map_unload(msp->ms_map);
-				VERIFY(space_map_load(msp->ms_map,
-				    &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
-				    spa->spa_meta_objset) == 0);
-				msp->ms_map->sm_ppd = vd;
+				metaslab_unload(msp);
+
+				/*
+				 * For leak detection, we overload the metaslab
+				 * ms_tree to contain allocated segments
+				 * instead of free segments. As a result,
+				 * we can't use the normal metaslab_load/unload
+				 * interfaces.
+				 */
+				if (msp->ms_sm != NULL) {
+					(void) fprintf(stderr,
+					    "\rloading space map for "
+					    "vdev %llu of %llu, "
+					    "metaslab %llu of %llu ...",
+					    (longlong_t)c,
+					    (longlong_t)rvd->vdev_children,
+					    (longlong_t)m,
+					    (longlong_t)vd->vdev_ms_count);
+
+					msp->ms_ops = &zdb_metaslab_ops;
+
+					/*
+					 * We don't want to spend the CPU
+					 * manipulating the size-ordered
+					 * tree, so clear the range_tree
+					 * ops.
+					 */
+					msp->ms_tree->rt_ops = NULL;
+					VERIFY0(space_map_load(msp->ms_sm,
+					    msp->ms_tree, SM_ALLOC));
+					msp->ms_loaded = B_TRUE;
+				}
 				mutex_exit(&msp->ms_lock);
 			}
 		}
+		(void) fprintf(stderr, "\n");
 	}
 
 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
@@ -2286,7 +2711,20 @@
 			for (int m = 0; m < vd->vdev_ms_count; m++) {
 				metaslab_t *msp = vd->vdev_ms[m];
 				mutex_enter(&msp->ms_lock);
-				space_map_unload(msp->ms_map);
+
+				/*
+				 * The ms_tree has been overloaded to
+				 * contain allocated segments. Now that we
+				 * finished traversing all blocks, any
+				 * block that remains in the ms_tree
+				 * represents an allocated block that we
+				 * did not claim during the traversal.
+				 * Claimed blocks would have been removed
+				 * from the ms_tree.
+				 */
+				range_tree_vacate(msp->ms_tree, zdb_leak, vd);
+				msp->ms_loaded = B_FALSE;
+
 				mutex_exit(&msp->ms_lock);
 			}
 		}
@@ -2301,7 +2739,7 @@
 
 	if (dump_opt['b'] >= 5) {
 		char blkbuf[BP_SPRINTF_LEN];
-		sprintf_blkptr(blkbuf, bp);
+		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
 		(void) printf("[%s] %s\n",
 		    "deferred free", blkbuf);
 	}
@@ -2316,7 +2754,7 @@
 	zdb_blkstats_t *zb, *tzb;
 	uint64_t norm_alloc, norm_space, total_alloc, total_found;
 	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
-	int leaks = 0;
+	boolean_t leaks = B_FALSE;
 
 	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
 	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
@@ -2344,8 +2782,7 @@
 		(void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
 		    count_block_cb, &zcb, NULL);
 	}
-	if (spa_feature_is_active(spa,
-	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+	if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
 		VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
 		    spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
 		    &zcb, NULL));
@@ -2358,6 +2795,20 @@
 	zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
 	zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
 
+	/*
+	 * If we've traversed the data blocks then we need to wait for those
+	 * I/Os to complete. We leverage "The Godfather" zio to wait on
+	 * all async I/Os to complete.
+	 */
+	if (dump_opt['c']) {
+		for (int i = 0; i < max_ncpus; i++) {
+			(void) zio_wait(spa->spa_async_zio_root[i]);
+			spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL,
+			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
+			    ZIO_FLAG_GODFATHER);
+		}
+	}
+
 	if (zcb.zcb_haderrors) {
 		(void) printf("\nError counts:\n\n");
 		(void) printf("\t%5s  %s\n", "errno", "count");
@@ -2393,7 +2844,7 @@
 		    (u_longlong_t)total_alloc,
 		    (dump_opt['L']) ? "unreachable" : "leaked",
 		    (longlong_t)(total_alloc - total_found));
-		leaks = 1;
+		leaks = B_TRUE;
 	}
 
 	if (tzb->zb_count == 0)
@@ -2402,6 +2853,8 @@
 	(void) printf("\n");
 	(void) printf("\tbp count:      %10llu\n",
 	    (u_longlong_t)tzb->zb_count);
+	(void) printf("\tganged count:  %10llu\n",
+	    (longlong_t)tzb->zb_gangs);
 	(void) printf("\tbp logical:    %10llu      avg: %6llu\n",
 	    (u_longlong_t)tzb->zb_lsize,
 	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
@@ -2423,6 +2876,28 @@
 	(void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
 	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
 
+	for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
+		if (zcb.zcb_embedded_blocks[i] == 0)
+			continue;
+		(void) printf("\n");
+		(void) printf("\tadditional, non-pointer bps of type %u: "
+		    "%10llu\n",
+		    i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);
+
+		if (dump_opt['b'] >= 3) {
+			(void) printf("\t number of (compressed) bytes:  "
+			    "number of bps\n");
+			dump_histogram(zcb.zcb_embedded_histogram[i],
+			    sizeof (zcb.zcb_embedded_histogram[i]) /
+			    sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
+		}
+	}
+
+	if (tzb->zb_ditto_samevdev != 0) {
+		(void) printf("\tDittoed blocks on same vdev: %llu\n",
+		    (longlong_t)tzb->zb_ditto_samevdev);
+	}
+
 	if (dump_opt['b'] >= 2) {
 		int l, t, level;
 		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
@@ -2430,9 +2905,17 @@
 
 		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
 			char csize[32], lsize[32], psize[32], asize[32];
-			char avg[32];
+			char avg[32], gang[32];
 			char *typename;
 
+			/* make sure nicenum has enough space */
+			CTASSERT(sizeof (csize) >= NN_NUMBUF_SZ);
+			CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
+			CTASSERT(sizeof (psize) >= NN_NUMBUF_SZ);
+			CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
+			CTASSERT(sizeof (avg) >= NN_NUMBUF_SZ);
+			CTASSERT(sizeof (gang) >= NN_NUMBUF_SZ);
+
 			if (t < DMU_OT_NUMTYPES)
 				typename = dmu_ot[t].ot_name;
 			else
@@ -2466,11 +2949,17 @@
 				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
 					continue;
 
-				zdb_nicenum(zb->zb_count, csize);
-				zdb_nicenum(zb->zb_lsize, lsize);
-				zdb_nicenum(zb->zb_psize, psize);
-				zdb_nicenum(zb->zb_asize, asize);
-				zdb_nicenum(zb->zb_asize / zb->zb_count, avg);
+				zdb_nicenum(zb->zb_count, csize,
+				    sizeof (csize));
+				zdb_nicenum(zb->zb_lsize, lsize,
+				    sizeof (lsize));
+				zdb_nicenum(zb->zb_psize, psize,
+				    sizeof (psize));
+				zdb_nicenum(zb->zb_asize, asize,
+				    sizeof (asize));
+				zdb_nicenum(zb->zb_asize / zb->zb_count, avg,
+				    sizeof (avg));
+				zdb_nicenum(zb->zb_gangs, gang, sizeof (gang));
 
 				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
 				    "\t%5.2f\t%6.2f\t",
@@ -2484,12 +2973,17 @@
 					(void) printf("    L%d %s\n",
 					    level, typename);
 
+				if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) {
+					(void) printf("\t number of ganged "
+					    "blocks: %s\n", gang);
+				}
+
 				if (dump_opt['b'] >= 4) {
 					(void) printf("psize "
 					    "(in 512-byte sectors): "
 					    "number of blocks\n");
 					dump_histogram(zb->zb_psize_histogram,
-					    PSIZE_HISTO_SIZE);
+					    PSIZE_HISTO_SIZE, 0);
 				}
 			}
 		}
@@ -2518,13 +3012,13 @@
 /* ARGSUSED */
 static int
 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
-    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
+    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 {
 	avl_tree_t *t = arg;
 	avl_index_t where;
 	zdb_ddt_entry_t *zdde, zdde_search;
 
-	if (bp == NULL)
+	if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
 		return (0);
 
 	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
@@ -2531,7 +3025,7 @@
 		(void) printf("traversing objset %llu, %llu objects, "
 		    "%lu blocks so far\n",
 		    (u_longlong_t)zb->zb_objset,
-		    (u_longlong_t)bp->blk_fill,
+		    (u_longlong_t)BP_GET_FILL(bp),
 		    avl_numnodes(t));
 	}
 
@@ -2591,7 +3085,8 @@
 		dds.dds_ref_psize = zdde->zdde_ref_psize;
 		dds.dds_ref_dsize = zdde->zdde_ref_dsize;
 
-		ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
+		ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
+		    &dds, 0);
 
 		umem_free(zdde, sizeof (*zdde));
 	}
@@ -2634,19 +3129,22 @@
 
 	if (dump_opt['d'] > 2 || dump_opt['m'])
 		dump_metaslabs(spa);
+	if (dump_opt['M'])
+		dump_metaslab_groups(spa);
 
 	if (dump_opt['d'] || dump_opt['i']) {
 		dump_dir(dp->dp_meta_objset);
 		if (dump_opt['d'] >= 3) {
-			dump_bpobj(&spa->spa_deferred_bpobj,
+			dump_full_bpobj(&spa->spa_deferred_bpobj,
 			    "Deferred frees", 0);
 			if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
-				dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj,
+				dump_full_bpobj(
+				    &spa->spa_dsl_pool->dp_free_bpobj,
 				    "Pool snapshot frees", 0);
 			}
 
 			if (spa_feature_is_active(spa,
-			    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+			    SPA_FEATURE_ASYNC_DESTROY)) {
 				dump_bptree(spa->spa_meta_objset,
 				    spa->spa_dsl_pool->dp_bptree_obj,
 				    "Pool dataset frees");
@@ -2655,10 +3153,38 @@
 		}
 		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
 		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
+
+		for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+			uint64_t refcount;
+
+			if (!(spa_feature_table[f].fi_flags &
+			    ZFEATURE_FLAG_PER_DATASET)) {
+				ASSERT0(dataset_feature_count[f]);
+				continue;
+			}
+			(void) feature_get_refcount(spa,
+			    &spa_feature_table[f], &refcount);
+			if (dataset_feature_count[f] != refcount) {
+				(void) printf("%s feature refcount mismatch: "
+				    "%lld datasets != %lld refcount\n",
+				    spa_feature_table[f].fi_uname,
+				    (longlong_t)dataset_feature_count[f],
+				    (longlong_t)refcount);
+				rc = 2;
+			} else {
+				(void) printf("Verified %s feature refcount "
+				    "of %llu is correct\n",
+				    spa_feature_table[f].fi_uname,
+				    (longlong_t)refcount);
+			}
+		}
 	}
-	if (dump_opt['b'] || dump_opt['c'])
+	if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
 		rc = dump_block_stats(spa);
 
+	if (rc == 0)
+		rc = verify_spacemap_refcounts(spa);
+
 	if (dump_opt['s'])
 		show_pool_stats(spa);
 
@@ -2665,8 +3191,10 @@
 	if (dump_opt['h'])
 		dump_history(spa);
 
-	if (rc != 0)
+	if (rc != 0) {
+		dump_debug_buffer();
 		exit(rc);
+	}
 }
 
 #define	ZDB_FLAG_CHECKSUM	0x0001
@@ -2688,7 +3216,7 @@
 	if (flags & ZDB_FLAG_BSWAP)
 		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
 
-	sprintf_blkptr(blkbuf, bp);
+	snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
 	(void) printf("%s\n", blkbuf);
 }
 
@@ -2884,6 +3412,7 @@
 				free(dup);
 				return;
 			}
+			i += p - &flagstr[i + 1]; /* skip over the number */
 		}
 	}
 
@@ -3042,7 +3571,7 @@
 	nvlist_t *match = NULL;
 	char *name = NULL;
 	char *sepp = NULL;
-	char sep;
+	char sep = '\0';
 	int count = 0;
 	importargs_t args = { 0 };
 
@@ -3118,6 +3647,8 @@
 	nvlist_t *policy = NULL;
 	uint64_t max_txg = UINT64_MAX;
 	int rewind = ZPOOL_NEVER_REWIND;
+	char *spa_config_path_env;
+	boolean_t target_is_spa = B_TRUE;
 
 	(void) setrlimit(RLIMIT_NOFILE, &rl);
 	(void) enable_extended_FILE_stdio(-1, -1);
@@ -3124,7 +3655,17 @@
 
 	dprintf_setup(&argc, argv);
 
-	while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
+	/*
+	 * If the environment variable SPA_CONFIG_PATH is set, it overrides
+	 * the default spa_config_path setting.  If the -U flag is specified,
+	 * it in turn overrides the environment variable's setting.
+	 */
+	spa_config_path_env = getenv("SPA_CONFIG_PATH");
+	if (spa_config_path_env != NULL)
+		spa_config_path = spa_config_path_env;
+
+	while ((c = getopt(argc, argv,
+	    "bcdhilmMI:suCDRSAFLXx:evp:t:U:PGo:q")) != -1) {
 		switch (c) {
 		case 'b':
 		case 'c':
@@ -3137,8 +3678,10 @@
 		case 'u':
 		case 'C':
 		case 'D':
+		case 'M':
 		case 'R':
 		case 'S':
+		case 'G':
 			dump_opt[c]++;
 			dump_all = 0;
 			break;
@@ -3148,10 +3691,17 @@
 		case 'X':
 		case 'e':
 		case 'P':
+		case 'q':
 			dump_opt[c]++;
 			break;
-		case 'v':
-			verbose++;
+		case 'I':
+			max_inflight = strtoull(optarg, NULL, 0);
+			if (max_inflight == 0) {
+				(void) fprintf(stderr, "maximum number "
+				    "of inflight I/Os must be greater "
+				    "than 0\n");
+				usage();
+			}
 			break;
 		case 'p':
 			if (searchdirs == NULL) {
@@ -3179,6 +3729,17 @@
 		case 'U':
 			spa_config_path = optarg;
 			break;
+		case 'v':
+			verbose++;
+			break;
+		case 'x':
+			vn_dumpdir = optarg;
+			break;
+		case 'o':
+			error = set_global_var(optarg);
+			if (error != 0)
+				usage();
+			break;
 		default:
 			usage();
 			break;
@@ -3190,9 +3751,23 @@
 		usage();
 	}
 
+	/*
+	 * ZDB does not typically re-read blocks; therefore limit the ARC
+	 * to 256 MB, which can be used entirely for metadata.
+	 */
+	zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;
+
+	/*
+	 * "zdb -c" uses checksum-verifying scrub i/os which are async reads.
+	 * "zdb -b" uses traversal prefetch which uses async reads.
+	 * For good performance, let several of them be active at once.
+	 */
+	zfs_vdev_async_read_max_active = 10;
+
 	kernel_init(FREAD);
 	g_zfs = libzfs_init();
-	ASSERT(g_zfs != NULL);
+	if (g_zfs == NULL)
+		fatal("Fail to initialize zfs");
 
 	if (dump_all)
 		verbose = MAX(verbose, 1);
@@ -3220,10 +3795,8 @@
 		usage();
 	}
 
-	if (dump_opt['l']) {
-		dump_label(argv[0]);
-		return (0);
-	}
+	if (dump_opt['l'])
+		return (dump_label(argv[0]));
 
 	if (dump_opt['X'] || dump_opt['F'])
 		rewind = ZPOOL_DO_REWIND |
@@ -3260,8 +3833,23 @@
 		}
 	}
 
+	if (strpbrk(target, "/@") != NULL) {
+		size_t targetlen;
+
+		target_is_spa = B_FALSE;
+		/*
+		 * Remove any trailing slash.  Later code would get confused
+		 * by it, but we want to allow it so that "pool/" can
+		 * indicate that we want to dump the topmost filesystem,
+		 * rather than the whole pool.
+		 */
+		targetlen = strlen(target);
+		if (targetlen != 0 && target[targetlen - 1] == '/')
+			target[targetlen - 1] = '\0';
+	}
+
 	if (error == 0) {
-		if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
+		if (target_is_spa || dump_opt['R']) {
 			error = spa_open_rewind(target, &spa, FTAG, policy,
 			    NULL);
 			if (error) {
@@ -3333,6 +3921,8 @@
 	fuid_table_destroy();
 	sa_loaded = B_FALSE;
 
+	dump_debug_buffer();
+
 	libzfs_fini(g_zfs);
 	kernel_fini();
 

Modified: trunk/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -24,6 +25,10 @@
  */
 
 /*
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ */
+
+/*
  * Print intent log header and statistics.
  */
 
@@ -47,7 +52,7 @@
 {
 	char blkbuf[BP_SPRINTF_LEN];
 
-	sprintf_blkptr(blkbuf, bp);
+	snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
 	(void) printf("%s%s\n", prefix, blkbuf);
 }
 
@@ -118,7 +123,7 @@
 {
 	char *data, *dlimit;
 	blkptr_t *bp = &lr->lr_blkptr;
-	zbookmark_t zb;
+	zbookmark_phys_t zb;
 	char buf[SPA_MAXBLOCKSIZE];
 	int verbose = MAX(dump_opt['d'], dump_opt['i']);
 	int error;
@@ -132,6 +137,7 @@
 
 	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
 		(void) printf("%shas blkptr, %s\n", prefix,
+		    !BP_IS_HOLE(bp) &&
 		    bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
 		    "will claim" : "won't claim");
 		print_log_bp(bp, prefix);
@@ -139,8 +145,6 @@
 		if (BP_IS_HOLE(bp)) {
 			(void) printf("\t\t\tLSIZE 0x%llx\n",
 			    (u_longlong_t)BP_GET_LSIZE(bp));
-		}
-		if (bp->blk_birth == 0) {
 			bzero(buf, sizeof (buf));
 			(void) printf("%s<hole>\n", prefix);
 			return;
@@ -313,7 +317,8 @@
 
 	if (verbose >= 5) {
 		(void) strcpy(blkbuf, ", ");
-		sprintf_blkptr(blkbuf + strlen(blkbuf), bp);
+		snprintf_blkptr(blkbuf + strlen(blkbuf),
+		    sizeof (blkbuf) - strlen(blkbuf), bp);
 	} else {
 		blkbuf[0] = '\0';
 	}
@@ -361,7 +366,7 @@
 	int verbose = MAX(dump_opt['d'], dump_opt['i']);
 	int i;
 
-	if (zh->zh_log.blk_birth == 0 || verbose < 1)
+	if (BP_IS_HOLE(&zh->zh_log) || verbose < 1)
 		return;
 
 	(void) printf("\n    ZIL header: claim_txg %llu, "

Modified: trunk/cddl/contrib/opensolaris/cmd/zhack/zhack.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zhack/zhack.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zhack/zhack.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -20,7 +21,7 @@
  */
 
 /*
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
@@ -48,7 +49,6 @@
 #include <sys/zio_compress.h>
 #include <sys/zfeature.h>
 #include <sys/dmu_tx.h>
-#undef ZFS_MAXNAMELEN
 #undef verify
 #include <libzfs.h>
 
@@ -85,10 +85,15 @@
 
 
 static void
-fatal(const char *fmt, ...)
+fatal(spa_t *spa, void *tag, const char *fmt, ...)
 {
 	va_list ap;
 
+	if (spa != NULL) {
+		spa_close(spa, tag);
+		(void) spa_export(g_pool, NULL, B_TRUE, B_FALSE);
+	}
+
 	va_start(ap, fmt);
 	(void) fprintf(stderr, "%s: ", cmdname);
 	(void) vfprintf(stderr, fmt, ap);
@@ -159,13 +164,14 @@
 			g_importargs.can_be_active = B_TRUE;
 			if (zpool_search_import(g_zfs, &g_importargs) != NULL ||
 			    spa_open(target, &spa, FTAG) == 0) {
-				fatal("cannot import '%s': pool is active; run "
-				    "\"zpool export %s\" first\n",
-				    g_pool, g_pool);
+				fatal(spa, FTAG, "cannot import '%s': pool is "
+				    "active; run " "\"zpool export %s\" "
+				    "first\n", g_pool, g_pool);
 			}
 		}
 
-		fatal("cannot import '%s': no such pool available\n", g_pool);
+		fatal(NULL, FTAG, "cannot import '%s': no such pool "
+		    "available\n", g_pool);
 	}
 
 	elem = nvlist_next_nvpair(pools, NULL);
@@ -186,7 +192,8 @@
 		error = 0;
 
 	if (error)
-		fatal("can't import '%s': %s", name, strerror(error));
+		fatal(NULL, FTAG, "can't import '%s': %s", name,
+		    strerror(error));
 }
 
 static void
@@ -201,10 +208,11 @@
 	zfeature_checks_disable = B_FALSE;
 
 	if (err != 0)
-		fatal("cannot open '%s': %s", target, strerror(err));
+		fatal(*spa, FTAG, "cannot open '%s': %s", target,
+		    strerror(err));
 	if (spa_version(*spa) < SPA_VERSION_FEATURES) {
-		fatal("'%s' has version %d, features not enabled", target,
-		    (int)spa_version(*spa));
+		fatal(*spa, FTAG, "'%s' has version %d, features not enabled",
+		    target, (int)spa_version(*spa));
 	}
 }
 
@@ -269,6 +277,9 @@
 	dump_obj(os, spa->spa_feat_for_read_obj, "for_read");
 	dump_obj(os, spa->spa_feat_for_write_obj, "for_write");
 	dump_obj(os, spa->spa_feat_desc_obj, "descriptions");
+	if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) {
+		dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg");
+	}
 	dump_mos(spa);
 
 	spa_close(spa, FTAG);
@@ -275,15 +286,16 @@
 }
 
 static void
-feature_enable_sync(void *arg, dmu_tx_t *tx)
+zhack_feature_enable_sync(void *arg, dmu_tx_t *tx)
 {
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 	zfeature_info_t *feature = arg;
 
-	spa_feature_enable(spa, feature, tx);
+	feature_enable_sync(spa, feature, tx);
+
 	spa_history_log_internal(spa, "zhack enable feature", tx,
-	    "name=%s can_readonly=%u",
-	    feature->fi_guid, feature->fi_can_readonly);
+	    "guid=%s flags=%x",
+	    feature->fi_guid, feature->fi_flags);
 }
 
 static void
@@ -294,7 +306,7 @@
 	spa_t *spa;
 	objset_t *mos;
 	zfeature_info_t feature;
-	zfeature_info_t *nodeps[] = { NULL };
+	spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
 
 	/*
 	 * Features are not added to the pool's label until their refcounts
@@ -302,15 +314,15 @@
 	 */
 	desc = NULL;
 	feature.fi_uname = "zhack";
-	feature.fi_mos = B_FALSE;
-	feature.fi_can_readonly = B_FALSE;
+	feature.fi_flags = 0;
 	feature.fi_depends = nodeps;
+	feature.fi_feature = SPA_FEATURE_NONE;
 
 	optind = 1;
 	while ((c = getopt(argc, argv, "rmd:")) != -1) {
 		switch (c) {
 		case 'r':
-			feature.fi_can_readonly = B_TRUE;
+			feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
 			break;
 		case 'd':
 			desc = strdup(optarg);
@@ -336,18 +348,19 @@
 	feature.fi_guid = argv[1];
 
 	if (!zfeature_is_valid_guid(feature.fi_guid))
-		fatal("invalid feature guid: %s", feature.fi_guid);
+		fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
 
 	zhack_spa_open(target, B_FALSE, FTAG, &spa);
 	mos = spa->spa_meta_objset;
 
-	if (0 == zfeature_lookup_guid(feature.fi_guid, NULL))
-		fatal("'%s' is a real feature, will not enable");
+	if (zfeature_is_supported(feature.fi_guid))
+		fatal(spa, FTAG, "'%s' is a real feature, will not enable", feature.fi_guid);
 	if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
-		fatal("feature already enabled: %s", feature.fi_guid);
+		fatal(spa, FTAG, "feature already enabled: %s",
+		    feature.fi_guid);
 
 	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
-	    feature_enable_sync, &feature, 5));
+	    zhack_feature_enable_sync, &feature, 5, ZFS_SPACE_CHECK_NORMAL));
 
 	spa_close(spa, FTAG);
 
@@ -359,8 +372,10 @@
 {
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 	zfeature_info_t *feature = arg;
+	uint64_t refcount;
 
-	spa_feature_incr(spa, feature, tx);
+	VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
+	feature_sync(spa, feature, refcount + 1, tx);
 	spa_history_log_internal(spa, "zhack feature incr", tx,
 	    "name=%s", feature->fi_guid);
 }
@@ -370,8 +385,10 @@
 {
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 	zfeature_info_t *feature = arg;
+	uint64_t refcount;
 
-	spa_feature_decr(spa, feature, tx);
+	VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
+	feature_sync(spa, feature, refcount - 1, tx);
 	spa_history_log_internal(spa, "zhack feature decr", tx,
 	    "name=%s", feature->fi_guid);
 }
@@ -385,7 +402,7 @@
 	spa_t *spa;
 	objset_t *mos;
 	zfeature_info_t feature;
-	zfeature_info_t *nodeps[] = { NULL };
+	spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
 
 	/*
 	 * fi_desc does not matter here because it was written to disk
@@ -394,15 +411,16 @@
 	 * disk later.
 	 */
 	feature.fi_uname = "zhack";
-	feature.fi_mos = B_FALSE;
+	feature.fi_flags = 0;
 	feature.fi_desc = NULL;
 	feature.fi_depends = nodeps;
+	feature.fi_feature = SPA_FEATURE_NONE;
 
 	optind = 1;
 	while ((c = getopt(argc, argv, "md")) != -1) {
 		switch (c) {
 		case 'm':
-			feature.fi_mos = B_TRUE;
+			feature.fi_flags |= ZFEATURE_FLAG_MOS;
 			break;
 		case 'd':
 			decr = B_TRUE;
@@ -423,29 +441,38 @@
 	feature.fi_guid = argv[1];
 
 	if (!zfeature_is_valid_guid(feature.fi_guid))
-		fatal("invalid feature guid: %s", feature.fi_guid);
+		fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
 
 	zhack_spa_open(target, B_FALSE, FTAG, &spa);
 	mos = spa->spa_meta_objset;
 
-	if (0 == zfeature_lookup_guid(feature.fi_guid, NULL))
-		fatal("'%s' is a real feature, will not change refcount");
+	if (zfeature_is_supported(feature.fi_guid)) {
+		fatal(spa, FTAG, "'%s' is a real feature, will not change "
+		    "refcount", feature.fi_guid);
+	}
 
 	if (0 == zap_contains(mos, spa->spa_feat_for_read_obj,
 	    feature.fi_guid)) {
-		feature.fi_can_readonly = B_FALSE;
+		feature.fi_flags &= ~ZFEATURE_FLAG_READONLY_COMPAT;
 	} else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj,
 	    feature.fi_guid)) {
-		feature.fi_can_readonly = B_TRUE;
+		feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
 	} else {
-		fatal("feature is not enabled: %s", feature.fi_guid);
+		fatal(spa, FTAG, "feature is not enabled: %s", feature.fi_guid);
 	}
 
-	if (decr && !spa_feature_is_active(spa, &feature))
-		fatal("feature refcount already 0: %s", feature.fi_guid);
+	if (decr) {
+		uint64_t count;	/* refcount read back from disk */
+		if (feature_get_refcount_from_disk(spa, &feature,
+		    &count) == 0 && count == 0) {
+			fatal(spa, FTAG, "feature refcount already 0: %s",
+			    feature.fi_guid);
+		}
+	}
 
 	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
-	    decr ? feature_decr_sync : feature_incr_sync, &feature, 5));
+	    decr ? feature_decr_sync : feature_incr_sync, &feature,
+	    5, ZFS_SPACE_CHECK_NORMAL));
 
 	spa_close(spa, FTAG);
 }
@@ -530,8 +557,8 @@
 		usage();
 	}
 
-	if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_TRUE) != 0) {
-		fatal("pool export failed; "
+	if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE) != 0) {
+		fatal(NULL, FTAG, "pool export failed; "
 		    "changes may not be committed to disk\n");
 	}
 

Modified: trunk/cddl/contrib/opensolaris/cmd/zinject/translate.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zinject/translate.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zinject/translate.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/zinject/zinject.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zinject/zinject.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zinject/zinject.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -20,7 +21,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  */
 
 /*
@@ -148,6 +149,7 @@
 #include <sys/mount.h>
 
 #include <libzfs.h>
+#include <libzfs_compat.h>
 
 #undef verify	/* both libzfs.h and zfs_context.h want to define this */
 
@@ -228,6 +230,7 @@
 	    "\t\tall records if 'all' is specificed.\n"
 	    "\n"
 	    "\tzinject -p <function name> pool\n"
+	    "\n"
 	    "\t\tInject a panic fault at the specified function. Only \n"
 	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
 	    "\t\tspa_vdev_exit() will trigger a panic.\n"
@@ -234,6 +237,7 @@
 	    "\n"
 	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
 	    "\t    [-T <read|write|free|claim|all> pool\n"
+	    "\n"
 	    "\t\tInject a fault into a particular device or the device's\n"
 	    "\t\tlabel.  Label injection can either be 'nvlist', 'uber',\n "
 	    "\t\t'pad1', or 'pad2'.\n"
@@ -240,9 +244,43 @@
 	    "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
 	    "\n"
 	    "\tzinject -d device -A <degrade|fault> pool\n"
+	    "\n"
 	    "\t\tPerform a specific action on a particular device\n"
 	    "\n"
+	    "\tzinject -d device -D latency:lanes pool\n"
+	    "\n"
+	    "\t\tAdd an artificial delay to IO requests on a particular\n"
+	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
+	    "\t\tmilliseconds to complete. Each delay has an associated\n"
+	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
+	    "\t\tIO requests that can be processed.\n"
+	    "\n"
+	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
+	    "\t\tthe device will only be able to service a single IO request\n"
+	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
+	    "\t\tif only a single request is submitted every 10 ms, the\n"
+	    "\t\taverage latency will be 10 ms; but if more than one request\n"
+	    "\t\tis submitted every 10 ms, the average latency will be more\n"
+	    "\t\tthan 10 ms.\n"
+	    "\n"
+	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
+	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
+	    "\t\ttwo requests at a time, each with a minimum latency of\n"
+	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
+	    "\t\tthe average latency will be 10 ms; but if more than two\n"
+	    "\t\trequests are submitted every 10 ms, the average latency\n"
+	    "\t\twill be more than 10 ms.\n"
+	    "\n"
+	    "\t\tAlso note, these delays are additive. So two invocations\n"
+	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
+	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
+	    "\t\tlanes with differing target latencies. For example, an\n"
+	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
+	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
+	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
+	    "\n"
 	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
+	    "\n"
 	    "\t\tCause the pool to stop writing blocks yet not\n"
 	    "\t\treport errors for a duration.  Simulates buggy hardware\n"
 	    "\t\tthat fails to honor cache flush requests.\n"
@@ -356,6 +394,9 @@
 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
 		return (0);
 
+	if (record->zi_cmd == ZINJECT_DELAY_IO)
+		return (0);
+
 	if (*count == 0) {
 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "GUID");
 		(void) printf("---  ---------------  ----------------\n");
@@ -370,6 +411,35 @@
 }
 
 static int
+print_delay_handler(int id, const char *pool, zinject_record_t *record,
+    void *data)
+{
+	int *count = data;
+
+	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
+		return (0);
+
+	if (record->zi_cmd != ZINJECT_DELAY_IO)
+		return (0);
+
+	if (*count == 0) {
+		(void) printf("%3s  %-15s  %-15s  %-15s  %s\n",
+		    "ID", "POOL", "DELAY (ms)", "LANES", "GUID");
+		(void) printf("---  ---------------  ---------------  "
+		    "---------------  ----------------\n");
+	}
+
+	*count += 1;
+
+	(void) printf("%3d  %-15s  %-15llu  %-15llu  %llx\n", id, pool,
+	    (u_longlong_t)NSEC2MSEC(record->zi_timer),
+	    (u_longlong_t)record->zi_nlanes,
+	    (u_longlong_t)record->zi_guid);
+
+	return (0);
+}
+
+static int
 print_panic_handler(int id, const char *pool, zinject_record_t *record,
     void *data)
 {
@@ -406,6 +476,13 @@
 		count = 0;
 	}
 
+	(void) iter_handlers(print_delay_handler, &count);
+	if (count > 0) {
+		total += count;
+		(void) printf("\n");
+		count = 0;
+	}
+
 	(void) iter_handlers(print_data_handler, &count);
 	if (count > 0) {
 		total += count;
@@ -548,6 +625,35 @@
 	return (1);
 }
 
+static int
+parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
+{
+	unsigned long scan_delay;
+	unsigned long scan_nlanes;
+
+	if (sscanf(str, "%lu:%lu", &scan_delay, &scan_nlanes) != 2)
+		return (1);
+
+	/*
+	 * We explicitly disallow a delay of zero here, because we key
+	 * off this value being non-zero in translate_device(), to
+	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
+	 */
+	if (scan_delay == 0)
+		return (1);
+
+	/*
+	 * The units for the CLI delay parameter is milliseconds, but
+	 * the data passed to the kernel is interpreted as nanoseconds.
+	 * Thus we scale the milliseconds to nanoseconds here, and this
+	 * nanosecond value is used to pass the delay to the kernel.
+	 */
+	*delay = MSEC2NSEC(scan_delay);
+	*nlanes = scan_nlanes;
+
+	return (0);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -631,8 +737,9 @@
 			device = optarg;
 			break;
 		case 'D':
-			record.zi_timer = strtoull(optarg, &end, 10);
-			if (errno != 0 || *end != '\0') {
+			ret = parse_delay(optarg, &record.zi_timer,
+			    &record.zi_nlanes);
+			if (ret != 0) {
 				(void) fprintf(stderr, "invalid i/o delay "
 				    "value: '%s'\n", optarg);
 				usage();

Modified: trunk/cddl/contrib/opensolaris/cmd/zinject/zinject.h
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zinject/zinject.h	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zinject/zinject.h	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/zlook/zlook.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zlook/zlook.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zlook/zlook.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7	2018-06-02 16:07:17 UTC (rev 10228)
@@ -19,10 +19,11 @@
 .\"
 .\" Copyright (c) 2012 by Delphix. All rights reserved.
 .\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2013, Joyent, Inc. All rights reserved.
 .\"
-.\" $FreeBSD: release/9.2.0/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 247309 2013-02-26 05:58:05Z delphij $
-.\"
-.Dd February 8, 2013
+.\" $FreeBSD: stable/10/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 276081 2014-12-22 20:58:51Z delphij $
+.\" $MidnightBSD$
+.Dd November 10, 2014
 .Dt ZPOOL-FEATURES 7
 .Os
 .Sh NAME
@@ -186,6 +187,23 @@
 .Sy active
 while there are any filesystems, volumes, or snapshots which were created
 after enabling this feature.
+.It Sy filesystem_limits
+.Bl -column "READ\-ONLY COMPATIBLE" "com.joyent:filesystem_limits"
+.It GUID Ta com.joyent:filesystem_limits
+.It READ\-ONLY COMPATIBLE Ta yes
+.It DEPENDENCIES Ta extensible_dataset
+.El
+.Pp
+This feature enables filesystem and snapshot limits.
+These limits can be used
+to control how many filesystems and/or snapshots can be created at the point in
+the tree on which the limits are set.
+.Pp
+This feature is
+.Sy active
+once either of the limit properties has been
+set on a dataset.
+Once activated the feature is never deactivated.
 .It Sy lz4_compress
 .Bl -column "READ\-ONLY COMPATIBLE" "org.illumos:lz4_compress"
 .It GUID Ta org.illumos:lz4_compress
@@ -217,19 +235,226 @@
 compression on any dataset on the
 pool using the
 .Xr zfs 8
-command. Please note that doing so will
-immediately activate the
+command.
+Also, all newly written metadata
+will be compressed with
+.Sy lz4
+algorithm.
+Since this feature is not read-only compatible, this
+operation will render the pool unimportable on systems without support
+for the
 .Sy lz4_compress
-feature on the underlying
-pool (even before any data is written). Since this feature is not
-read-only compatible, this operation will render the pool unimportable
-on systems without support for the
-.Sy lz4_compress
-feature. At the
-moment, this operation cannot be reversed. Booting off of
+feature.
+Booting off of
 .Sy lz4
 -compressed root pools is supported.
+.Pp
+This feature becomes
+.Sy active
+as soon as it is enabled and will
+never return to being
+.Sy enabled .
+.It Sy multi_vdev_crash_dump
+.Bl -column "READ\-ONLY COMPATIBLE" "com.joyent:multi_vdev_crash_dump"
+.It GUID Ta com.joyent:multi_vdev_crash_dump
+.It READ\-ONLY COMPATIBLE Ta no
+.It DEPENDENCIES Ta none
 .El
+.Pp
+This feature allows a dump device to be configured with a pool comprised
+of multiple vdevs.
+Those vdevs may be arranged in any mirrored or raidz
+configuration.
+.\" TODO: this is not yet supported on FreeBSD.
+.\" .Pp
+.\" When the
+.\" .Sy multi_vdev_crash_dump
+.\" feature is set to
+.\" .Sy enabled ,
+.\" the administrator can use the
+.\" .Xr dumpon 8
+.\" command to configure a
+.\" dump device on a pool comprised of multiple vdevs.
+.It Sy spacemap_histogram
+.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:spacemap_histogram"
+.It GUID Ta com.delphix:spacemap_histogram
+.It READ\-ONLY COMPATIBLE Ta yes
+.It DEPENDENCIES Ta none
+.El
+.Pp
+This feature allows ZFS to maintain more information about how free space
+is organized within the pool. If this feature is 
+.Sy enabled ,
+ZFS will
+set this feature to
+.Sy active
+when a new space map object is created or
+an existing space map is upgraded to the new format.
+Once the feature is
+.Sy active ,
+it will remain in that state until the pool is destroyed.
+.It Sy extensible_dataset
+.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:extensible_dataset"
+.It GUID Ta com.delphix:extensible_dataset
+.It READ\-ONLY COMPATIBLE Ta no
+.It DEPENDENCIES Ta none
+.El
+.Pp
+This feature allows more flexible use of internal ZFS data structures,
+and exists for other features to depend on.
+.Pp
+This feature will be
+.Sy active
+when the first dependent feature uses it,
+and will be returned to the
+.Sy enabled
+state when all datasets that use
+this feature are destroyed.
+.It Sy bookmarks
+.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:bookmarks"
+.It GUID Ta com.delphix:bookmarks
+.It READ\-ONLY COMPATIBLE Ta yes
+.It DEPENDENCIES Ta extensible_dataset
+.El
+.Pp
+This feature enables use of the
+.Nm zfs
+.Cm bookmark
+subcommand.
+.Pp
+This feature is
+.Sy active
+while any bookmarks exist in the pool.
+All bookmarks in the pool can be listed by running
+.Nm zfs
+.Cm list
+.Fl t No bookmark Fl r Ar poolname .
+.It Sy enabled_txg
+.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:enabled_txg"
+.It GUID Ta com.delphix:enabled_txg
+.It READ\-ONLY COMPATIBLE Ta yes
+.It DEPENDENCIES Ta none
+.El
+.Pp
+Once this feature is enabled ZFS records the transaction group number
+in which new features are enabled. This has no user-visible impact,
+but other features may depend on this feature.
+.Pp
+This feature becomes
+.Sy active
+as soon as it is enabled and will
+never return to being
+.Sy enabled .
+.It Sy hole_birth
+.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:hole_birth"
+.It GUID Ta com.delphix:hole_birth
+.It READ\-ONLY COMPATIBLE Ta no
+.It DEPENDENCIES Ta enabled_txg
+.El
+.Pp
+This feature improves performance of incremental sends
+.Pq Dq zfs send -i
+and receives for objects with many holes.
+The most common case of
+hole-filled objects is zvols.
+.Pp
+An incremental send stream from snapshot
+.Sy A
+to snapshot
+.Sy B
+contains information about every block that changed between
+.Sy A
+and
+.Sy B .
+Blocks which did not change between those snapshots can be
+identified and omitted from the stream using a piece of metadata called
+the 'block birth time', but birth times are not recorded for holes
+.Pq blocks filled only with zeroes .
+Since holes created after
+.Sy A
+cannot be
+distinguished from holes created before
+.Sy A ,
+information about every
+hole in the entire filesystem or zvol is included in the send stream.
+.Pp
+For workloads where holes are rare this is not a problem.
+However, when
+incrementally replicating filesystems or zvols with many holes
+.Pq for example a zvol formatted with another filesystem
+a lot of time will
+be spent sending and receiving unnecessary information about holes that
+already exist on the receiving side.
+.Pp
+Once the
+.Sy hole_birth
+feature has been enabled the block birth times
+of all new holes will be recorded.
+Incremental sends between snapshots
+created after this feature is enabled will use this new metadata to avoid
+sending information about holes that already exist on the receiving side.
+.Pp
+This feature becomes
+.Sy active
+as soon as it is enabled and will
+never return to being
+.Sy enabled .
+.It Sy embedded_data
+.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:embedded_data"
+.It GUID Ta com.delphix:embedded_data
+.It READ\-ONLY COMPATIBLE Ta no
+.It DEPENDENCIES Ta none
+.El
+.Pp
+This feature improves the performance and compression ratio of
+highly-compressible blocks.
+Blocks whose contents can compress to 112 bytes
+or smaller can take advantage of this feature.
+.Pp
+When this feature is enabled, the contents of highly-compressible blocks are
+stored in the block "pointer" itself
+.Po a misnomer in this case, as it contains
+the compressed data, rather than a pointer to its location on disk
+.Pc .
+Thus
+the space of the block
+.Pq one sector, typically 512 bytes or 4KB
+is saved,
+and no additional i/o is needed to read and write the data block.
+.Pp
+This feature becomes
+.Sy active
+as soon as it is enabled and will
+never return to being
+.Sy enabled .
+.It Sy large_blocks
+.Bl -column "READ\-ONLY COMPATIBLE" "org.open-zfs:large_block"
+.It GUID Ta org.open-zfs:large_block
+.It READ\-ONLY COMPATIBLE Ta no
+.It DEPENDENCIES Ta extensible_dataset
+.El
+.Pp
+The
+.Sy large_blocks
+feature allows the record size on a dataset to be
+set larger than 128KB.
+.Pp
+This feature becomes
+.Sy active
+once a
+.Sy recordsize
+property has been set larger than 128KB, and will return to being 
+.Sy enabled
+once all filesystems that have ever had their recordsize larger than 128KB
+are destroyed.
+.Pp
+Please note that booting from datasets that have recordsize greater than
+128KB is
+.Em NOT
+supported by the
+.Fx
+boot loader.
+.El
 .Sh SEE ALSO
 .Xr zpool 8
 .Sh AUTHORS

Modified: trunk/cddl/contrib/opensolaris/cmd/zpool/zpool.8
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zpool/zpool.8	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zpool/zpool.8	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,5 +1,6 @@
 '\" te
 .\" Copyright (c) 2012, Martin Matuska <mm at FreeBSD.org>.
+.\" Copyright (c) 2013-2014, Xin Li <delphij at FreeBSD.org>.
 .\" All Rights Reserved.
 .\"
 .\" The contents of this file are subject to the terms of the
@@ -20,12 +21,12 @@
 .\" Copyright (c) 2010, Sun Microsystems, Inc. All Rights Reserved.
 .\" Copyright 2011, Nexenta Systems, Inc. All Rights Reserved.
 .\" Copyright (c) 2011, Justin T. Gibbs <gibbs at FreeBSD.org>
-.\" Copyright (c) 2012 by Delphix. All Rights Reserved.
+.\" Copyright (c) 2013 by Delphix. All Rights Reserved.
 .\" Copyright (c) 2012, Glen Barber <gjb at FreeBSD.org>
 .\"
-.\" $FreeBSD: release/9.2.0/cddl/contrib/opensolaris/cmd/zpool/zpool.8 248369 2013-03-16 08:16:11Z mm $
-.\"
-.Dd March 14, 2013
+.\" $FreeBSD: stable/10/cddl/contrib/opensolaris/cmd/zpool/zpool.8 333196 2018-05-03 07:22:24Z avg $
+.\" $MidnightBSD$
+.Dd July 26, 2014
 .Dt ZPOOL 8
 .Os
 .Sh NAME
@@ -56,6 +57,7 @@
 .Ar ...
 .Op Fl m Ar mountpoint
 .Op Fl R Ar root
+.Op Fl t Ar tempname
 .Ar pool vdev ...
 .Nm
 .Cm destroy
@@ -70,6 +72,8 @@
 .Ar pool ...
 .Nm
 .Cm get
+.Op Fl Hp
+.Op Fl o Ar field Ns Op , Ns Ar ...
 .Ar all | property Ns Op , Ns Ar ...
 .Ar pool ...
 .Nm
@@ -105,6 +109,7 @@
 .Op Fl m
 .Op Fl N
 .Op Fl R Ar root
+.Op Fl t
 .Op Fl F Op Fl n
 .Ar pool | id
 .Op Ar newpool
@@ -120,7 +125,7 @@
 .Ar device
 .Nm
 .Cm list
-.Op Fl H
+.Op Fl Hpv
 .Op Fl o Ar property Ns Op , Ns Ar ...
 .Op Fl T Cm d Ns | Ns Cm u
 .Op Ar pool
@@ -141,6 +146,9 @@
 .Cm remove
 .Ar pool device ...
 .Nm
+.Cm reopen
+.Ar pool
+.Nm
 .Cm replace
 .Op Fl f
 .Ar pool device
@@ -537,6 +545,15 @@
 value of 1.76 indicates that 1.76 units of data were stored but only 1 unit of disk space was actually consumed. See
 .Xr zfs 8
 for a description of the deduplication feature.
+.It Sy expandsize
+Amount of uninitialized space within the pool or device that can be used to
+increase the total capacity of the pool.
+Uninitialized space consists of
+any space on an EFI labeled vdev which has not been brought online
+.Pq i.e. zpool online -e .
+This space occurs when a LUN is dynamically expanded.
+.It Sy fragmentation
+The amount of fragmentation in the pool.
 .It Sy free
 Number of blocks within the pool that are not allocated.
 .It Sy freeing
@@ -549,8 +566,6 @@
 will decrease while
 .Sy free
 increases.
-.It Sy expandsize
-This property has currently no value on FreeBSD.
 .It Sy guid
 A unique identifier for the pool.
 .It Sy health
@@ -621,6 +636,9 @@
 .It
 To write to a read-only pool, a export and import of the pool is required.
 .El
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy rdonly .
 .El
 .Pp
 The following properties can be set at creation time and import time, and later
@@ -679,7 +697,9 @@
 Threshold for the number of block ditto copies. If the reference count for a
 deduplicated block increases above this number, a new ditto copy of this block
 is automatically stored. Default setting is
-.Cm 0 .
+.Cm 0
+which causes no ditto copies to be created for deduplicated blocks.
+The minimum legal nonzero setting is 100.
 .It Sy delegation Ns = Ns Cm on No | Cm off
 Controls whether a non-privileged user is granted access based on the dataset
 permissions defined on the dataset. See
@@ -850,6 +870,7 @@
 .Ar ...
 .Op Fl m Ar mountpoint
 .Op Fl R Ar root
+.Op Fl t Ar tempname
 .Ar pool vdev ...
 .Xc
 .Pp
@@ -951,6 +972,18 @@
 .Qq Cm none .
 For more information on dataset mount points, see
 .Xr zfs 8 .
+.It Fl t Ar tempname
+Sets the in-core pool name to
+.Pa tempname
+while the on-disk name will be the name specified as the pool name
+.Pa pool .
+This will set the default
+.Sy cachefile
+property to
+.Sy none .
+This is intended to handle name space collisions when creating pools
+for other systems, such as virtual machines or physical machines
+whose pools live on network block devices.
 .El
 .It Xo
 .Nm
@@ -1010,6 +1043,8 @@
 .It Xo
 .Nm
 .Cm get
+.Op Fl Hp
+.Op Fl o Ar field Ns Op , Ns Ar ...
 .Ar all | property Ns Op , Ns Ar ...
 .Ar pool ...
 .Xc
@@ -1028,6 +1063,18 @@
 See the
 .Qq Sx Properties
 section for more information on the available pool properties.
+.It Fl H
+Scripted mode. Do not display headers, and separate fields by a single tab
+instead of arbitrary space.
+.It Fl p
+Display numbers in parsable (exact) values.
+.It Fl o Ar field
+A comma-separated list of columns to display.
+.Sy name Ns , Ns
+.Sy property Ns , Ns
+.Sy value Ns , Ns
+.Sy source
+is the default value.
 .It Xo
 .Nm
 .Cm history
@@ -1149,9 +1196,10 @@
 .It Fl f
 Forces import, even if the pool appears to be potentially active.
 .It Fl m
-Enables import with missing log devices.
+Allows a pool to import when there is a missing log device. Recent transactions
+can be lost because the log device will be discarded.
 .It Fl N
-Do not mount any filesystems from the imported pool.
+Import the pool without mounting any file systems.
 .It Fl R Ar root
 Sets the
 .Qq Sy cachefile
@@ -1190,6 +1238,7 @@
 .Op Fl m
 .Op Fl N
 .Op Fl R Ar root
+.Op Fl t
 .Op Fl F Op Fl n
 .Ar pool | id
 .Op Ar newpool
@@ -1242,12 +1291,27 @@
 .It Fl f
 Forces import, even if the pool appears to be potentially active.
 .It Fl m
-Enables import with missing log devices.
+Allows a pool to import when there is a missing log device. Recent transactions
+can be lost because the log device will be discarded.
 .It Fl N
-Do not mount any filesystems from the imported pool.
+Import the pool without mounting any file systems.
 .It Fl R Ar root
 Equivalent to
 .Qq Fl o Cm cachefile=none,altroot= Ns Pa root
+.It Fl t
+Used with
+.Ar newpool .
+Specifies that
+.Ar newpool
+is temporary.
+Temporary pool names last until export.
+Ensures that the original pool name will be used in all label updates and
+therefore is retained upon export.
+Will also set
+.Sy cachefile
+property to
+.Sy none
+when not explicitly specified.
 .It Fl F
 Recovery mode for a non-importable pool. Attempt to return the pool to an
 importable state by discarding the last few transactions. Not all damaged pools
@@ -1319,13 +1383,13 @@
 .Ar device
 must not be part of an active pool configuration.
 .Bl -tag -width indent
-.It Fl v
+.It Fl f
 Treat exported or foreign devices as inactive.
 .El
 .It Xo
 .Nm
 .Cm list
-.Op Fl Hv
+.Op Fl Hpv
 .Op Fl o Ar property Ns Op , Ns Ar ...
 .Op Fl T Cm d Ns | Ns Cm u
 .Op Ar pool
@@ -1333,8 +1397,9 @@
 .Op Ar inverval Op Ar count
 .Xc
 .Pp
-Lists the given pools along with a health status and space usage. When given no
-arguments, all pools in the system are listed.
+Lists the given pools along with a health status and space usage. If no
+.Ar pools
+are specified, all pools in the system are listed.
 .Pp
 When given an interval, the output is printed every
 .Ar interval
@@ -1346,11 +1411,27 @@
 .Ar count
 reports are printed.
 .Bl -tag -width indent
+.It Fl T Cm d Ns | Ns Cm u
+Print a timestamp.
+.Pp
+Use modifier
+.Cm d
+for standard date format. See
+.Xr date 1 .
+Use modifier
+.Cm u
+for unixtime
+.Pq equals Qq Ic date +%s .
 .It Fl H
 Scripted mode. Do not display headers, and separate fields by a single tab
 instead of arbitrary space.
+.It Fl p
+Display numbers in parsable (exact) values.
 .It Fl v
-Show more detailed information.
+Verbose statistics. Reports usage statistics for individual
+.Em vdevs
+within
+the pool, in addition to the pool-wide statistics.
 .It Fl o Ar property Ns Op , Ns Ar ...
 Comma-separated list of properties to display. See the
 .Qq Sx Properties
@@ -1359,6 +1440,8 @@
 .Sy size ,
 .Sy used ,
 .Sy available ,
+.Sy fragmentation ,
+.Sy expandsize ,
 .Sy capacity  ,
 .Sy health ,
 .Sy altroot .
@@ -1431,6 +1514,13 @@
 devices cannot be removed from a pool.
 .It Xo
 .Nm
+.Cm reopen
+.Ar pool
+.Xc
+.Pp
+Reopen all the vdevs associated with the pool.
+.It Xo
+.Nm
 .Cm replace
 .Op Fl f
 .Ar pool device
@@ -1616,7 +1706,8 @@
 .It Fl x
 Only display status for pools that are exhibiting errors or are otherwise
 unavailable.
-Warnings about pools not using the latest on-disk format will not be included.
+Warnings about pools not using the latest on-disk format, having non-native
+block size or disabled features will not be included.
 .It Fl v
 Displays verbose data error information, printing out a complete list of all
 data errors since the last complete pool scrub.
@@ -1667,7 +1758,7 @@
 not support feature flags.
 See
 .Xr zpool-features 7
-for details on compatability with system sthat support feature flags, but do
+for details on compatibility with systems that support feature flags, but do
 not support all features enabled on the pool.
 .Bl -tag -width indent
 .It Fl a
@@ -1736,9 +1827,9 @@
 The following command lists all available pools on the system.
 .Bd -literal -offset 2n
 .Li # Ic zpool list
-NAME   SIZE  ALLOC   FREE    CAP  DEDUP  HEALTH  ALTROOT
-pool  2.70T   473G  2.24T    17%  1.00x  ONLINE  -
-test  1.98G  89.5K  1.98G     0%  1.00x  ONLINE  -
+NAME   SIZE  ALLOC   FREE   FRAG  EXPANDSZ    CAP  DEDUP  HEALTH  ALTROOT
+pool  2.70T   473G  2.24T    33%         -    17%  1.00x  ONLINE  -
+test  1.98G  89.5K  1.98G    48%         -     0%  1.00x  ONLINE  -
 .Ed
 .It Sy Example 7 No Listing All Properties for a Pool
 .Pp
@@ -1866,8 +1957,36 @@
 .Bd -literal -offset 2n
 .Li # Ic zpool iostat -v pool 5
 .Ed
-.It Sy Example 15 No Removing a Mirrored Log Device
+.It Xo
+.Sy Example 15
+Displaying expanded space on a device
+.Xc
 .Pp
+The following command displays the detailed information for the
+.Em data
+pool.
+This pool is comprised of a single
+.Em raidz
+vdev where one of its
+devices increased its capacity by 10GB.
+In this example, the pool will not
+be able to utilize this extra capacity until all the devices under the
+.Em raidz
+vdev have been expanded.
+.Bd -literal -offset 2n
+.Li # Ic zpool list -v data
+NAME       SIZE  ALLOC   FREE   FRAG  EXPANDSZ    CAP  DEDUP  HEALTH  ALTROOT
+data      23.9G  14.6G  9.30G    48%         -    61%  1.00x  ONLINE  -
+  raidz1  23.9G  14.6G  9.30G    48%         -
+    ada0      -      -      -      -         -
+    ada1      -      -      -      -       10G
+    ada2      -      -      -      -         -
+.Ed
+.It Xo
+.Sy Example 16
+Removing a Mirrored Log Device
+.Xc
+.Pp
 The following command removes the mirrored log device
 .Em mirror-2 .
 .Pp
@@ -1898,7 +2017,12 @@
 .Bd -literal -offset 2n
 .Li # Ic zpool remove tank mirror-2
 .Ed
-.It Sy Example 16 No Recovering a Faulted Tn ZFS No Pool
+.It Xo
+.Sy Example 17
+Recovering a Faulted
+.Tn ZFS
+Pool
+.Xc
 .Pp
 If a pool is faulted but recoverable, a message indicating this state is
 provided by
@@ -1946,3 +2070,9 @@
 .Xr mdoc 7
 implementation of this manual page was initially written by
 .An Martin Matuska Aq mm at FreeBSD.org .
+.Sh CAVEATS
+The
+.Cm spare
+feature requires a utility to detect zpool degradation and initiate
+disk replacement within the zpool. FreeBSD does not provide such a
+utility at this time.


Property changes on: trunk/cddl/contrib/opensolaris/cmd/zpool/zpool.8
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -22,9 +23,10 @@
  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright 2016 Igor Kozhukhov <ikozhukhov at gmail.com>.
+ */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <solaris.h>
 #include <libintl.h>
 #include <libuutil.h>
@@ -132,7 +134,8 @@
 		for (i = 0; i < argc; i++) {
 			zpool_handle_t *zhp;
 
-			if (zhp = zpool_open_canfail(g_zfs, argv[i])) {
+			if ((zhp = zpool_open_canfail(g_zfs, argv[i])) !=
+			    NULL) {
 				if (add_pool(zhp, zlp) != 0)
 					*err = B_TRUE;
 			} else {

Modified: trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -21,10 +22,12 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2012 by Frederik Wessels. All rights reserved.
  * Copyright (c) 2012 Martin Matuska <mm at FreeBSD.org>. All rights reserved.
+ * Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved.
+ * Copyright 2016 Igor Kozhukhov <ikozhukhov at gmail.com>.
+ * Copyright 2016 Nexenta Systems, Inc.
  */
 
 #include <solaris.h>
@@ -198,7 +201,8 @@
 static uint_t timestamp_fmt = NODATE;
 
 static const char *
-get_usage(zpool_help_t idx) {
+get_usage(zpool_help_t idx)
+{
 	switch (idx) {
 	case HELP_ADD:
 		return (gettext("\tadd [-fn] <pool> <vdev> ...\n"));
@@ -209,8 +213,9 @@
 		return (gettext("\tclear [-nF] <pool> [device]\n"));
 	case HELP_CREATE:
 		return (gettext("\tcreate [-fnd] [-o property=value] ... \n"
-		    "\t    [-O file-system-property=value] ... \n"
-		    "\t    [-m mountpoint] [-R root] <pool> <vdev> ...\n"));
+		    "\t    [-O file-system-property=value] ...\n"
+		    "\t    [-m mountpoint] [-R root] [-t tempname] "
+		    "<pool> <vdev> ...\n"));
 	case HELP_DESTROY:
 		return (gettext("\tdestroy [-f] <pool>\n"));
 	case HELP_DETACH:
@@ -227,7 +232,7 @@
 		    "[-R root] [-F [-n]] -a\n"
 		    "\timport [-o mntopts] [-o property=value] ... \n"
 		    "\t    [-d dir | -c cachefile] [-D] [-f] [-m] [-N] "
-		    "[-R root] [-F [-n]]\n"
+		    "[-R root] [-F [-n]] [-t]\n"
 		    "\t    <pool | id> [newpool]\n"));
 	case HELP_IOSTAT:
 		return (gettext("\tiostat [-v] [-T d|u] [pool] ... [interval "
@@ -235,7 +240,7 @@
 	case HELP_LABELCLEAR:
 		return (gettext("\tlabelclear [-f] <vdev>\n"));
 	case HELP_LIST:
-		return (gettext("\tlist [-Hv] [-o property[,...]] "
+		return (gettext("\tlist [-Hpv] [-o property[,...]] "
 		    "[-T d|u] [pool] ... [interval [count]]\n"));
 	case HELP_OFFLINE:
 		return (gettext("\toffline [-t] <pool> <device> ...\n"));
@@ -247,7 +252,7 @@
 	case HELP_REMOVE:
 		return (gettext("\tremove <pool> <device> ...\n"));
 	case HELP_REOPEN:
-		return (""); /* Undocumented command */
+		return (gettext("\treopen <pool>\n"));
 	case HELP_SCRUB:
 		return (gettext("\tscrub [-s] <pool> ...\n"));
 	case HELP_STATUS:
@@ -257,8 +262,8 @@
 		return (gettext("\tupgrade [-v]\n"
 		    "\tupgrade [-V version] <-a | pool ...>\n"));
 	case HELP_GET:
-		return (gettext("\tget <\"all\" | property[,...]> "
-		    "<pool> ...\n"));
+		return (gettext("\tget [-Hp] [-o \"all\" | field[,...]] "
+		    "<\"all\" | property[,...]> <pool> ...\n"));
 	case HELP_SET:
 		return (gettext("\tset <property=value> <pool> \n"));
 	case HELP_SPLIT:
@@ -477,6 +482,21 @@
 }
 
 /*
+ * Set a default property pair (name, string-value) in a property nvlist
+ *
+ * If 'propname' can already be looked up as a string in '*props', the list
+ * is left untouched and 0 is returned.  Otherwise the pair is handed to
+ * add_prop_list() together with 'poolprop', and that function's return
+ * value is passed back to the caller.
+ */
+static int
+add_prop_list_default(const char *propname, char *propval, nvlist_t **props,
+    boolean_t poolprop)
+{
+	char *pval;
+
+	/* An existing value wins over the default; 'pval' is only a sink. */
+	if (nvlist_lookup_string(*props, propname, &pval) == 0)
+		return (0);
+
+	return (add_prop_list(propname, propval, props, poolprop));
+}
+
+/*
  * zpool add [-fn] <pool> <vdev> ...
  *
  *	-f	Force addition of devices, even if they appear in use
@@ -623,8 +643,11 @@
 }
 
 /*
- * zpool labelclear <vdev>
+ * zpool labelclear [-f] <vdev>
  *
+ *	-f	Force clearing the label for the vdevs which are members of
+ *		the exported or foreign pools.
+ *
  * Verifies that the vdev is not active and zeros out the label information
  * on the device.
  */
@@ -631,8 +654,11 @@
 int
 zpool_do_labelclear(int argc, char **argv)
 {
-	char *vdev, *name;
-	int c, fd = -1, ret = 0;
+	char vdev[MAXPATHLEN];
+	char *name = NULL;
+	struct stat st;
+	int c, fd, ret = 0;
+	nvlist_t *config;
 	pool_state_t state;
 	boolean_t inuse = B_FALSE;
 	boolean_t force = B_FALSE;
@@ -655,88 +681,110 @@
 
 	/* get vdev name */
 	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing vdev device name\n"));
+		(void) fprintf(stderr, gettext("missing vdev name\n"));
 		usage(B_FALSE);
 	}
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
 
-	vdev = argv[0];
+	/*
+	 * Check if we were given absolute path and use it as is.
+	 * Otherwise if the provided vdev name doesn't point to a file,
+	 * try prepending dsk path and appending s0.
+	 */
+	(void) strlcpy(vdev, argv[0], sizeof (vdev));
+	if (vdev[0] != '/' && stat(vdev, &st) != 0) {
+		char *s;
+
+		(void) snprintf(vdev, sizeof (vdev), "%s/%s",
+#ifdef illumos
+		    ZFS_DISK_ROOT, argv[0]);
+		if ((s = strrchr(argv[0], 's')) == NULL ||
+		    !isdigit(*(s + 1)))
+			(void) strlcat(vdev, "s0", sizeof (vdev));
+#else
+		    "/dev", argv[0]);
+#endif
+		if (stat(vdev, &st) != 0) {
+			(void) fprintf(stderr, gettext(
+			    "failed to find device %s, try specifying absolute "
+			    "path instead\n"), argv[0]);
+			return (1);
+		}
+	}
+
 	if ((fd = open(vdev, O_RDWR)) < 0) {
-		(void) fprintf(stderr, gettext("Unable to open %s\n"), vdev);
-		return (B_FALSE);
+		(void) fprintf(stderr, gettext("failed to open %s: %s\n"),
+		    vdev, strerror(errno));
+		return (1);
 	}
 
-	name = NULL;
-	if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0) {
-		if (force)
-			goto wipe_label;
-		
+	if (zpool_read_label(fd, &config) != 0) {
 		(void) fprintf(stderr,
-		    gettext("Unable to determine pool state for %s\n"
-		    "Use -f to force the clearing any label data\n"), vdev);
+		    gettext("failed to read label from %s\n"), vdev);
+		return (1);
+	}
+	nvlist_free(config);
 
+	ret = zpool_in_use(g_zfs, fd, &state, &name, &inuse);
+	if (ret != 0) {
+		(void) fprintf(stderr,
+		    gettext("failed to check state for %s\n"), vdev);
 		return (1);
 	}
 
-	if (inuse) {
-		switch (state) {
-		default:
-		case POOL_STATE_ACTIVE:
-		case POOL_STATE_SPARE:
-		case POOL_STATE_L2CACHE:
-			(void) fprintf(stderr,
-gettext("labelclear operation failed.\n"
-	"\tVdev %s is a member (%s), of pool \"%s\".\n"
-	"\tTo remove label information from this device, export or destroy\n"
-	"\tthe pool, or remove %s from the configuration of this pool\n"
-	"\tand retry the labelclear operation\n"),
-			    vdev, zpool_pool_state_to_name(state), name, vdev);
-			ret = 1;
-			goto errout;
+	if (!inuse)
+		goto wipe_label;
 
-		case POOL_STATE_EXPORTED:
-			if (force)
-				break;
+	switch (state) {
+	default:
+	case POOL_STATE_ACTIVE:
+	case POOL_STATE_SPARE:
+	case POOL_STATE_L2CACHE:
+		(void) fprintf(stderr, gettext(
+		    "%s is a member (%s) of pool \"%s\"\n"),
+		    vdev, zpool_pool_state_to_name(state), name);
+		ret = 1;
+		goto errout;
 
-			(void) fprintf(stderr,
-gettext("labelclear operation failed.\n"
-	"\tVdev %s is a member of the exported pool \"%s\".\n"
-	"\tUse \"zpool labelclear -f %s\" to force the removal of label\n"
-	"\tinformation.\n"),
-			    vdev, name, vdev);
-			ret = 1;
-			goto errout;
+	case POOL_STATE_EXPORTED:
+		if (force)
+			break;
+		(void) fprintf(stderr, gettext(
+		    "use '-f' to override the following error:\n"
+		    "%s is a member of exported pool \"%s\"\n"),
+		    vdev, name);
+		ret = 1;
+		goto errout;
 
-		case POOL_STATE_POTENTIALLY_ACTIVE:
-			if (force)
-				break;
+	case POOL_STATE_POTENTIALLY_ACTIVE:
+		if (force)
+			break;
+		(void) fprintf(stderr, gettext(
+		    "use '-f' to override the following error:\n"
+		    "%s is a member of potentially active pool \"%s\"\n"),
+		    vdev, name);
+		ret = 1;
+		goto errout;
 
-			(void) fprintf(stderr,
-gettext("labelclear operation failed.\n"
-	"\tVdev %s is a member of the pool \"%s\".\n"
-	"\tThis pool is unknown to this system, but may be active on\n"
-	"\tanother system. Use \'zpool labelclear -f %s\' to force the\n"
-	"\tremoval of label information.\n"),
-			    vdev, name, vdev);
-			ret = 1;
-			goto errout;
-
-		case POOL_STATE_DESTROYED:
-			/* inuse should never be set for a destoryed pool... */
-			break;
-		}
+	case POOL_STATE_DESTROYED:
+		/* inuse should never be set for a destroyed pool */
+		assert(0);
+		break;
 	}
 
 wipe_label:
-	if (zpool_clear_label(fd) != 0) {
+	ret = zpool_clear_label(fd);
+	if (ret != 0) {
 		(void) fprintf(stderr,
-		    gettext("Label clear failed on vdev %s\n"), vdev);
-		ret = 1;
+		    gettext("failed to clear label for %s\n"), vdev);
 	}
 
 errout:
-	close(fd);
-	if (name != NULL)
-		free(name);
+	free(name);
+	(void) close(fd);
 
 	return (ret);
 }
@@ -744,14 +792,15 @@
 /*
  * zpool create [-fnd] [-o property=value] ...
  *		[-O file-system-property=value] ...
- *		[-R root] [-m mountpoint] <pool> <dev> ...
+ *		[-R root] [-m mountpoint] [-t tempname] <pool> <dev> ...
  *
  *	-f	Force creation, even if devices appear in use
  *	-n	Do not create the pool, but display the resulting layout if it
  *		were to be created.
- *      -R	Create a pool under an alternate root
- *      -m	Set default mountpoint for the root dataset.  By default it's
+ *	-R	Create a pool under an alternate root
+ *	-m	Set default mountpoint for the root dataset.  By default it's
  *		'/<pool>'
+ *	-t	Use the temporary name until the pool is exported.
  *	-o	Set property=value.
  *	-d	Don't automatically enable all supported pool features
  *		(individual features can be enabled with -o).
@@ -771,6 +820,7 @@
 	int c;
 	nvlist_t *nvroot = NULL;
 	char *poolname;
+	char *tname = NULL;
 	int ret = 1;
 	char *altroot = NULL;
 	char *mountpoint = NULL;
@@ -779,7 +829,7 @@
 	char *propval;
 
 	/* check options */
-	while ((c = getopt(argc, argv, ":fndR:m:o:O:")) != -1) {
+	while ((c = getopt(argc, argv, ":fndR:m:o:O:t:")) != -1) {
 		switch (c) {
 		case 'f':
 			force = B_TRUE;
@@ -795,11 +845,7 @@
 			if (add_prop_list(zpool_prop_to_name(
 			    ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE))
 				goto errout;
-			if (nvlist_lookup_string(props,
-			    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
-			    &propval) == 0)
-				break;
-			if (add_prop_list(zpool_prop_to_name(
+			if (add_prop_list_default(zpool_prop_to_name(
 			    ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE))
 				goto errout;
 			break;
@@ -833,6 +879,8 @@
 					enable_all_pool_feat = B_FALSE;
 				}
 			}
+			if (zpool_name_to_prop(optarg) == ZPOOL_PROP_ALTROOT)
+				altroot = propval;
 			break;
 		case 'O':
 			if ((propval = strchr(optarg, '=')) == NULL) {
@@ -856,6 +904,27 @@
 				goto errout;
 			}
 			break;
+		case 't':
+			/*
+			 * Sanity check temporary pool name.
+			 */
+			if (strchr(optarg, '/') != NULL) {
+				(void) fprintf(stderr, gettext("cannot create "
+				    "'%s': invalid character '/' in temporary "
+				    "name\n"), optarg);
+				(void) fprintf(stderr, gettext("use 'zfs "
+				    "create' to create a dataset\n"));
+				goto errout;
+			}
+
+			if (add_prop_list(zpool_prop_to_name(
+			    ZPOOL_PROP_TNAME), optarg, &props, B_TRUE))
+				goto errout;
+			if (add_prop_list_default(zpool_prop_to_name(
+			    ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE))
+				goto errout;
+			tname = optarg;
+			break;
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
@@ -917,10 +986,11 @@
 	/*
 	 * Check the validity of the mountpoint and direct the user to use the
 	 * '-m' mountpoint option if it looks like its in use.
+	 * Ignore the checks if the '-f' option is given.
 	 */
-	if (mountpoint == NULL ||
+	if (!force && (mountpoint == NULL ||
 	    (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 &&
-	    strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) {
+	    strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0))) {
 		char buf[MAXPATHLEN];
 		DIR *dirp;
 
@@ -1002,7 +1072,7 @@
 		 * Hand off to libzfs.
 		 */
 		if (enable_all_pool_feat) {
-			int i;
+			spa_feature_t i;
 			for (i = 0; i < SPA_FEATURES; i++) {
 				char propname[MAXPATHLEN];
 				zfeature_info_t *feat = &spa_feature_table[i];
@@ -1027,8 +1097,8 @@
 		ret = 1;
 		if (zpool_create(g_zfs, poolname,
 		    nvroot, props, fsprops) == 0) {
-			zfs_handle_t *pool = zfs_open(g_zfs, poolname,
-			    ZFS_TYPE_FILESYSTEM);
+			zfs_handle_t *pool = zfs_open(g_zfs,
+			    tname ? tname : poolname, ZFS_TYPE_FILESYSTEM);
 			if (pool != NULL) {
 				if (zfs_mount(pool, NULL, 0) == 0)
 					ret = zfs_shareall(pool);
@@ -1294,12 +1364,13 @@
     int namewidth, int depth, boolean_t isspare)
 {
 	nvlist_t **child;
-	uint_t c, children;
+	uint_t c, vsc, children;
 	pool_scan_stat_t *ps = NULL;
 	vdev_stat_t *vs;
 	char rbuf[6], wbuf[6], cbuf[6];
 	char *vname;
 	uint64_t notpresent;
+	uint64_t ashift;
 	spare_cbdata_t cb;
 	const char *state;
 
@@ -1308,7 +1379,7 @@
 		children = 0;
 
 	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
-	    (uint64_t **)&vs, &c) == 0);
+	    (uint64_t **)&vs, &vsc) == 0);
 
 	state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
 	if (isspare) {
@@ -1362,6 +1433,10 @@
 			(void) printf(gettext("unsupported feature(s)"));
 			break;
 
+		case VDEV_AUX_ASHIFT_TOO_BIG:
+			(void) printf(gettext("unsupported minimum blocksize"));
+			break;
+
 		case VDEV_AUX_SPARED:
 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
 			    &cb.cb_guid) == 0);
@@ -1404,6 +1479,12 @@
 			(void) printf(gettext("corrupted data"));
 			break;
 		}
+	} else if (children == 0 && !isspare &&
+	    VDEV_STAT_VALID(vs_physical_ashift, vsc) &&
+	    vs->vs_configured_ashift < vs->vs_physical_ashift) {
+		(void) printf(
+		    gettext("  block size: %dB configured, %dB native"),
+		    1 << vs->vs_configured_ashift, 1 << vs->vs_physical_ashift);
 	}
 
 	(void) nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_SCAN_STATS,
@@ -1689,6 +1770,12 @@
 		    "resilvered.\n"));
 		break;
 
+	case ZPOOL_STATUS_NON_NATIVE_ASHIFT:
+		(void) printf(gettext("status: One or more devices were "
+		    "configured to use a non-native block size.\n"
+		    "\tExpect reduced performance.\n"));
+		break;
+
 	default:
 		/*
 		 * No other status can be seen when importing pools.
@@ -1880,7 +1967,8 @@
  *       import [-o mntopts] [-o prop=value] ... [-R root] [-D]
  *              [-d dir | -c cachefile] [-f] -a
  *       import [-o mntopts] [-o prop=value] ... [-R root] [-D]
- *              [-d dir | -c cachefile] [-f] [-n] [-F] <pool | id> [newpool]
+ *              [-d dir | -c cachefile] [-f] [-n] [-F] [-t]
+ *              <pool | id> [newpool]
  *
  *	 -c	Read pool information from a cachefile instead of searching
  *		devices.
@@ -1909,6 +1997,9 @@
  *
  *       -N     Import the pool but don't mount datasets.
  *
+ *       -t     Use newpool as a temporary pool name instead of renaming
+ *       	the pool.
+ *
  *       -T     Specify a starting txg to use for import. This option is
  *       	intentionally undocumented option for testing purposes.
  *
@@ -1950,7 +2041,7 @@
 	char *endptr;
 
 	/* check options */
-	while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:rR:T:VX")) != -1) {
+	while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:R:tT:VX")) != -1) {
 		switch (c) {
 		case 'a':
 			do_all = B_TRUE;
@@ -2004,17 +2095,19 @@
 			if (add_prop_list(zpool_prop_to_name(
 			    ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE))
 				goto error;
-			if (nvlist_lookup_string(props,
-			    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
-			    &propval) == 0)
-				break;
-			if (add_prop_list(zpool_prop_to_name(
+			if (add_prop_list_default(zpool_prop_to_name(
 			    ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE))
 				goto error;
 			break;
+		case 't':
+			flags |= ZFS_IMPORT_TEMP_NAME;
+			if (add_prop_list_default(zpool_prop_to_name(
+			    ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE))
+				goto error;
+			break;
 		case 'T':
 			errno = 0;
-			txg = strtoull(optarg, &endptr, 10);
+			txg = strtoull(optarg, &endptr, 0);
 			if (errno != 0 || *endptr != '\0') {
 				(void) fprintf(stderr,
 				    gettext("invalid txg value\n"));
@@ -2116,8 +2209,10 @@
 
 		errno = 0;
 		searchguid = strtoull(argv[0], &endptr, 10);
-		if (errno != 0 || *endptr != '\0')
+		if (errno != 0 || *endptr != '\0') {
 			searchname = argv[0];
+			searchguid = 0;
+		}
 		found_config = NULL;
 
 		/*
@@ -2140,9 +2235,9 @@
 		(void) fprintf(stderr, gettext("cannot import '%s': "
 		    "a pool with that name already exists\n"),
 		    argv[0]);
-		(void) fprintf(stderr, gettext("use the form '%s "
-		    "<pool | id> <newpool>' to give it a new name\n"),
-		    "zpool import");
+		(void) fprintf(stderr, gettext("use the form 'zpool import "
+		    "[-t] <pool | id> <newpool>' to give it a new temporary "
+		    "or permanent name\n"));
 		err = 1;
 	} else if (pools == NULL && idata.exists) {
 		(void) fprintf(stderr, gettext("cannot import '%s': "
@@ -2744,6 +2839,7 @@
 	int		cb_namewidth;
 	boolean_t	cb_scripted;
 	zprop_list_t	*cb_proplist;
+	boolean_t	cb_literal;
 } list_cbdata_t;
 
 /*
@@ -2835,12 +2931,9 @@
 
 		right_justify = B_FALSE;
 		if (pl->pl_prop != ZPROP_INVAL) {
-			if (pl->pl_prop == ZPOOL_PROP_EXPANDSZ &&
-			    zpool_get_prop_int(zhp, pl->pl_prop, NULL) == 0)
+			if (zpool_get_prop(zhp, pl->pl_prop, property,
+			    sizeof (property), NULL, cb->cb_literal) != 0)
 				propstr = "-";
-			else if (zpool_get_prop(zhp, pl->pl_prop, property,
-			    sizeof (property), NULL) != 0)
-				propstr = "-";
 			else
 				propstr = property;
 
@@ -2872,15 +2965,36 @@
 }
 
 static void
-print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted)
+print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted,
+    boolean_t valid)
 {
 	char propval[64];
 	boolean_t fixed;
 	size_t width = zprop_width(prop, &fixed, ZFS_TYPE_POOL);
 
-	zfs_nicenum(value, propval, sizeof (propval));
+	switch (prop) {
+	case ZPOOL_PROP_EXPANDSZ:
+		if (value == 0)
+			(void) strlcpy(propval, "-", sizeof (propval));
+		else
+			zfs_nicenum(value, propval, sizeof (propval));
+		break;
+	case ZPOOL_PROP_FRAGMENTATION:
+		if (value == ZFS_FRAG_INVALID) {
+			(void) strlcpy(propval, "-", sizeof (propval));
+		} else {
+			(void) snprintf(propval, sizeof (propval), "%llu%%",
+			    value);
+		}
+		break;
+	case ZPOOL_PROP_CAPACITY:
+		(void) snprintf(propval, sizeof (propval), "%llu%%", value);
+		break;
+	default:
+		zfs_nicenum(value, propval, sizeof (propval));
+	}
 
-	if (prop == ZPOOL_PROP_EXPANDSZ && value == 0)
+	if (!valid)
 		(void) strlcpy(propval, "-", sizeof (propval));
 
 	if (scripted)
@@ -2898,11 +3012,17 @@
 	uint_t c, children;
 	char *vname;
 	boolean_t scripted = cb->cb_scripted;
+	uint64_t islog = B_FALSE;
+	boolean_t haslog = B_FALSE;
+	char *dashes = "%-*s      -      -      -         -      -      -\n";
 
 	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
 	    (uint64_t **)&vs, &c) == 0);
 
 	if (name != NULL) {
+		boolean_t toplevel = (vs->vs_space != 0);
+		uint64_t cap;
+
 		if (scripted)
 			(void) printf("\t%s", name);
 		else if (strlen(name) + depth > cb->cb_namewidth)
@@ -2911,22 +3031,26 @@
 			(void) printf("%*s%s%*s", depth, "", name,
 			    (int)(cb->cb_namewidth - strlen(name) - depth), "");
 
-		/* only toplevel vdevs have capacity stats */
-		if (vs->vs_space == 0) {
-			if (scripted)
-				(void) printf("\t-\t-\t-");
-			else
-				(void) printf("      -      -      -");
-		} else {
-			print_one_column(ZPOOL_PROP_SIZE, vs->vs_space,
-			    scripted);
-			print_one_column(ZPOOL_PROP_CAPACITY, vs->vs_alloc,
-			    scripted);
-			print_one_column(ZPOOL_PROP_FREE,
-			    vs->vs_space - vs->vs_alloc, scripted);
-		}
-		print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize,
-		    scripted);
+		/*
+		 * Print the properties for the individual vdevs. Some
+		 * properties are only applicable to toplevel vdevs. The
+		 * 'toplevel' boolean value is passed to the print_one_column()
+		 * to indicate that the value is valid.
+		 */
+		print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, scripted,
+		    toplevel);
+		print_one_column(ZPOOL_PROP_ALLOCATED, vs->vs_alloc, scripted,
+		    toplevel);
+		print_one_column(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc,
+		    scripted, toplevel);
+		print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, scripted,
+		    B_TRUE);
+		print_one_column(ZPOOL_PROP_FRAGMENTATION,
+		    vs->vs_fragmentation, scripted,
+		    (vs->vs_fragmentation != ZFS_FRAG_INVALID && toplevel));
+		cap = (vs->vs_space == 0) ? 0 :
+		    (vs->vs_alloc * 100 / vs->vs_space);
+		print_one_column(ZPOOL_PROP_CAPACITY, cap, scripted, toplevel);
 		(void) printf("\n");
 	}
 
@@ -2941,24 +3065,47 @@
 		    ZPOOL_CONFIG_IS_HOLE, &ishole) == 0 && ishole)
 			continue;
 
+		if (nvlist_lookup_uint64(child[c],
+		    ZPOOL_CONFIG_IS_LOG, &islog) == 0 && islog) {
+			haslog = B_TRUE;
+			continue;
+		}
+
 		vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE);
 		print_list_stats(zhp, vname, child[c], cb, depth + 2);
 		free(vname);
 	}
 
-	/*
-	 * Include level 2 ARC devices in iostat output
-	 */
+	if (haslog == B_TRUE) {
+		/* LINTED E_SEC_PRINTF_VAR_FMT */
+		(void) printf(dashes, cb->cb_namewidth, "log");
+		for (c = 0; c < children; c++) {
+			if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+			    &islog) != 0 || !islog)
+				continue;
+			vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE);
+			print_list_stats(zhp, vname, child[c], cb, depth + 2);
+			free(vname);
+		}
+	}
+
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
-	    &child, &children) != 0)
-		return;
+	    &child, &children) == 0 && children > 0) {
+		/* LINTED E_SEC_PRINTF_VAR_FMT */
+		(void) printf(dashes, cb->cb_namewidth, "cache");
+		for (c = 0; c < children; c++) {
+			vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE);
+			print_list_stats(zhp, vname, child[c], cb, depth + 2);
+			free(vname);
+		}
+	}
 
-	if (children > 0) {
-		(void) printf("%-*s      -      -      -      -      -      "
-		    "-\n", cb->cb_namewidth, "cache");
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child,
+	    &children) == 0 && children > 0) {
+		/* LINTED E_SEC_PRINTF_VAR_FMT */
+		(void) printf(dashes, cb->cb_namewidth, "spare");
 		for (c = 0; c < children; c++) {
-			vname = zpool_vdev_name(g_zfs, zhp, child[c],
-			    B_FALSE);
+			vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE);
 			print_list_stats(zhp, vname, child[c], cb, depth + 2);
 			free(vname);
 		}
@@ -2990,12 +3137,14 @@
 }
 
 /*
- * zpool list [-H] [-o prop[,prop]*] [-T d|u] [pool] ... [interval [count]]
+ * zpool list [-Hp] [-o prop[,prop]*] [-T d|u] [pool] ... [interval [count]]
  *
  *	-H	Scripted mode.  Don't display headers, and separate properties
  *		by a single tab.
  *	-o	List of properties to display.  Defaults to
- *		"name,size,allocated,free,capacity,health,altroot"
+ *		"name,size,allocated,free,expandsize,fragmentation,capacity,"
+ *		"dedupratio,health,altroot"
+ * 	-p	Display values in parsable (exact) format.
  *	-T	Display a timestamp in date(1) or Unix format
  *
  * List all pools in the system, whether or not they're healthy.  Output space
@@ -3008,8 +3157,8 @@
 	int ret;
 	list_cbdata_t cb = { 0 };
 	static char default_props[] =
-	    "name,size,allocated,free,capacity,dedupratio,"
-	    "health,altroot";
+	    "name,size,allocated,free,expandsize,fragmentation,capacity,"
+	    "dedupratio,health,altroot";
 	char *props = default_props;
 	unsigned long interval = 0, count = 0;
 	zpool_list_t *list;
@@ -3016,7 +3165,7 @@
 	boolean_t first = B_TRUE;
 
 	/* check options */
-	while ((c = getopt(argc, argv, ":Ho:T:v")) != -1) {
+	while ((c = getopt(argc, argv, ":Ho:pT:v")) != -1) {
 		switch (c) {
 		case 'H':
 			cb.cb_scripted = B_TRUE;
@@ -3024,6 +3173,9 @@
 		case 'o':
 			props = optarg;
 			break;
+		case 'p':
+			cb.cb_literal = B_TRUE;
+			break;
 		case 'T':
 			get_timestamp_arg(*optarg);
 			break;
@@ -3050,17 +3202,10 @@
 	if (zprop_get_list(g_zfs, props, &cb.cb_proplist, ZFS_TYPE_POOL) != 0)
 		usage(B_FALSE);
 
-	if ((list = pool_list_get(argc, argv, &cb.cb_proplist, &ret)) == NULL)
-		return (1);
-
-	if (argc == 0 && !cb.cb_scripted && pool_list_count(list) == 0) {
-		(void) printf(gettext("no pools available\n"));
-		zprop_free_list(cb.cb_proplist);
-		return (0);
-	}
-
 	for (;;) {
-		pool_list_update(list);
+		if ((list = pool_list_get(argc, argv, &cb.cb_proplist,
+		    &ret)) == NULL)
+			return (1);
 
 		if (pool_list_count(list) == 0)
 			break;
@@ -3083,40 +3228,20 @@
 		if (count != 0 && --count == 0)
 			break;
 
+		pool_list_free(list);
 		(void) sleep(interval);
 	}
 
+	if (argc == 0 && !cb.cb_scripted && pool_list_count(list) == 0) {
+		(void) printf(gettext("no pools available\n"));
+		ret = 0;
+	}
+
+	pool_list_free(list);
 	zprop_free_list(cb.cb_proplist);
 	return (ret);
 }
 
-static nvlist_t *
-zpool_get_vdev_by_name(nvlist_t *nv, char *name)
-{
-	nvlist_t **child;
-	uint_t c, children;
-	nvlist_t *match;
-	char *path;
-
-	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-	    &child, &children) != 0) {
-		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
-		if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
-			name += sizeof(_PATH_DEV) - 1;
-		if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
-			path += sizeof(_PATH_DEV) - 1;
-		if (strcmp(name, path) == 0)
-			return (nv);
-		return (NULL);
-	}
-
-	for (c = 0; c < children; c++)
-		if ((match = zpool_get_vdev_by_name(child[c], name)) != NULL)
-			return (match);
-
-	return (NULL);
-}
-
 static int
 zpool_do_attach_or_replace(int argc, char **argv, int replacing)
 {
@@ -3332,8 +3457,7 @@
 			if (add_prop_list(
 			    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), optarg,
 			    &props, B_TRUE) != 0) {
-				if (props)
-					nvlist_free(props);
+				nvlist_free(props);
 				usage(B_FALSE);
 			}
 			break;
@@ -3346,8 +3470,7 @@
 				propval++;
 				if (add_prop_list(optarg, propval,
 				    &props, B_TRUE) != 0) {
-					if (props)
-						nvlist_free(props);
+					nvlist_free(props);
 					usage(B_FALSE);
 				}
 			} else {
@@ -3699,23 +3822,38 @@
  * zpool reopen <pool>
  *
  * Reopen the pool so that the kernel can update the sizes of all vdevs.
- *
- * NOTE: This command is currently undocumented.  If the command is ever
- * exposed then the appropriate usage() messages will need to be made.
  */
 int
 zpool_do_reopen(int argc, char **argv)
 {
+	int c;
 	int ret = 0;
 	zpool_handle_t *zhp;
 	char *pool;
 
+	/* check options */
+	while ((c = getopt(argc, argv, "")) != -1) {
+		switch (c) {
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
 	argc--;
 	argv++;
 
-	if (argc != 1)
-		return (2);
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name\n"));
+		usage(B_FALSE);
+	}
 
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
 	pool = argv[0];
 	if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL)
 		return (1);
@@ -3832,7 +3970,7 @@
 	 */
 	if (ps->pss_state == DSS_FINISHED) {
 		uint64_t minutes_taken = (end - start) / 60;
-		char *fmt;
+		char *fmt = NULL;
 
 		if (ps->pss_func == POOL_SCAN_SCRUB) {
 			fmt = gettext("scrub repaired %s in %lluh%um with "
@@ -3996,7 +4134,7 @@
 
 	/*
 	 * If the pool was faulted then we may not have been able to
-	 * obtain the config. Otherwise, if have anything in the dedup
+	 * obtain the config. Otherwise, if we have anything in the dedup
 	 * table continue processing the stats.
 	 */
 	if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS,
@@ -4060,6 +4198,7 @@
 	if (cbp->cb_explain &&
 	    (reason == ZPOOL_STATUS_OK ||
 	    reason == ZPOOL_STATUS_VERSION_OLDER ||
+	    reason == ZPOOL_STATUS_NON_NATIVE_ASHIFT ||
 	    reason == ZPOOL_STATUS_FEAT_DISABLED)) {
 		if (!cbp->cb_allpools) {
 			(void) printf(gettext("pool '%s' is healthy\n"),
@@ -4267,6 +4406,15 @@
 		    "'zpool clear'.\n"));
 		break;
 
+	case ZPOOL_STATUS_NON_NATIVE_ASHIFT:
+		(void) printf(gettext("status: One or more devices are "
+		    "configured to use a non-native block size.\n"
+		    "\tExpect reduced performance.\n"));
+		(void) printf(gettext("action: Replace affected devices with "
+		    "devices that support the\n\tconfigured block size, or "
+		    "migrate data to a properly configured\n\tpool.\n"));
+		break;
+
 	default:
 		/*
 		 * The remaining errors can't actually be generated, yet.
@@ -4432,11 +4580,12 @@
 }
 
+/*
+ * State shared between 'zpool upgrade' and its per-pool callbacks.
+ *
+ *	cb_first	B_TRUE until a callback has printed its list header;
+ *			the caller re-arms it before each iteration.
+ *	cb_unavail	Set by a callback when it meets a pool in
+ *			POOL_STATE_UNAVAIL.
+ *	cb_poolname	Filled in by root_pool_upgrade_check() with the name
+ *			of the root pool, if one is upgraded.
+ *	cb_version	Version the upgrade is compared against (presumably
+ *			the requested target version -- confirm at the caller).
+ *	cb_argc/cb_argv	NOTE(review): their use is not visible in this
+ *			excerpt; presumably the remaining command-line
+ *			arguments.
+ */
 typedef struct upgrade_cbdata {
-	int	cb_first;
-	char	cb_poolname[ZPOOL_MAXNAMELEN];
-	int	cb_argc;
-	uint64_t cb_version;
-	char	**cb_argv;
+	boolean_t	cb_first;
+	boolean_t	cb_unavail;
+	char		cb_poolname[ZFS_MAX_DATASET_NAME_LEN];
+	int		cb_argc;
+	uint64_t	cb_version;
+	char		**cb_argv;
 } upgrade_cbdata_t;
 
 #ifdef __FreeBSD__
@@ -4466,7 +4615,8 @@
 }
 
 static void
-root_pool_upgrade_check(zpool_handle_t *zhp, char *poolname, int size) {
+root_pool_upgrade_check(zpool_handle_t *zhp, char *poolname, int size)
+{
 
 	if (poolname[0] == '\0' && is_root_pool(zhp))
 		(void) strlcpy(poolname, zpool_get_name(zhp), size);
@@ -4551,6 +4701,14 @@
 	boolean_t printnl = B_FALSE;
 	int ret;
 
+	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
+		(void) fprintf(stderr, gettext("cannot upgrade '%s': pool is "
+		    "currently unavailable.\n\n"), zpool_get_name(zhp));
+		cbp->cb_unavail = B_TRUE;
+		/* Allow iteration to continue. */
+		return (0);
+	}
+
 	config = zpool_get_config(zhp, NULL);
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
 	    &version) == 0);
@@ -4565,7 +4723,7 @@
 #ifdef __FreeBSD__
 		root_pool_upgrade_check(zhp, cbp->cb_poolname,
 		    sizeof(cbp->cb_poolname));
-#endif	/* ___FreeBSD__ */
+#endif	/* __FreeBSD__ */
 		printnl = B_TRUE;
 
 #ifdef illumos
@@ -4589,6 +4747,10 @@
 		if (count > 0) {
 			cbp->cb_first = B_FALSE;
 			printnl = B_TRUE;
+#ifdef __FreeBSD__
+			root_pool_upgrade_check(zhp, cbp->cb_poolname,
+			    sizeof(cbp->cb_poolname));
+#endif	/* __FreeBSD__ */
 			/*
 			 * If they did "zpool upgrade -a", then we could
 			 * be doing ioctls to different pools.  We need
@@ -4608,6 +4770,26 @@
 }
 
 static int
+upgrade_list_unavail(zpool_handle_t *zhp, void *arg)
+{
+	upgrade_cbdata_t *cbp = arg;
+
+	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
+		if (cbp->cb_first) {
+			(void) fprintf(stderr, gettext("The following pools "
+			    "are unavailable and cannot be upgraded as this "
+			    "time.\n\n"));
+			(void) fprintf(stderr, gettext("POOL\n"));
+			(void) fprintf(stderr, gettext("------------\n"));
+			cbp->cb_first = B_FALSE;
+		}
+		(void) printf(gettext("%s\n"), zpool_get_name(zhp));
+		cbp->cb_unavail = B_TRUE;
+	}
+	return (0);
+}
+
+static int
 upgrade_list_older_cb(zpool_handle_t *zhp, void *arg)
 {
 	upgrade_cbdata_t *cbp = arg;
@@ -4614,6 +4796,15 @@
 	nvlist_t *config;
 	uint64_t version;
 
+	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
+		/*
+		 * This will have been reported by upgrade_list_unavail so
+		 * just allow iteration to continue.
+		 */
+		cbp->cb_unavail = B_TRUE;
+		return (0);
+	}
+
 	config = zpool_get_config(zhp, NULL);
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
 	    &version) == 0);
@@ -4647,6 +4838,15 @@
 	nvlist_t *config;
 	uint64_t version;
 
+	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
+		/*
+		 * This will have been reported by upgrade_list_unavail so
+		 * just allow iteration to continue.
+		 */
+		cbp->cb_unavail = B_TRUE;
+		return (0);
+	}
+
 	config = zpool_get_config(zhp, NULL);
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
 	    &version) == 0);
@@ -4700,10 +4900,17 @@
 	uint64_t cur_version;
 	int ret;
 
+	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
+		(void) fprintf(stderr, gettext("cannot upgrade '%s': pool is "
+		    "is currently unavailable.\n\n"), zpool_get_name(zhp));
+		cbp->cb_unavail = B_TRUE;
+		return (1);
+	}
+
 	if (strcmp("log", zpool_get_name(zhp)) == 0) {
 		(void) printf(gettext("'log' is now a reserved word\n"
 		    "Pool 'log' must be renamed using export and import"
-		    " to upgrade.\n"));
+		    " to upgrade.\n\n"));
 		return (1);
 	}
 
@@ -4730,7 +4937,7 @@
 #ifdef __FreeBSD__
 		root_pool_upgrade_check(zhp, cbp->cb_poolname,
 		    sizeof(cbp->cb_poolname));
-#endif	/* ___FreeBSD__ */
+#endif	/* __FreeBSD__ */
 	}
 
 	if (cbp->cb_version >= SPA_VERSION_FEATURES) {
@@ -4747,7 +4954,7 @@
 #endif	/* __FreeBSD __*/
 		} else if (cur_version == SPA_VERSION) {
 			(void) printf(gettext("Pool '%s' already has all "
-			    "supported features enabled.\n"),
+			    "supported features enabled.\n\n"),
 			    zpool_get_name(zhp));
 		}
 	}
@@ -4848,7 +5055,8 @@
 		    "---------------\n");
 		for (i = 0; i < SPA_FEATURES; i++) {
 			zfeature_info_t *fi = &spa_feature_table[i];
-			const char *ro = fi->fi_can_readonly ?
+			const char *ro =
+			    (fi->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
 			    " (read-only compatible)" : "";
 
 			(void) printf("%-37s%s\n", fi->fi_uname, ro);
@@ -4904,11 +5112,13 @@
 		ret = zpool_iter(g_zfs, upgrade_cb, &cb);
 		if (ret == 0 && cb.cb_first) {
 			if (cb.cb_version == SPA_VERSION) {
-				(void) printf(gettext("All pools are already "
-				    "formatted using feature flags.\n\n"));
-				(void) printf(gettext("Every feature flags "
+				(void) printf(gettext("All %spools are already "
+				    "formatted using feature flags.\n\n"),
+				    cb.cb_unavail ? gettext("available ") : "");
+				(void) printf(gettext("Every %sfeature flags "
 				    "pool already has all supported features "
-				    "enabled.\n"));
+				    "enabled.\n"),
+				    cb.cb_unavail ? gettext("available ") : "");
 			} else {
 				(void) printf(gettext("All pools are already "
 				    "formatted with version %llu or higher.\n"),
@@ -4917,12 +5127,21 @@
 		}
 	} else if (argc == 0) {
 		cb.cb_first = B_TRUE;
+		ret = zpool_iter(g_zfs, upgrade_list_unavail, &cb);
+		assert(ret == 0);
+
+		if (!cb.cb_first) {
+			(void) fprintf(stderr, "\n");
+		}
+
+		cb.cb_first = B_TRUE;
 		ret = zpool_iter(g_zfs, upgrade_list_older_cb, &cb);
 		assert(ret == 0);
 
 		if (cb.cb_first) {
-			(void) printf(gettext("All pools are formatted "
-			    "using feature flags.\n\n"));
+			(void) printf(gettext("All %spools are formatted using "
+			    "feature flags.\n\n"), cb.cb_unavail ?
+			    gettext("available ") : "");
 		} else {
 			(void) printf(gettext("\nUse 'zpool upgrade -v' "
 			    "for a list of available legacy versions.\n"));
@@ -4933,13 +5152,14 @@
 		assert(ret == 0);
 
 		if (cb.cb_first) {
-			(void) printf(gettext("Every feature flags pool has "
-			    "all supported features enabled.\n"));
+			(void) printf(gettext("Every %sfeature flags pool has "
+			    "all supported features enabled.\n"),
+			    cb.cb_unavail ? gettext("available ") : "");
 		} else {
 			(void) printf(gettext("\n"));
 		}
 	} else {
-		ret = for_each_pool(argc, argv, B_FALSE, NULL,
+		ret = for_each_pool(argc, argv, B_TRUE, NULL,
 		    upgrade_one, &cb);
 	}
 
@@ -5154,7 +5374,7 @@
 			}
 		} else {
 			if (zpool_get_prop(zhp, pl->pl_prop, value,
-			    sizeof (value), &srctype) != 0)
+			    sizeof (value), &srctype, cbp->cb_literal) != 0)
 				continue;
 
 			zprop_print_one_property(zpool_get_name(zhp), cbp,
@@ -5165,6 +5385,18 @@
 	return (0);
 }
 
+/*
+ * zpool get [-Hp] [-o "all" | field[,...]] <"all" | property[,...]> <pool> ...
+ *
+ *	-H	Scripted mode.  Don't display headers, and separate properties
+ *		by a single tab.
+ *	-o	List of columns to display.  Defaults to
+ *		"name,property,value,source".
+ * 	-p	Display values in parsable (exact) format.
+ *
+ * Get properties of pools in the system. Output space statistics
+ * for each one as well as other attributes.
+ */
 int
 zpool_do_get(int argc, char **argv)
 {
@@ -5171,14 +5403,14 @@
 	zprop_get_cbdata_t cb = { 0 };
 	zprop_list_t fake_name = { 0 };
 	int ret;
+	int c, i;
+	char *value;
 
-	if (argc < 2) {
-		(void) fprintf(stderr, gettext("missing property "
-		    "argument\n"));
-		usage(B_FALSE);
-	}
+	cb.cb_first = B_TRUE;
 
-	cb.cb_first = B_TRUE;
+	/*
+	 * Set up default columns and sources.
+	 */
 	cb.cb_sources = ZPROP_SRC_ALL;
 	cb.cb_columns[0] = GET_COL_NAME;
 	cb.cb_columns[1] = GET_COL_PROPERTY;
@@ -5186,10 +5418,89 @@
 	cb.cb_columns[3] = GET_COL_SOURCE;
 	cb.cb_type = ZFS_TYPE_POOL;
 
-	if (zprop_get_list(g_zfs, argv[1], &cb.cb_proplist,
+	/* check options */
+	while ((c = getopt(argc, argv, ":Hpo:")) != -1) {
+		switch (c) {
+		case 'p':
+			cb.cb_literal = B_TRUE;
+			break;
+		case 'H':
+			cb.cb_scripted = B_TRUE;
+			break;
+		case 'o':
+			bzero(&cb.cb_columns, sizeof (cb.cb_columns));
+			i = 0;
+			while (*optarg != '\0') {
+				static char *col_subopts[] =
+				{ "name", "property", "value", "source",
+				"all", NULL };
+
+				if (i == ZFS_GET_NCOLS) {
+					(void) fprintf(stderr, gettext("too "
+					"many fields given to -o "
+					"option\n"));
+					usage(B_FALSE);
+				}
+
+				switch (getsubopt(&optarg, col_subopts,
+				    &value)) {
+				case 0:
+					cb.cb_columns[i++] = GET_COL_NAME;
+					break;
+				case 1:
+					cb.cb_columns[i++] = GET_COL_PROPERTY;
+					break;
+				case 2:
+					cb.cb_columns[i++] = GET_COL_VALUE;
+					break;
+				case 3:
+					cb.cb_columns[i++] = GET_COL_SOURCE;
+					break;
+				case 4:
+					if (i > 0) {
+						(void) fprintf(stderr,
+						    gettext("\"all\" conflicts "
+						    "with specific fields "
+						    "given to -o option\n"));
+						usage(B_FALSE);
+					}
+					cb.cb_columns[0] = GET_COL_NAME;
+					cb.cb_columns[1] = GET_COL_PROPERTY;
+					cb.cb_columns[2] = GET_COL_VALUE;
+					cb.cb_columns[3] = GET_COL_SOURCE;
+					i = ZFS_GET_NCOLS;
+					break;
+				default:
+					(void) fprintf(stderr,
+					    gettext("invalid column name "
+					    "'%s'\n"), suboptarg);
+					usage(B_FALSE);
+				}
+			}
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing property "
+		    "argument\n"));
+		usage(B_FALSE);
+	}
+
+	if (zprop_get_list(g_zfs, argv[0], &cb.cb_proplist,
 	    ZFS_TYPE_POOL) != 0)
 		usage(B_FALSE);
 
+	argc--;
+	argv++;
+
 	if (cb.cb_proplist != NULL) {
 		fake_name.pl_prop = ZPOOL_PROP_NAME;
 		fake_name.pl_width = strlen(gettext("NAME"));
@@ -5197,7 +5508,7 @@
 		cb.cb_proplist = &fake_name;
 	}
 
-	ret = for_each_pool(argc - 2, argv + 2, B_TRUE, &cb.cb_proplist,
+	ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist,
 	    get_callback, &cb);
 
 	if (cb.cb_proplist == &fake_name)
@@ -5293,7 +5604,7 @@
 int
 main(int argc, char **argv)
 {
-	int ret;
+	int ret = 0;
 	int i;
 	char *cmdname;
 

Modified: trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *

Modified: trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -21,6 +22,8 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
+ * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
  */
 
 /*
@@ -77,8 +80,6 @@
 
 #include "zpool_util.h"
 
-#define	DISK_ROOT	"/dev/dsk"
-#define	RDISK_ROOT	"/dev/rdsk"
 #define	BACKUP_SLICE	"s2"
 
 /*
@@ -111,7 +112,7 @@
 	va_end(ap);
 }
 
-#ifdef sun
+#ifdef illumos
 static void
 libdiskmgt_error(int error)
 {
@@ -273,7 +274,7 @@
 
 	return (check_slice(path, force, B_FALSE, isspare));
 }
-#endif	/* sun */
+#endif	/* illumos */
 
 /*
  * Check that a file is valid.  All we can do in this case is check that it's
@@ -289,7 +290,7 @@
 	pool_state_t state;
 	boolean_t inuse;
 
-#ifdef sun
+#ifdef illumos
 	if (dm_inuse_swap(file, &err)) {
 		if (err)
 			libdiskmgt_error(err);
@@ -376,13 +377,13 @@
 static boolean_t
 is_whole_disk(const char *arg)
 {
-#ifdef sun
+#ifdef illumos
 	struct dk_gpt *label;
 	int	fd;
 	char	path[MAXPATHLEN];
 
 	(void) snprintf(path, sizeof (path), "%s%s%s",
-	    RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
+	    ZFS_RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
 	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0)
 		return (B_FALSE);
 	if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
@@ -512,6 +513,7 @@
 		verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
 		    (uint64_t)wholedisk) == 0);
 
+#ifdef have_devid
 	/*
 	 * For a whole disk, defer getting its devid until after labeling it.
 	 */
@@ -546,6 +548,7 @@
 
 		(void) close(fd);
 	}
+#endif
 
 	return (vdev);
 }
@@ -585,7 +588,9 @@
 	uint_t c, children;
 	nvlist_t *nv;
 	char *type;
-	replication_level_t lastrep, rep, *ret;
+	replication_level_t lastrep = {0};
+	replication_level_t rep;
+	replication_level_t *ret;
 	boolean_t dontreport;
 
 	ret = safe_malloc(sizeof (replication_level_t));
@@ -593,7 +598,6 @@
 	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 	    &top, &toplevels) == 0);
 
-	lastrep.zprl_type = NULL;
 	for (t = 0; t < toplevels; t++) {
 		uint64_t is_log = B_FALSE;
 
@@ -681,6 +685,21 @@
 					verify(nvlist_lookup_string(cnv,
 					    ZPOOL_CONFIG_TYPE,
 					    &childtype) == 0);
+					if (strcmp(childtype,
+					    VDEV_TYPE_SPARE) == 0) {
+						/* We have a replacing vdev with
+						 * a spare child.  Get the first
+						 * real child of the spare
+						 */
+						verify(
+						    nvlist_lookup_nvlist_array(
+							cnv,
+							ZPOOL_CONFIG_CHILDREN,
+							&rchild,
+						    &rchildren) == 0);
+						assert(rchildren >= 2);
+						cnv = rchild[0];
+					}
 				}
 
 				verify(nvlist_lookup_string(cnv,
@@ -908,7 +927,7 @@
 	return (ret);
 }
 
-#ifdef sun
+#ifdef illumos
 /*
  * Go through and find any whole disks in the vdev specification, labelling them
  * as appropriate.  When constructing the vdev spec, we were unable to open this
@@ -1012,7 +1031,7 @@
 
 	return (0);
 }
-#endif	/* sun */
+#endif	/* illumos */
 
 /*
  * Determine if the given path is a hot spare within the given configuration.
@@ -1066,16 +1085,17 @@
  * Go through and find any devices that are in use.  We rely on libdiskmgt for
  * the majority of this task.
  */
-static int
-check_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
+static boolean_t
+is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
     boolean_t replacing, boolean_t isspare)
 {
 	nvlist_t **child;
 	uint_t c, children;
 	char *type, *path;
-	int ret;
+	int ret = 0;
 	char buf[MAXPATHLEN];
 	uint64_t wholedisk;
+	boolean_t anyinuse = B_FALSE;
 
 	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
 
@@ -1090,7 +1110,7 @@
 		 * regardless of what libdiskmgt or zpool_in_use() says.
 		 */
 		if (replacing) {
-#ifdef sun
+#ifdef illumos
 			if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 			    &wholedisk) == 0 && wholedisk)
 				(void) snprintf(buf, sizeof (buf), "%ss0",
@@ -1100,38 +1120,37 @@
 				(void) strlcpy(buf, path, sizeof (buf));
 
 			if (is_spare(config, buf))
-				return (0);
+				return (B_FALSE);
 		}
 
 		if (strcmp(type, VDEV_TYPE_DISK) == 0)
 			ret = check_device(path, force, isspare);
-
-		if (strcmp(type, VDEV_TYPE_FILE) == 0)
+		else if (strcmp(type, VDEV_TYPE_FILE) == 0)
 			ret = check_file(path, force, isspare);
 
-		return (ret);
+		return (ret != 0);
 	}
 
 	for (c = 0; c < children; c++)
-		if ((ret = check_in_use(config, child[c], force,
-		    replacing, B_FALSE)) != 0)
-			return (ret);
+		if (is_device_in_use(config, child[c], force, replacing,
+		    B_FALSE))
+			anyinuse = B_TRUE;
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
 	    &child, &children) == 0)
 		for (c = 0; c < children; c++)
-			if ((ret = check_in_use(config, child[c], force,
-			    replacing, B_TRUE)) != 0)
-				return (ret);
+			if (is_device_in_use(config, child[c], force, replacing,
+			    B_TRUE))
+				anyinuse = B_TRUE;
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
 	    &child, &children) == 0)
 		for (c = 0; c < children; c++)
-			if ((ret = check_in_use(config, child[c], force,
-			    replacing, B_FALSE)) != 0)
-				return (ret);
+			if (is_device_in_use(config, child[c], force, replacing,
+			    B_FALSE))
+				anyinuse = B_TRUE;
 
-	return (0);
+	return (anyinuse);
 }
 
 static const char *
@@ -1415,7 +1434,7 @@
 			return (NULL);
 		}
 
-#ifdef sun
+#ifdef illumos
 		if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
 			nvlist_free(newroot);
 			return (NULL);
@@ -1442,8 +1461,7 @@
 	}
 
 	if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) {
-		if (newroot != NULL)
-			nvlist_free(newroot);
+		nvlist_free(newroot);
 		return (NULL);
 	}
 
@@ -1485,7 +1503,7 @@
 	 * uses (such as a dedicated dump device) that even '-f' cannot
 	 * override.
 	 */
-	if (check_in_use(poolconfig, newroot, force, replacing, B_FALSE) != 0) {
+	if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) {
 		nvlist_free(newroot);
 		return (NULL);
 	}
@@ -1500,7 +1518,7 @@
 		return (NULL);
 	}
 
-#ifdef sun
+#ifdef illumos
 	/*
 	 * Run through the vdev specification and label any whole disks found.
 	 */

Modified: trunk/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1	2018-06-02 16:07:17 UTC (rev 10228)
@@ -18,10 +18,11 @@
 .\" information: Portions Copyright [yyyy] [name of copyright owner]
 .\"
 .\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2013, Delphix. All Rights Reserved.
 .\"
-.\" $FreeBSD$
+.\" $MidnightBSD$
 .\"
-.Dd November 26, 2011
+.Dd December 31, 2013
 .Dt ZSTREAMDUMP 8
 .Os
 .Sh NAME
@@ -30,6 +31,7 @@
 .Sh SYNOPSIS
 .Nm
 .Op Fl C
+.Op Fl d
 .Op Fl v
 .Sh DESCRIPTION
 The
@@ -43,6 +45,8 @@
 .Bl -tag -width indent
 .It Fl C
 Suppress the validation of checksums.
+.It Fl d
+Dump contents of blocks modified, implies verbose.
 .It Fl v
 Verbose. Dump all headers, not only begin and end headers.
 .El


Property changes on: trunk/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -24,39 +25,67 @@
  * Use is subject to license terms.
  */
 
+/*
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ */
+
+#include <ctype.h>
 #include <libnvpair.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <strings.h>
 #include <unistd.h>
+#include <stddef.h>
 
 #include <sys/dmu.h>
 #include <sys/zfs_ioctl.h>
 #include <zfs_fletcher.h>
 
-uint64_t drr_record_count[DRR_NUMTYPES];
+/*
+ * If dump mode is enabled, the number of bytes to print per line
+ */
+#define	BYTES_PER_LINE	16
+/*
+ * If dump mode is enabled, the number of bytes to group together, separated
+ * by newlines or spaces
+ */
+#define	DUMP_GROUPING	4
+
 uint64_t total_write_size = 0;
 uint64_t total_stream_len = 0;
 FILE *send_stream = 0;
 boolean_t do_byteswap = B_FALSE;
 boolean_t do_cksum = B_TRUE;
-#define	INITIAL_BUFLEN (1<<20)
 
 static void
 usage(void)
 {
-	(void) fprintf(stderr, "usage: zstreamdump [-v] [-C] < file\n");
+	(void) fprintf(stderr, "usage: zstreamdump [-v] [-C] [-d] < file\n");
 	(void) fprintf(stderr, "\t -v -- verbose\n");
 	(void) fprintf(stderr, "\t -C -- suppress checksum verification\n");
+	(void) fprintf(stderr, "\t -d -- dump contents of blocks modified, "
+	    "implies verbose\n");
 	exit(1);
 }
 
+static void *
+safe_malloc(size_t size)
+{
+	void *rv = malloc(size);
+	if (rv == NULL) {
+		(void) fprintf(stderr, "ERROR; failed to allocate %zu bytes\n",
+		    size);
+		abort();
+	}
+	return (rv);
+}
+
 /*
  * ssread - send stream read.
  *
  * Read while computing incremental checksum
  */
-
 static size_t
 ssread(void *buf, size_t len, zio_cksum_t *cksum)
 {
@@ -65,7 +94,7 @@
 	if ((outlen = fread(buf, len, 1, send_stream)) == 0)
 		return (0);
 
-	if (do_cksum && cksum) {
+	if (do_cksum) {
 		if (do_byteswap)
 			fletcher_4_incremental_byteswap(buf, len, cksum);
 		else
@@ -75,10 +104,104 @@
 	return (outlen);
 }
 
+static size_t
+read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum)
+{
+	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+	size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum);
+	if (r == 0)
+		return (0);
+	zio_cksum_t saved_cksum = *cksum;
+	r = ssread(&drr->drr_u.drr_checksum.drr_checksum,
+	    sizeof (zio_cksum_t), cksum);
+	if (r == 0)
+		return (0);
+	if (!ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) &&
+	    !ZIO_CHECKSUM_EQUAL(saved_cksum,
+	    drr->drr_u.drr_checksum.drr_checksum)) {
+		fprintf(stderr, "invalid checksum\n");
+		(void) printf("Incorrect checksum in record header.\n");
+		(void) printf("Expected checksum = %llx/%llx/%llx/%llx\n",
+		    saved_cksum.zc_word[0],
+		    saved_cksum.zc_word[1],
+		    saved_cksum.zc_word[2],
+		    saved_cksum.zc_word[3]);
+		return (0);
+	}
+	return (sizeof (*drr));
+}
+
+/*
+ * Print part of a block in ASCII characters
+ */
+static void
+print_ascii_block(char *subbuf, int length)
+{
+	int i;
+
+	for (i = 0; i < length; i++) {
+		char char_print = isprint(subbuf[i]) ? subbuf[i] : '.';
+		if (i != 0 && i % DUMP_GROUPING == 0) {
+			(void) printf(" ");
+		}
+		(void) printf("%c", char_print);
+	}
+	(void) printf("\n");
+}
+
+/*
+ * print_block - Dump the contents of a modified block to STDOUT
+ *
+ * Assume that buf has capacity evenly divisible by BYTES_PER_LINE
+ */
+static void
+print_block(char *buf, int length)
+{
+	int i;
+	/*
+	 * Start printing ASCII characters at a constant offset, after
+	 * the hex prints. Leave 3 characters per byte on a line (2 digit
+	 * hex number plus 1 space) plus spaces between characters and
+	 * groupings.
+	 */
+	int ascii_start = BYTES_PER_LINE * 3 +
+	    BYTES_PER_LINE / DUMP_GROUPING + 2;
+
+	for (i = 0; i < length; i += BYTES_PER_LINE) {
+		int j;
+		int this_line_length = MIN(BYTES_PER_LINE, length - i);
+		int print_offset = 0;
+
+		for (j = 0; j < this_line_length; j++) {
+			int buf_offset = i + j;
+
+			/*
+			 * Separate every DUMP_GROUPING bytes by a space.
+			 */
+			if (buf_offset % DUMP_GROUPING == 0) {
+				print_offset += printf(" ");
+			}
+
+			/*
+			 * Print the two-digit hex value for this byte.
+			 */
+			unsigned char hex_print = buf[buf_offset];
+			print_offset += printf("%02x ", hex_print);
+		}
+
+		(void) printf("%*s", ascii_start - print_offset, " ");
+
+		print_ascii_block(buf + i, this_line_length);
+	}
+}
+
 int
 main(int argc, char *argv[])
 {
-	char *buf = malloc(INITIAL_BUFLEN);
+	char *buf = safe_malloc(SPA_MAXBLOCKSIZE);
+	uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
+	uint64_t total_records = 0;
 	dmu_replay_record_t thedrr;
 	dmu_replay_record_t *drr = &thedrr;
 	struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
@@ -89,21 +212,37 @@
 	struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref;
 	struct drr_free *drrf = &thedrr.drr_u.drr_free;
 	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
+	struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
+	struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
 	char c;
 	boolean_t verbose = B_FALSE;
+	boolean_t very_verbose = B_FALSE;
 	boolean_t first = B_TRUE;
+	/*
+	 * dump flag controls whether the contents of any modified data blocks
+	 * are printed to the console during processing of the stream. Warning:
+	 * for large streams, this can obviously lead to massive prints.
+	 */
+	boolean_t dump = B_FALSE;
 	int err;
 	zio_cksum_t zc = { 0 };
 	zio_cksum_t pcksum = { 0 };
 
-	while ((c = getopt(argc, argv, ":vC")) != -1) {
+	while ((c = getopt(argc, argv, ":vCd")) != -1) {
 		switch (c) {
 		case 'C':
 			do_cksum = B_FALSE;
 			break;
 		case 'v':
+			if (verbose)
+				very_verbose = B_TRUE;
 			verbose = B_TRUE;
 			break;
+		case 'd':
+			dump = B_TRUE;
+			verbose = B_TRUE;
+			very_verbose = B_TRUE;
+			break;
 		case ':':
 			(void) fprintf(stderr,
 			    "missing argument for '%c' option\n", optopt);
@@ -126,8 +265,12 @@
 
 	send_stream = stdin;
 	pcksum = zc;
-	while (ssread(drr, sizeof (dmu_replay_record_t), &zc)) {
+	while (read_hdr(drr, &zc)) {
 
+		/*
+		 * If this is the first DMU record being processed, check for
+		 * the magic bytes and figure out the endian-ness based on them.
+		 */
 		if (first) {
 			if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
 				do_byteswap = B_TRUE;
@@ -169,6 +312,7 @@
 		}
 
 		drr_record_count[drr->drr_type]++;
+		total_records++;
 
 		switch (drr->drr_type) {
 		case DRR_BEGIN:
@@ -204,14 +348,13 @@
 			if (verbose)
 				(void) printf("\n");
 
-			if ((DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
-			    DMU_COMPOUNDSTREAM) && drr->drr_payloadlen != 0) {
+			if (drr->drr_payloadlen != 0) {
 				nvlist_t *nv;
 				int sz = drr->drr_payloadlen;
 
-				if (sz > 1<<20) {
+				if (sz > SPA_MAXBLOCKSIZE) {
 					free(buf);
-					buf = malloc(sz);
+					buf = safe_malloc(sz);
 				}
 				(void) ssread(buf, sz, &zc);
 				if (ferror(send_stream))
@@ -281,8 +424,12 @@
 				    drro->drr_bonuslen);
 			}
 			if (drro->drr_bonuslen > 0) {
-				(void) ssread(buf, P2ROUNDUP(drro->drr_bonuslen,
-				    8), &zc);
+				(void) ssread(buf,
+				    P2ROUNDUP(drro->drr_bonuslen, 8), &zc);
+				if (dump) {
+					print_block(buf,
+					    P2ROUNDUP(drro->drr_bonuslen, 8));
+				}
 			}
 			break;
 
@@ -312,10 +459,14 @@
 				drrw->drr_key.ddk_prop =
 				    BSWAP_64(drrw->drr_key.ddk_prop);
 			}
+			/*
+			 * If this is verbose and/or dump output,
+			 * print info on the modified block
+			 */
 			if (verbose) {
 				(void) printf("WRITE object = %llu type = %u "
 				    "checksum type = %u\n"
-				    "offset = %llu length = %llu "
+				    "    offset = %llu length = %llu "
 				    "props = %llx\n",
 				    (u_longlong_t)drrw->drr_object,
 				    drrw->drr_type,
@@ -324,7 +475,16 @@
 				    (u_longlong_t)drrw->drr_length,
 				    (u_longlong_t)drrw->drr_key.ddk_prop);
 			}
+			/*
+			 * Read the contents of the block in from STDIN to buf
+			 */
 			(void) ssread(buf, drrw->drr_length, &zc);
+			/*
+			 * If in dump mode
+			 */
+			if (dump) {
+				print_block(buf, drrw->drr_length);
+			}
 			total_write_size += drrw->drr_length;
 			break;
 
@@ -350,9 +510,9 @@
 			if (verbose) {
 				(void) printf("WRITE_BYREF object = %llu "
 				    "checksum type = %u props = %llx\n"
-				    "offset = %llu length = %llu\n"
+				    "    offset = %llu length = %llu\n"
 				    "toguid = %llx refguid = %llx\n"
-				    "refobject = %llu refoffset = %llu\n",
+				    "    refobject = %llu refoffset = %llu\n",
 				    (u_longlong_t)drrwbr->drr_object,
 				    drrwbr->drr_checksumtype,
 				    (u_longlong_t)drrwbr->drr_key.ddk_prop,
@@ -390,8 +550,50 @@
 				    drrs->drr_length);
 			}
 			(void) ssread(buf, drrs->drr_length, &zc);
+			if (dump) {
+				print_block(buf, drrs->drr_length);
+			}
 			break;
+		case DRR_WRITE_EMBEDDED:
+			if (do_byteswap) {
+				drrwe->drr_object =
+				    BSWAP_64(drrwe->drr_object);
+				drrwe->drr_offset =
+				    BSWAP_64(drrwe->drr_offset);
+				drrwe->drr_length =
+				    BSWAP_64(drrwe->drr_length);
+				drrwe->drr_toguid =
+				    BSWAP_64(drrwe->drr_toguid);
+				drrwe->drr_lsize =
+				    BSWAP_32(drrwe->drr_lsize);
+				drrwe->drr_psize =
+				    BSWAP_32(drrwe->drr_psize);
+			}
+			if (verbose) {
+				(void) printf("WRITE_EMBEDDED object = %llu "
+				    "offset = %llu length = %llu\n"
+				    "    toguid = %llx comp = %u etype = %u "
+				    "lsize = %u psize = %u\n",
+				    (u_longlong_t)drrwe->drr_object,
+				    (u_longlong_t)drrwe->drr_offset,
+				    (u_longlong_t)drrwe->drr_length,
+				    (u_longlong_t)drrwe->drr_toguid,
+				    drrwe->drr_compression,
+				    drrwe->drr_etype,
+				    drrwe->drr_lsize,
+				    drrwe->drr_psize);
+			}
+			(void) ssread(buf,
+			    P2ROUNDUP(drrwe->drr_psize, 8), &zc);
+			break;
 		}
+		if (drr->drr_type != DRR_BEGIN && very_verbose) {
+			(void) printf("    checksum = %llx/%llx/%llx/%llx\n",
+			    (longlong_t)drrc->drr_checksum.zc_word[0],
+			    (longlong_t)drrc->drr_checksum.zc_word[1],
+			    (longlong_t)drrc->drr_checksum.zc_word[2],
+			    (longlong_t)drrc->drr_checksum.zc_word[3]);
+		}
 		pcksum = zc;
 	}
 	free(buf);
@@ -409,18 +611,16 @@
 	    (u_longlong_t)drr_record_count[DRR_FREEOBJECTS]);
 	(void) printf("\tTotal DRR_WRITE records = %lld\n",
 	    (u_longlong_t)drr_record_count[DRR_WRITE]);
+	(void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n",
+	    (u_longlong_t)drr_record_count[DRR_WRITE_BYREF]);
+	(void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld\n",
+	    (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED]);
 	(void) printf("\tTotal DRR_FREE records = %lld\n",
 	    (u_longlong_t)drr_record_count[DRR_FREE]);
 	(void) printf("\tTotal DRR_SPILL records = %lld\n",
 	    (u_longlong_t)drr_record_count[DRR_SPILL]);
 	(void) printf("\tTotal records = %lld\n",
-	    (u_longlong_t)(drr_record_count[DRR_BEGIN] +
-	    drr_record_count[DRR_OBJECT] +
-	    drr_record_count[DRR_FREEOBJECTS] +
-	    drr_record_count[DRR_WRITE] +
-	    drr_record_count[DRR_FREE] +
-	    drr_record_count[DRR_SPILL] +
-	    drr_record_count[DRR_END]));
+	    (u_longlong_t)total_records);
 	(void) printf("\tTotal write size = %lld (0x%llx)\n",
 	    (u_longlong_t)total_write_size, (u_longlong_t)total_write_size);
 	(void) printf("\tTotal stream length = %lld (0x%llx)\n",

Modified: trunk/cddl/contrib/opensolaris/cmd/ztest/ztest.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/ztest/ztest.c	2018-06-02 16:04:51 UTC (rev 10227)
+++ trunk/cddl/contrib/opensolaris/cmd/ztest/ztest.c	2018-06-02 16:07:17 UTC (rev 10228)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * CDDL HEADER START
  *
@@ -20,10 +21,12 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>.  All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2017 Joyent, Inc.
  */
 
 /*
@@ -53,7 +56,7 @@
  *     At random times, the child self-immolates with a SIGKILL.
  *     This is the software equivalent of pulling the power cord.
  *     The parent then runs the test again, using the existing
- *     storage pool, as many times as desired. If backwards compatability
+ *     storage pool, as many times as desired. If backwards compatibility
  *     testing is enabled ztest will sometimes run the "older" version
  *     of ztest after a SIGKILL.
  *
@@ -123,6 +126,7 @@
 #include <errno.h>
 #include <sys/fs/zfs.h>
 #include <libnvpair.h>
+#include <libcmdutils.h>
 
 static int ztest_fd_data = -1;
 static int ztest_fd_rand = -1;
@@ -140,8 +144,8 @@
 static ztest_shared_hdr_t *ztest_shared_hdr;
 
 typedef struct ztest_shared_opts {
-	char zo_pool[MAXNAMELEN];
-	char zo_dir[MAXNAMELEN];
+	char zo_pool[ZFS_MAX_DATASET_NAME_LEN];
+	char zo_dir[ZFS_MAX_DATASET_NAME_LEN];
 	char zo_alt_ztest[MAXNAMELEN];
 	char zo_alt_libpath[MAXNAMELEN];
 	uint64_t zo_vdevs;
@@ -172,7 +176,7 @@
 	.zo_mirrors = 2,
 	.zo_raidz = 4,
 	.zo_raidz_parity = 1,
-	.zo_vdev_size = SPA_MINDEVSIZE,
+	.zo_vdev_size = SPA_MINDEVSIZE * 2,
 	.zo_datasets = 7,
 	.zo_threads = 23,
 	.zo_passtime = 60,		/* 60 seconds */
@@ -186,6 +190,9 @@
 
 extern uint64_t metaslab_gang_bang;
 extern uint64_t metaslab_df_alloc_threshold;
+extern uint64_t zfs_deadman_synctime_ms;
+extern int metaslab_preload_limit;
+extern boolean_t zfs_compressed_arc_enabled;
 
 static ztest_shared_opts_t *ztest_shared_opts;
 static ztest_shared_opts_t ztest_opts;
@@ -265,7 +272,7 @@
 	uint64_t	od_crblocksize;
 	uint64_t	od_gen;
 	uint64_t	od_crgen;
-	char		od_name[MAXNAMELEN];
+	char		od_name[ZFS_MAX_DATASET_NAME_LEN];
 } ztest_od_t;
 
 /*
@@ -277,7 +284,7 @@
 	rwlock_t	zd_zilog_lock;
 	zilog_t		*zd_zilog;
 	ztest_od_t	*zd_od;		/* debugging aid */
-	char		zd_name[MAXNAMELEN];
+	char		zd_name[ZFS_MAX_DATASET_NAME_LEN];
 	mutex_t		zd_dirobj_lock;
 	rll_t		zd_object_lock[ZTEST_OBJECT_LOCKS];
 	rll_t		zd_range_lock[ZTEST_RANGE_LOCKS];
@@ -365,7 +372,7 @@
 	{ ztest_fault_inject,			1,	&zopt_sometimes	},
 	{ ztest_ddt_repair,			1,	&zopt_sometimes	},
 	{ ztest_dmu_snapshot_hold,		1,	&zopt_sometimes	},
-	{ ztest_reguid,				1,	&zopt_sometimes },
+	{ ztest_reguid,				1,	&zopt_rarely	},
 	{ ztest_spa_rename,			1,	&zopt_rarely	},
 	{ ztest_scrub,				1,	&zopt_rarely	},
 	{ ztest_spa_upgrade,			1,	&zopt_rarely	},
@@ -550,12 +557,13 @@
 {
 	const ztest_shared_opts_t *zo = &ztest_opts_defaults;
 
-	char nice_vdev_size[10];
-	char nice_gang_bang[10];
+	char nice_vdev_size[NN_NUMBUF_SZ];
+	char nice_gang_bang[NN_NUMBUF_SZ];
 	FILE *fp = requested ? stdout : stderr;
 
-	nicenum(zo->zo_vdev_size, nice_vdev_size);
-	nicenum(zo->zo_metaslab_gang_bang, nice_gang_bang);
+	nicenum(zo->zo_vdev_size, nice_vdev_size, sizeof (nice_vdev_size));
+	nicenum(zo->zo_metaslab_gang_bang, nice_gang_bang,
+	    sizeof (nice_gang_bang));
 
 	(void) fprintf(fp, "Usage: %s\n"
 	    "\t[-v vdevs (default: %llu)]\n"
@@ -577,6 +585,8 @@
 	    "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n"
 	    "\t[-P passtime (default: %llu sec)] time per pass\n"
 	    "\t[-B alt_ztest (default: <none>)] alternate ztest path\n"
+	    "\t[-o variable=value] ... set global variable to an unsigned\n"
+	    "\t    32-bit integer value\n"
 	    "\t[-h] (print help)\n"
 	    "",
 	    zo->zo_pool,
@@ -612,7 +622,7 @@
 	bcopy(&ztest_opts_defaults, zo, sizeof (*zo));
 
 	while ((opt = getopt(argc, argv,
-	    "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:")) != EOF) {
+	    "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:o:")) != EOF) {
 		value = 0;
 		switch (opt) {
 		case 'v':
@@ -699,6 +709,10 @@
 		case 'B':
 			(void) strlcpy(altdir, optarg, sizeof (altdir));
 			break;
+		case 'o':
+			if (set_global_var(optarg) != 0)
+				usage(B_FALSE);
+			break;
 		case 'h':
 			usage(B_TRUE);
 			break;
@@ -768,6 +782,16 @@
 {
 	zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa));
 	zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa));
+
+	/*
+	 * Before we kill off ztest, make sure that the config is updated.
+	 * See comment above spa_config_sync().
+	 */
+	mutex_enter(&spa_namespace_lock);
+	spa_config_sync(ztest_spa, B_FALSE, B_FALSE);
+	mutex_exit(&spa_namespace_lock);
+
+	zfs_dbgmsg_print(FTAG);
 	(void) kill(getpid(), SIGKILL);
 }
 
@@ -798,7 +822,7 @@
 ztest_get_ashift(void)
 {
 	if (ztest_opts.zo_ashift == 0)
-		return (SPA_MINBLOCKSHIFT + ztest_random(3));
+		return (SPA_MINBLOCKSHIFT + ztest_random(5));
 	return (ztest_opts.zo_ashift);
 }
 
@@ -960,8 +984,16 @@
 static int
 ztest_random_blocksize(void)
 {
-	return (1 << (SPA_MINBLOCKSHIFT +
-	    ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)));
+	uint64_t block_shift;
+	/*
+	 * Choose a block size >= the ashift.
+	 * If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks.
+	 */
+	int maxbs = SPA_OLD_MAXBLOCKSHIFT;
+	if (spa_maxblocksize(ztest_spa) == SPA_MAXBLOCKSIZE)
+		maxbs = 20;
+	block_shift = ztest_random(maxbs - ztest_spa->spa_max_ashift + 1);
+	return (1 << (SPA_MINBLOCKSHIFT + block_shift));
 }
 
 static int
@@ -1256,13 +1288,13 @@
 ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
     uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
 {
-	ASSERT(bt->bt_magic == BT_MAGIC);
-	ASSERT(bt->bt_objset == dmu_objset_id(os));
-	ASSERT(bt->bt_object == object);
-	ASSERT(bt->bt_offset == offset);
-	ASSERT(bt->bt_gen <= gen);
-	ASSERT(bt->bt_txg <= txg);
-	ASSERT(bt->bt_crtxg == crtxg);
+	ASSERT3U(bt->bt_magic, ==, BT_MAGIC);
+	ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os));
+	ASSERT3U(bt->bt_object, ==, object);
+	ASSERT3U(bt->bt_offset, ==, offset);
+	ASSERT3U(bt->bt_gen, <=, gen);
+	ASSERT3U(bt->bt_txg, <=, txg);
+	ASSERT3U(bt->bt_crtxg, ==, crtxg);
 }
 
 static ztest_block_tag_t *
@@ -1349,7 +1381,6 @@
 	itx->itx_private = zd;
 	itx->itx_wr_state = write_state;
 	itx->itx_sync = (ztest_random(8) == 0);
-	itx->itx_sod += (write_state == WR_NEED_COPY ? lr->lr_length : 0);
 
 	bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
 	    sizeof (*lr) - sizeof (lr_t));
@@ -2732,7 +2763,7 @@
 	uint64_t leaf, top;
 	uint64_t ashift = ztest_get_ashift();
 	uint64_t oldguid, pguid;
-	size_t oldsize, newsize;
+	uint64_t oldsize, newsize;
 	char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
 	int replacing;
 	int oldvd_has_siblings = B_FALSE;
@@ -2891,8 +2922,8 @@
 	if (error != expected_error && expected_error != EBUSY) {
 		fatal(0, "attach (%s %llu, %s %llu, %d) "
 		    "returned %d, expected %d",
-		    oldpath, (longlong_t)oldsize, newpath,
-		    (longlong_t)newsize, replacing, error, expected_error);
+		    oldpath, oldsize, newpath,
+		    newsize, replacing, error, expected_error);
 	}
 
 	VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
@@ -3129,10 +3160,10 @@
 		    old_class_space, new_class_space);
 
 	if (ztest_opts.zo_verbose >= 5) {
-		char oldnumbuf[6], newnumbuf[6];
+		char oldnumbuf[NN_NUMBUF_SZ], newnumbuf[NN_NUMBUF_SZ];
 
-		nicenum(old_class_space, oldnumbuf);
-		nicenum(new_class_space, newnumbuf);
+		nicenum(old_class_space, oldnumbuf, sizeof (oldnumbuf));
+		nicenum(new_class_space, newnumbuf, sizeof (newnumbuf));
 		(void) printf("%s grew from %s to %s\n",
 		    spa->spa_name, oldnumbuf, newnumbuf);
 	}
@@ -3206,7 +3237,7 @@
 static boolean_t
 ztest_snapshot_create(char *osname, uint64_t id)
 {
-	char snapname[MAXNAMELEN];
+	char snapname[ZFS_MAX_DATASET_NAME_LEN];
 	int error;
 
 	(void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id);
@@ -3226,10 +3257,10 @@
 static boolean_t
 ztest_snapshot_destroy(char *osname, uint64_t id)
 {
-	char snapname[MAXNAMELEN];
+	char snapname[ZFS_MAX_DATASET_NAME_LEN];
 	int error;
 
-	(void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
+	(void) snprintf(snapname, sizeof (snapname), "%s@%llu", osname,
 	    (u_longlong_t)id);
 
 	error = dsl_destroy_snapshot(snapname, B_FALSE);
@@ -3246,12 +3277,12 @@
 	int iters;
 	int error;
 	objset_t *os, *os2;
-	char name[MAXNAMELEN];
+	char name[ZFS_MAX_DATASET_NAME_LEN];
 	zilog_t *zilog;
 
 	(void) rw_rdlock(&ztest_name_lock);
 
-	(void) snprintf(name, MAXNAMELEN, "%s/temp_%llu",
+	(void) snprintf(name, sizeof (name), "%s/temp_%llu",
 	    ztest_opts.zo_pool, (u_longlong_t)id);
 
 	/*
@@ -3357,18 +3388,23 @@
 void
 ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
 {
-	char snap1name[MAXNAMELEN];
-	char clone1name[MAXNAMELEN];
-	char snap2name[MAXNAMELEN];
-	char clone2name[MAXNAMELEN];
-	char snap3name[MAXNAMELEN];
+	char snap1name[ZFS_MAX_DATASET_NAME_LEN];
+	char clone1name[ZFS_MAX_DATASET_NAME_LEN];
+	char snap2name[ZFS_MAX_DATASET_NAME_LEN];
+	char clone2name[ZFS_MAX_DATASET_NAME_LEN];
+	char snap3name[ZFS_MAX_DATASET_NAME_LEN];
 	int error;
 
-	(void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id);
-	(void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id);
-	(void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id);
-	(void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);
-	(void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id);
+	(void) snprintf(snap1name, sizeof (snap1name),
+	    "%s@s1_%llu", osname, id);
+	(void) snprintf(clone1name, sizeof (clone1name),
+	    "%s/c1_%llu", osname, id);
+	(void) snprintf(snap2name, sizeof (snap2name),
+	    "%s@s2_%llu", clone1name, id);
+	(void) snprintf(clone2name, sizeof (clone2name),
+	    "%s/c2_%llu", osname, id);
+	(void) snprintf(snap3name, sizeof (snap3name),
+	    "%s@s3_%llu", clone1name, id);
 
 	error = dsl_destroy_head(clone2name);
 	if (error && error != ENOENT)
@@ -3394,11 +3430,11 @@
 ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
 {
 	objset_t *os;
-	char snap1name[MAXNAMELEN];
-	char clone1name[MAXNAMELEN];
-	char snap2name[MAXNAMELEN];
-	char clone2name[MAXNAMELEN];
-	char snap3name[MAXNAMELEN];
+	char snap1name[ZFS_MAX_DATASET_NAME_LEN];
+	char clone1name[ZFS_MAX_DATASET_NAME_LEN];
+	char snap2name[ZFS_MAX_DATASET_NAME_LEN];
+	char clone2name[ZFS_MAX_DATASET_NAME_LEN];
+	char snap3name[ZFS_MAX_DATASET_NAME_LEN];
 	char *osname = zd->zd_name;
 	int error;
 
@@ -3406,11 +3442,16 @@
 
 	ztest_dsl_dataset_cleanup(osname, id);
 
-	(void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id);
-	(void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id);
-	(void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id);
-	(void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);
-	(void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id);
+	(void) snprintf(snap1name, sizeof (snap1name),
+	    "%s@s1_%llu", osname, id);
+	(void) snprintf(clone1name, sizeof (clone1name),
+	    "%s/c1_%llu", osname, id);
+	(void) snprintf(snap2name, sizeof (snap2name),
+	    "%s@s2_%llu", clone1name, id);
+	(void) snprintf(clone2name, sizeof (clone2name),
+	    "%s/c2_%llu", osname, id);
+	(void) snprintf(snap3name, sizeof (snap3name),
+	    "%s@s3_%llu", clone1name, id);
 
 	error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1);
 	if (error && error != EEXIST) {
@@ -3461,6 +3502,11 @@
 	if (error)
 		fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
 	error = dsl_dataset_promote(clone2name, NULL);
+	if (error == ENOSPC) {
+		dmu_objset_disown(os, FTAG);
+		ztest_record_enospc(FTAG);
+		goto out;
+	}
 	if (error != EBUSY)
 		fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
 		    error);
@@ -3561,7 +3607,8 @@
 	 */
 	n = ztest_random(regions) * stride + ztest_random(width);
 	s = 1 + ztest_random(2 * width - 1);
-	dmu_prefetch(os, bigobj, n * chunksize, s * chunksize);
+	dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize,
+	    ZIO_PRIORITY_SYNC_READ);
 
 	/*
 	 * Pick a random index and compute the offsets into packobj and bigobj.
@@ -3606,6 +3653,9 @@
 	else
 		dmu_tx_hold_write(tx, bigobj, bigoff, bigsize);
 
+	/* This accounts for setting the checksum/compression. */
+	dmu_tx_hold_bonus(tx, bigobj);
+
 	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
 	if (txg == 0) {
 		umem_free(packbuf, packsize);
@@ -3613,11 +3663,19 @@
 		return;
 	}
 
-	dmu_object_set_checksum(os, bigobj,
-	    (enum zio_checksum)ztest_random_dsl_prop(ZFS_PROP_CHECKSUM), tx);
+	enum zio_checksum cksum;
+	do {
+		cksum = (enum zio_checksum)
+		    ztest_random_dsl_prop(ZFS_PROP_CHECKSUM);
+	} while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS);
+	dmu_object_set_checksum(os, bigobj, cksum, tx);
 
-	dmu_object_set_compress(os, bigobj,
-	    (enum zio_compress)ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), tx);
+	enum zio_compress comp;
+	do {
+		comp = (enum zio_compress)
+		    ztest_random_dsl_prop(ZFS_PROP_COMPRESSION);
+	} while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS);
+	dmu_object_set_compress(os, bigobj, comp, tx);
 
 	/*
 	 * For each index from n to n + s, verify that the existing bufwad
@@ -4211,7 +4269,7 @@
 	 * 2050 entries we should see ptrtbl growth and leaf-block split.
 	 */
 	for (int i = 0; i < 2050; i++) {
-		char name[MAXNAMELEN];
+		char name[ZFS_MAX_DATASET_NAME_LEN];
 		uint64_t value = i;
 		dmu_tx_t *tx;
 		int error;
@@ -4616,7 +4674,7 @@
 	char fullname[100];
 	char clonename[100];
 	char tag[100];
-	char osname[MAXNAMELEN];
+	char osname[ZFS_MAX_DATASET_NAME_LEN];
 	nvlist_t *holds;
 
 	(void) rw_rdlock(&ztest_name_lock);
@@ -4697,8 +4755,13 @@
 	error = dsl_dataset_user_hold(holds, 0, NULL);
 	fnvlist_free(holds);
 
-	if (error)
-		fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag);
+	if (error == ENOSPC) {
+		ztest_record_enospc("dsl_dataset_user_hold");
+		goto out;
+	} else if (error) {
+		fatal(0, "dsl_dataset_user_hold(%s, %s) = %u",
+		    fullname, tag, error);
+	}
 
 	error = dsl_destroy_snapshot(fullname, B_FALSE);
 	if (error != EBUSY) {
@@ -4739,7 +4802,7 @@
 	char path0[MAXPATHLEN];
 	char pathrand[MAXPATHLEN];
 	size_t fsize;
-	int bshift = SPA_MAXBLOCKSHIFT + 2;	/* don't scrog all labels */
+	int bshift = SPA_MAXBLOCKSHIFT + 2;
 	int iters = 1000;
 	int maxfaults;
 	int mirror_save;
@@ -4756,6 +4819,14 @@
 	ASSERT(leaves >= 1);
 
 	/*
+	 * Grab the name lock as reader. There are some operations
+	 * which don't like to have their vdevs changed while
+	 * they are in progress (i.e. spa_change_guid). Those
+	 * operations will have grabbed the name lock as writer.
+	 */
+	(void) rw_rdlock(&ztest_name_lock);
+
+	/*
 	 * We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
 	 */
 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
@@ -4784,7 +4855,14 @@
 		if (vd0 != NULL && vd0->vdev_top->vdev_islog)
 			islog = B_TRUE;
 
-		if (vd0 != NULL && maxfaults != 1) {
+		/*
+		 * If the top-level vdev needs to be resilvered
+		 * then we only allow faults on the device that is
+		 * resilvering.
+		 */
+		if (vd0 != NULL && maxfaults != 1 &&
+		    (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
+		    vd0->vdev_resilver_txg != 0)) {
 			/*
 			 * Make vd0 explicitly claim to be unreadable,
 			 * or unwriteable, or reach behind its back
@@ -4815,6 +4893,7 @@
 
 		if (sav->sav_count == 0) {
 			spa_config_exit(spa, SCL_STATE, FTAG);
+			(void) rw_unlock(&ztest_name_lock);
 			return;
 		}
 		vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
@@ -4828,6 +4907,7 @@
 	}
 
 	spa_config_exit(spa, SCL_STATE, FTAG);
+	(void) rw_unlock(&ztest_name_lock);
 
 	/*
 	 * If we can tolerate two or more faults, or we're dealing
@@ -4883,13 +4963,60 @@
 	fsize = lseek(fd, 0, SEEK_END);
 
 	while (--iters != 0) {
+		/*
+		 * The offset must be chosen carefully to ensure that
+		 * we do not inject a given logical block with errors
+		 * on two different leaf devices, because ZFS can not
+		 * tolerate that (if maxfaults==1).
+		 *
+		 * We divide each leaf into chunks of size
+		 * (# leaves * SPA_MAXBLOCKSIZE * 4).  Within each chunk
+		 * there is a series of ranges to which we can inject errors.
+		 * Each range can accept errors on only a single leaf vdev.
+		 * The error injection ranges are separated by ranges
+		 * which we will not inject errors on any device (DMZs).
+		 * Each DMZ must be large enough such that a single block
+		 * can not straddle it, so that a single block can not be
+		 * a target in two different injection ranges (on different
+		 * leaf vdevs).
+		 *
+		 * For example, with 3 leaves, each chunk looks like:
+		 *    0 to  32M: injection range for leaf 0
+		 *  32M to  64M: DMZ - no injection allowed
+		 *  64M to  96M: injection range for leaf 1
+		 *  96M to 128M: DMZ - no injection allowed
+		 * 128M to 160M: injection range for leaf 2
+		 * 160M to 192M: DMZ - no injection allowed
+		 */
 		offset = ztest_random(fsize / (leaves << bshift)) *
 		    (leaves << bshift) + (leaf << bshift) +
 		    (ztest_random(1ULL << (bshift - 1)) & -8ULL);
 
-		if (offset >= fsize)
+		/*
+		 * Only allow damage to the labels at one end of the vdev.
+		 *
+		 * If all labels are damaged, the device will be totally
+		 * inaccessible, which will result in loss of data,
+		 * because we also damage (parts of) the other side of
+		 * the mirror/raidz.
+		 *
+		 * Additionally, we will always have both an even and an
+		 * odd label, so that we can handle crashes in the
+		 * middle of vdev_config_sync().
+		 */
+		if ((leaf & 1) == 0 && offset < VDEV_LABEL_START_SIZE)
 			continue;
 
+		/*
+		 * The two end labels are stored at the "end" of the disk, but
+		 * the end of the disk (vdev_psize) is aligned to
+		 * sizeof (vdev_label_t).
+		 */
+		uint64_t psize = P2ALIGN(fsize, sizeof (vdev_label_t));
+		if ((leaf & 1) == 1 &&
+		    offset + sizeof (bad) > psize - VDEV_LABEL_END_SIZE)
+			continue;
+
 		VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
 		if (mirror_save != zs->zs_mirrors) {
 			VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
@@ -4951,9 +5078,14 @@
 		return;
 	}
 
+	dmu_objset_stats_t dds;
+	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
+	dmu_objset_fast_stat(os, &dds);
+	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
+
 	object = od[0].od_object;
 	blocksize = od[0].od_blocksize;
-	pattern = zs->zs_guid ^ dmu_objset_fsid_guid(os);
+	pattern = zs->zs_guid ^ dds.dds_guid;
 
 	ASSERT(object != 0);
 
@@ -5134,7 +5266,7 @@
 	isa = strdup(isa);
 	/* LINTED */
 	(void) sprintf(bin,
-	    "/usr/sbin%.*s/zdb -bcc%s%s -U %s %s",
+	    "/usr/sbin%.*s/zdb -bcc%s%s -d -U %s %s",
 	    isalen,
 	    isa,
 	    ztest_opts.zo_verbose >= 3 ? "s" : "",
@@ -5285,6 +5417,12 @@
 		if (spa_suspended(spa))
 			ztest_resume(spa);
 		(void) poll(NULL, 0, 100);
+
+		/*
+		 * Periodically change the zfs_compressed_arc_enabled setting.
+		 */
+		if (ztest_random(10) == 0)
+			zfs_compressed_arc_enabled = ztest_random(2);
 	}
 	return (NULL);
 }
@@ -5293,16 +5431,33 @@
 ztest_deadman_thread(void *arg)
 {
 	ztest_shared_t *zs = arg;
-	int grace = 300;
-	hrtime_t delta;
+	spa_t *spa = ztest_spa;
+	hrtime_t delta, total = 0;
 
-	delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace;
+	for (;;) {
+		delta = zs->zs_thread_stop - zs->zs_thread_start +
+		    MSEC2NSEC(zfs_deadman_synctime_ms);
 
-	(void) poll(NULL, 0, (int)(1000 * delta));
+		(void) poll(NULL, 0, (int)NSEC2MSEC(delta));
 
-	fatal(0, "failed to complete within %d seconds of deadline", grace);
+		/*
+		 * If the pool is suspended then fail immediately. Otherwise,
+		 * check to see if the pool is making any progress. If
+		 * vdev_deadman() discovers that there hasn't been any recent
+		 * I/Os then it will end up aborting the tests.
+		 */
+		if (spa_suspended(spa) || spa->spa_root_vdev == NULL) {
+			fatal(0, "aborting test after %llu seconds because "
+			    "pool has transitioned to a suspended state.",
+			    zfs_deadman_synctime_ms / 1000);
+			return (NULL);
+		}
+		vdev_deadman(spa->spa_root_vdev);
 
-	return (NULL);
+		total += zfs_deadman_synctime_ms/1000;
+		(void) printf("ztest has been running for %lld seconds\n",
+		    total);
+	}
 }
 
 static void
@@ -5373,13 +5528,13 @@
 static void
 ztest_dataset_name(char *dsname, char *pool, int d)
 {
-	(void) snprintf(dsname, MAXNAMELEN, "%s/ds_%d", pool, d);
+	(void) snprintf(dsname, ZFS_MAX_DATASET_NAME_LEN, "%s/ds_%d", pool, d);
 }
 
 static void
 ztest_dataset_destroy(int d)
 {
-	char name[MAXNAMELEN];
+	char name[ZFS_MAX_DATASET_NAME_LEN];
 
 	ztest_dataset_name(name, ztest_opts.zo_pool, d);
 
@@ -5428,7 +5583,7 @@
 	uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq;
 	objset_t *os;
 	zilog_t *zilog;
-	char name[MAXNAMELEN];
+	char name[ZFS_MAX_DATASET_NAME_LEN];
 	int error;
 
 	ztest_dataset_name(name, ztest_opts.zo_pool, d);
@@ -5530,11 +5685,16 @@
 	kernel_init(FREAD | FWRITE);
 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	spa->spa_debug = B_TRUE;
+	metaslab_preload_limit = ztest_random(20) + 1;
 	ztest_spa = spa;
 
+	dmu_objset_stats_t dds;
 	VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
 	    DMU_OST_ANY, B_TRUE, FTAG, &os));
-	zs->zs_guid = dmu_objset_fsid_guid(os);
+	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
+	dmu_objset_fast_stat(os, &dds);
+	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
+	zs->zs_guid = dds.dds_guid;
 	dmu_objset_disown(os, FTAG);
 
 	spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
@@ -5616,6 +5776,7 @@
 
 	zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
 	zs->zs_space = metaslab_class_get_space(spa_normal_class(spa));
+	zfs_dbgmsg_print(FTAG);
 
 	umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t));
 
@@ -5628,8 +5789,10 @@
 	 * Right before closing the pool, kick off a bunch of async I/O;
 	 * spa_close() should wait for it to complete.
 	 */
-	for (uint64_t object = 1; object < 50; object++)
-		dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20);
+	for (uint64_t object = 1; object < 50; object++) {
+		dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20,
+		    ZIO_PRIORITY_SYNC_READ);
+	}
 
 	spa_close(spa, FTAG);
 
@@ -5647,8 +5810,8 @@
 	 * different name.
 	 */
 	if (ztest_random(2) == 0) {
-		char name[MAXNAMELEN];
-		(void) snprintf(name, MAXNAMELEN, "%s_import",
+		char name[ZFS_MAX_DATASET_NAME_LEN];
+		(void) snprintf(name, sizeof (name), "%s_import",
 		    ztest_opts.zo_pool);
 		ztest_spa_import_export(ztest_opts.zo_pool, name);
 		ztest_spa_import_export(name, ztest_opts.zo_pool);
@@ -5699,16 +5862,30 @@
 	spa_freeze(spa);
 
 	/*
+	 * Because it is hard to predict how much space a write will actually
+	 * require beforehand, we leave ourselves some fudge space to write over
+	 * capacity.
+	 */
+	uint64_t capacity = metaslab_class_get_space(spa_normal_class(spa)) / 2;
+
+	/*
 	 * Run tests that generate log records but don't alter the pool config
 	 * or depend on DSL sync tasks (snapshots, objset create/destroy, etc).
 	 * We do a txg_wait_synced() after each iteration to force the txg
 	 * to increase well beyond the last synced value in the uberblock.
 	 * The ZIL should be OK with that.
+	 *
+	 * Run a random number of times less than zo_maxloops and ensure we do
+	 * not run out of space on the pool.
 	 */
 	while (ztest_random(10) != 0 &&
-	    numloops++ < ztest_opts.zo_maxloops) {
-		ztest_dmu_write_parallel(zd, 0);
-		ztest_dmu_object_alloc_free(zd, 0);
+	    numloops++ < ztest_opts.zo_maxloops &&
+	    metaslab_class_get_alloc(spa_normal_class(spa)) < capacity) {
+		ztest_od_t od;
+		ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
+		VERIFY0(ztest_object_init(zd, &od, sizeof (od), B_FALSE));
+		ztest_io(zd, od.od_object,
+		    ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
 		txg_wait_synced(spa_get_dsl(spa), 0);
 	}
 
@@ -5814,6 +5991,7 @@
 	}
 	VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL));
 	nvlist_free(nvroot);
+	nvlist_free(props);
 
 	VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	zs->zs_metaslab_sz =
@@ -6022,7 +6200,7 @@
 	ztest_info_t *zi;
 	ztest_shared_callstate_t *zc;
 	char timebuf[100];
-	char numbuf[6];
+	char numbuf[NN_NUMBUF_SZ];
 	spa_t *spa;
 	char *cmd;
 	boolean_t hasalt;
@@ -6031,6 +6209,7 @@
 	(void) setvbuf(stdout, NULL, _IOLBF, 0);
 
 	dprintf_setup(&argc, argv);
+	zfs_deadman_synctime_ms = 300000;
 
 	ztest_fd_rand = open("/dev/urandom", O_RDONLY);
 	ASSERT3S(ztest_fd_rand, >=, 0);
@@ -6158,7 +6337,7 @@
 
 			now = MIN(now, zs->zs_proc_stop);
 			print_time(zs->zs_proc_stop - now, timebuf);
-			nicenum(zs->zs_space, numbuf);
+			nicenum(zs->zs_space, numbuf, sizeof (numbuf));
 
 			(void) printf("Pass %3d, %8s, %3llu ENOSPC, "
 			    "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n",
@@ -6200,7 +6379,7 @@
 		if (spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0) {
 			spa_close(spa, FTAG);
 		} else {
-			char tmpname[MAXNAMELEN];
+			char tmpname[ZFS_MAX_DATASET_NAME_LEN];
 			kernel_fini();
 			kernel_init(FREAD | FWRITE);
 			(void) snprintf(tmpname, sizeof (tmpname), "%s_tmp",



More information about the Midnightbsd-cvs mailing list