[Midnightbsd-cvs] src [10029] trunk/sys/dev/xen: sync

laffer1 at midnightbsd.org
Sun May 27 18:21:26 EDT 2018


Revision: 10029
          http://svnweb.midnightbsd.org/src/?rev=10029
Author:   laffer1
Date:     2018-05-27 18:21:25 -0400 (Sun, 27 May 2018)
Log Message:
-----------
sync

Modified Paths:
--------------
    trunk/sys/dev/xen/balloon/balloon.c
    trunk/sys/dev/xen/blkback/blkback.c
    trunk/sys/dev/xen/blkfront/blkfront.c
    trunk/sys/dev/xen/blkfront/block.h
    trunk/sys/dev/xen/console/console.c
    trunk/sys/dev/xen/console/xencons_ring.c
    trunk/sys/dev/xen/console/xencons_ring.h
    trunk/sys/dev/xen/control/control.c
    trunk/sys/dev/xen/netback/netback.c
    trunk/sys/dev/xen/netback/netback_unit_tests.c
    trunk/sys/dev/xen/netfront/mbufq.h
    trunk/sys/dev/xen/netfront/netfront.c
    trunk/sys/dev/xen/pcifront/pcifront.c
    trunk/sys/dev/xen/xenpci/xenpci.c
    trunk/sys/dev/xen/xenpci/xenpcivar.h

Added Paths:
-----------
    trunk/sys/dev/xen/timer/
    trunk/sys/dev/xen/timer/timer.c

Modified: trunk/sys/dev/xen/balloon/balloon.c
===================================================================
--- trunk/sys/dev/xen/balloon/balloon.c	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/balloon/balloon.c	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /******************************************************************************
  * balloon.c
  *
@@ -30,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/balloon/balloon.c 292906 2015-12-30 08:15:43Z royger $");
 
 #include <sys/param.h>
 #include <sys/lock.h>
@@ -40,29 +41,25 @@
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 
-#include <machine/xen/xen-os.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+#include <xen/xen-os.h>
 #include <xen/hypervisor.h>
+#include <xen/features.h>
 #include <xen/xenstore/xenstorevar.h>
 
-#include <vm/vm.h>
-#include <vm/vm_page.h>
+#include <machine/xen/xenvar.h>
 
 static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver");
 
+/* Convert from KB (as fetched from xenstore) to number of PAGES */
+#define KB_TO_PAGE_SHIFT	(PAGE_SHIFT - 10)
+
 struct mtx balloon_mutex;
 
-/*
- * Protects atomic reservation decrease/increase against concurrent increases.
- * Also protects non-atomic updates of current_pages and driver_pages, and
- * balloon lists.
- */
-struct mtx balloon_lock;
-
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
-#define ARRAY_SIZE(A)	(sizeof(A) / sizeof(A[0]))
 
 struct balloon_stats {
 	/* We aim for 'current allocation' == 'target allocation'. */
@@ -98,13 +95,8 @@
 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD,
     &bs.balloon_high, 0, "High-mem balloon");
 
-struct balloon_entry {
-	vm_page_t page;
-	STAILQ_ENTRY(balloon_entry) list;
-};
-
 /* List of ballooned pages, threaded through the mem_map array. */
-static STAILQ_HEAD(,balloon_entry) ballooned_pages;
+static TAILQ_HEAD(,vm_page) ballooned_pages;
 
 /* Main work function, always executed in process context. */
 static void balloon_process(void *unused);
@@ -114,39 +106,6 @@
 #define WPRINTK(fmt, args...) \
 	printk(KERN_WARNING "xen_mem: " fmt, ##args)
 
-/* balloon_append: add the given page to the balloon. */
-static void 
-balloon_append(vm_page_t page)
-{
-	struct balloon_entry *entry;
-
-	entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK);
-	entry->page = page;
-	STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
-	bs.balloon_low++;
-}
-
-/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
-static vm_page_t
-balloon_retrieve(void)
-{
-	vm_page_t page;
-	struct balloon_entry *entry;
-
-	if (STAILQ_EMPTY(&ballooned_pages))
-		return NULL;
-
-	entry = STAILQ_FIRST(&ballooned_pages);
-	STAILQ_REMOVE_HEAD(&ballooned_pages, list);
-
-	page = entry->page;
-	free(entry, M_BALLOON);
-	
-	bs.balloon_low--;
-
-	return page;
-}
-
 static unsigned long 
 current_target(void)
 {
@@ -153,7 +112,7 @@
 	unsigned long target = min(bs.target_pages, bs.hard_limit);
 	if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
 		target = bs.current_pages + bs.balloon_low + bs.balloon_high;
-	return target;
+	return (target);
 }
 
 static unsigned long
@@ -160,7 +119,7 @@
 minimum_target(void)
 {
 #ifdef XENHVM
-#define max_pfn physmem
+#define max_pfn realmem
 #else
 #define max_pfn HYPERVISOR_shared_info->arch.max_pfn
 #endif
@@ -167,7 +126,8 @@
 	unsigned long min_pages, curr_pages = current_target();
 
 #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
-	/* Simple continuous piecewiese linear function:
+	/*
+	 * Simple continuous piecewise linear function:
 	 *  max MiB -> min MiB	gradient
 	 *       0	   0
 	 *      16	  16
@@ -188,12 +148,10 @@
 	else
 		min_pages = MB2PAGES(296) + (max_pfn >> 5);
 #undef MB2PAGES
+#undef max_pfn
 
 	/* Don't enforce growth */
-	return min(min_pages, curr_pages);
-#ifndef CONFIG_XEN
-#undef max_pfn
-#endif
+	return (min(min_pages, curr_pages));
 }
 
 static int 
@@ -200,7 +158,6 @@
 increase_reservation(unsigned long nr_pages)
 {
 	unsigned long  pfn, i;
-	struct balloon_entry *entry;
 	vm_page_t      page;
 	long           rc;
 	struct xen_memory_reservation reservation = {
@@ -209,15 +166,14 @@
 		.domid        = DOMID_SELF
 	};
 
-	if (nr_pages > ARRAY_SIZE(frame_list))
-		nr_pages = ARRAY_SIZE(frame_list);
+	mtx_assert(&balloon_mutex, MA_OWNED);
 
-	mtx_lock(&balloon_lock);
+	if (nr_pages > nitems(frame_list))
+		nr_pages = nitems(frame_list);
 
-	for (entry = STAILQ_FIRST(&ballooned_pages), i = 0;
-	     i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) {
-		KASSERT(entry, ("ballooned_pages list corrupt"));
-		page = entry->page;
+	for (page = TAILQ_FIRST(&ballooned_pages), i = 0;
+	    i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) {
+		KASSERT(page != NULL, ("ballooned_pages list corrupt"));
 		frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
 	}
 
@@ -242,8 +198,10 @@
 	}
 
 	for (i = 0; i < nr_pages; i++) {
-		page = balloon_retrieve();
-		KASSERT(page, ("balloon_retrieve failed"));
+		page = TAILQ_FIRST(&ballooned_pages);
+		KASSERT(page != NULL, ("Unable to get ballooned page"));
+		TAILQ_REMOVE(&ballooned_pages, page, plinks.q);
+		bs.balloon_low--;
 
 		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
 		KASSERT((xen_feature(XENFEAT_auto_translated_physmap) ||
@@ -252,33 +210,13 @@
 
 		set_phys_to_machine(pfn, frame_list[i]);
 
-#if 0
-#ifndef XENHVM
-		/* Link back into the page tables if not highmem. */
-		if (pfn < max_low_pfn) {
-			int ret;
-			ret = HYPERVISOR_update_va_mapping(
-				(unsigned long)__va(pfn << PAGE_SHIFT),
-				pfn_pte_ma(frame_list[i], PAGE_KERNEL),
-				0);
-			PASSING(ret == 0,
-			    ("HYPERVISOR_update_va_mapping failed"));
-		}
-#endif
-#endif
-
-		/* Relinquish the page back to the allocator. */
-		vm_page_unwire(page, 0);
 		vm_page_free(page);
 	}
 
 	bs.current_pages += nr_pages;
-	//totalram_pages = bs.current_pages;
 
  out:
-	mtx_unlock(&balloon_lock);
-
-	return 0;
+	return (0);
 }
 
 static int
@@ -294,54 +232,37 @@
 		.domid        = DOMID_SELF
 	};
 
-	if (nr_pages > ARRAY_SIZE(frame_list))
-		nr_pages = ARRAY_SIZE(frame_list);
+	mtx_assert(&balloon_mutex, MA_OWNED);
 
+	if (nr_pages > nitems(frame_list))
+		nr_pages = nitems(frame_list);
+
 	for (i = 0; i < nr_pages; i++) {
 		if ((page = vm_page_alloc(NULL, 0, 
 			    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 
-			    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
+			    VM_ALLOC_ZERO)) == NULL) {
 			nr_pages = i;
 			need_sleep = 1;
 			break;
 		}
 
+		if ((page->flags & PG_ZERO) == 0) {
+			/*
+			 * Zero the page, or else we might be leaking
+			 * important data to other domains on the same
+			 * host. Xen doesn't scrub ballooned out memory
+			 * pages, the guest is in charge of making
+			 * sure that no information is leaked.
+			 */
+			pmap_zero_page(page);
+		}
+
 		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
 		frame_list[i] = PFNTOMFN(pfn);
 
-#if 0
-		if (!PageHighMem(page)) {
-			v = phys_to_virt(pfn << PAGE_SHIFT);
-			scrub_pages(v, 1);
-#ifdef CONFIG_XEN
-			ret = HYPERVISOR_update_va_mapping(
-				(unsigned long)v, __pte_ma(0), 0);
-			BUG_ON(ret);
-#endif
-		}
-#endif
-#ifdef CONFIG_XEN_SCRUB_PAGES
-		else {
-			v = kmap(page);
-			scrub_pages(v, 1);
-			kunmap(page);
-		}
-#endif
-	}
-
-#ifdef CONFIG_XEN
-	/* Ensure that ballooned highmem pages don't have kmaps. */
-	kmap_flush_unused();
-	flush_tlb_all();
-#endif
-
-	mtx_lock(&balloon_lock);
-
-	/* No more mappings: invalidate P2M and add to balloon. */
-	for (i = 0; i < nr_pages; i++) {
-		pfn = MFNTOPFN(frame_list[i]);
 		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-		balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
+		TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
+		bs.balloon_low++;
 	}
 
 	set_xen_guest_handle(reservation.extent_start, frame_list);
@@ -350,10 +271,7 @@
 	KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed"));
 
 	bs.current_pages -= nr_pages;
-	//totalram_pages = bs.current_pages;
 
-	mtx_unlock(&balloon_lock);
-
 	return (need_sleep);
 }
 
@@ -424,11 +342,11 @@
 		return;
 	} 
         
-	/* The given memory/target value is in KiB, so it needs converting to
-	   pages.  PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
-	*/
-	set_new_target(new_target >> (PAGE_SHIFT - 10));
-    
+	/*
+	 * The given memory/target value is in KiB, so it needs converting to
+	 * pages.  PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
+	 */
+	set_new_target(new_target >> KB_TO_PAGE_SHIFT);
 }
 
 static void 
@@ -460,13 +378,12 @@
 	if (!is_running_on_xen())
 		return;
 
-	mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF);
 	mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);
 
 #ifndef XENHVM
 	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
 #else
-	bs.current_pages = physmem;
+	bs.current_pages = realmem;
 #endif
 	bs.target_pages  = bs.current_pages;
 	bs.balloon_low   = 0;
@@ -480,7 +397,8 @@
 	/* Initialise the balloon with excess memory space. */
 	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
 		page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
-		balloon_append(page);
+		TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
+		bs.balloon_low++;
 	}
 #undef max_pfn
 #endif
@@ -496,76 +414,7 @@
 void 
 balloon_update_driver_allowance(long delta)
 {
-	mtx_lock(&balloon_lock);
+	mtx_lock(&balloon_mutex);
 	bs.driver_pages += delta;
-	mtx_unlock(&balloon_lock);
+	mtx_unlock(&balloon_mutex);
 }
-
-#if 0
-static int dealloc_pte_fn(
-	pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
-{
-	unsigned long mfn = pte_mfn(*pte);
-	int ret;
-	struct xen_memory_reservation reservation = {
-		.extent_start = &mfn,
-		.nr_extents   = 1,
-		.extent_order = 0,
-		.domid        = DOMID_SELF
-	};
-	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
-	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
-	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
-	KASSERT(ret == 1, ("HYPERVISOR_memory_op failed"));
-	return 0;
-}
-
-#endif
-
-#if 0
-vm_page_t
-balloon_alloc_empty_page_range(unsigned long nr_pages)
-{
-	vm_page_t pages;
-	int i, rc;
-	unsigned long *mfn_list;
-	struct xen_memory_reservation reservation = {
-		.address_bits = 0,
-		.extent_order = 0,
-		.domid        = DOMID_SELF
-	};
-
-	pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4)
-	if (pages == NULL)
-		return NULL;
-	
-	mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK);
-	
-	for (i = 0; i < nr_pages; i++) {
-		mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT);
-		PFNTOMFN(i) = INVALID_P2M_ENTRY;
-		reservation.extent_start = mfn_list;
-		reservation.nr_extents = nr_pages;
-		rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-		    &reservation);
-		KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed"));
-	}
-
-	current_pages -= nr_pages;
-
-	wakeup(balloon_process);
-
-	return pages;
-}
-
-void 
-balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
-{
-	unsigned long i;
-
-	for (i = 0; i < nr_pages; i++)
-		balloon_append(page + i);
-
-	wakeup(balloon_process);
-}
-#endif

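For reference, the balloon.c hunks above replace the open-coded (PAGE_SHIFT - 10) shift in the xenstore watch handler with the new KB_TO_PAGE_SHIFT macro, and thread ballooned pages directly through the vm_page plinks.q TAILQ instead of malloc'd balloon_entry wrappers. A minimal standalone sketch of the KiB-to-pages conversion (the 4 KiB page size and the 8192 KiB target below are illustrative assumptions, not values taken from the commit):

#include <stdio.h>

#define PAGE_SHIFT		12	/* illustrative: 4 KiB pages */
/* Convert from KiB (as fetched from xenstore) to number of pages. */
#define KB_TO_PAGE_SHIFT	(PAGE_SHIFT - 10)

int
main(void)
{
	unsigned long target_kib = 8192;	/* pretend xenstore target */
	unsigned long target_pages = target_kib >> KB_TO_PAGE_SHIFT;

	/* 8192 KiB / 4 KiB per page = 2048 pages. */
	printf("%lu KiB -> %lu pages\n", target_kib, target_pages);
	return (0);
}
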
Modified: trunk/sys/dev/xen/blkback/blkback.c
===================================================================
--- trunk/sys/dev/xen/blkback/blkback.c	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/blkback/blkback.c	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009-2011 Spectra Logic Corporation
  * All rights reserved.
@@ -31,7 +32,7 @@
  *          Ken Merry           (Spectra Logic Corporation)
  */
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/blkback/blkback.c 285738 2015-07-21 07:22:18Z royger $");
 
 /**
  * \file blkback.c
@@ -70,14 +71,13 @@
 #include <geom/geom.h>
 
 #include <machine/_inttypes.h>
-#include <machine/xen/xen-os.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 
+#include <xen/xen-os.h>
 #include <xen/blkif.h>
-#include <xen/evtchn.h>
 #include <xen/gnttab.h>
 #include <xen/xen_intr.h>
 
@@ -88,11 +88,19 @@
 
 /*--------------------------- Compile-time Tunables --------------------------*/
 /**
+ * The maximum number of shared memory ring pages we will allow in a
+ * negotiated block-front/back communication channel.  Allow enough
+ * ring space for all requests to be XBB_MAX_REQUEST_SIZE'd.
+ */
+#define	XBB_MAX_RING_PAGES		32
+
+/**
  * The maximum number of outstanding request blocks (request headers plus
  * additional segment blocks) we will allow in a negotiated block-front/back
  * communication channel.
  */
-#define	XBB_MAX_REQUESTS	256
+#define	XBB_MAX_REQUESTS 					\
+	__CONST_RING_SIZE(blkif, PAGE_SIZE * XBB_MAX_RING_PAGES)
 
 /**
  * \brief Define to force all I/O to be performed on memory owned by the
@@ -151,14 +159,6 @@
 		 (XBB_MAX_REQUEST_SIZE / PAGE_SIZE) + 1)))
 
 /**
- * The maximum number of shared memory ring pages we will allow in a
- * negotiated block-front/back communication channel.  Allow enough
- * ring space for all requests to be XBB_MAX_REQUEST_SIZE'd.
- */
-#define	XBB_MAX_RING_PAGES						    \
-	BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBB_MAX_SEGMENTS_PER_REQUEST) \
-		       * XBB_MAX_REQUESTS)
-/**
  * The maximum number of ring pages that we can allow per request list.
  * We limit this to the maximum number of segments per request, because
  * that is already a reasonable number of segments to aggregate.  This
@@ -231,7 +231,7 @@
 	int			 num_children;
 
 	/**
-	 * Number of I/O requests dispatched to the backend.
+	 * Number of I/O requests still pending on the backend.
 	 */
 	int			 pendcnt;
 
@@ -328,13 +328,6 @@
 	int			  nr_512b_sectors;
 
 	/**
-	 * The number of struct bio requests still outstanding for this
-	 * request on the backend device.  This field is only used for	
-	 * device (rather than file) backed I/O.
-	 */
-	int			  pendcnt;
-
-	/**
 	 * BLKIF_OP code for this request.
 	 */
 	int			  operation;
@@ -682,7 +675,7 @@
 	blkif_back_rings_t	  rings;
 
 	/** IRQ mapping for the communication ring event channel. */
-	int			  irq;
+	xen_intr_handle_t	  xen_intr_handle;
 
 	/**
 	 * \brief Backend access mode flags (e.g. write, or read-only).
@@ -1240,6 +1233,8 @@
 
 	nreq->reqlist = *reqlist;
 	nreq->req_ring_idx = ring_idx;
+	nreq->id = ring_req->id;
+	nreq->operation = ring_req->operation;
 
 	if (xbb->abi != BLKIF_PROTOCOL_NATIVE) {
 		bcopy(ring_req, &nreq->ring_req_storage, sizeof(*ring_req));
@@ -1323,7 +1318,7 @@
 	resp->operation = req->operation;
 	resp->status    = status;
 
-	xbb->rings.common.rsp_prod_pvt += BLKIF_SEGS_TO_BLOCKS(req->nr_pages);
+	xbb->rings.common.rsp_prod_pvt++;
 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbb->rings.common, notify);
 
 	if (xbb->rings.common.rsp_prod_pvt == xbb->rings.common.req_cons) {
@@ -1347,7 +1342,7 @@
 		taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); 
 
 	if (notify)
-		notify_remote_via_irq(xbb->irq);
+		xen_intr_signal(xbb->xen_intr_handle);
 }
 
 /**
@@ -1609,7 +1604,6 @@
 		req_ring_idx	      = nreq->req_ring_idx;
 		nr_sects              = 0;
 		nseg                  = ring_req->nr_segments;
-		nreq->id              = ring_req->id;
 		nreq->nr_pages        = nseg;
 		nreq->nr_512b_sectors = 0;
 		req_seg_idx	      = 0;
@@ -1616,8 +1610,8 @@
 		sg	              = NULL;
 
 		/* Check that number of segments is sane. */
-		if (unlikely(nseg == 0)
-		 || unlikely(nseg > xbb->max_request_segments)) {
+		if (__predict_false(nseg == 0)
+		 || __predict_false(nseg > xbb->max_request_segments)) {
 			DPRINTF("Bad number of segments in request (%d)\n",
 				nseg);
 			reqlist->status = BLKIF_RSP_ERROR;
@@ -1624,87 +1618,49 @@
 			goto send_response;
 		}
 
-		block_segs    = MIN(nreq->nr_pages,
-				    BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK);
+		block_segs    = nseg;
 		sg            = ring_req->seg;
 		last_block_sg = sg + block_segs;
-		while (1) {
 
-			while (sg < last_block_sg) {
-				KASSERT(seg_idx <
-					XBB_MAX_SEGMENTS_PER_REQLIST,
-					("seg_idx %d is too large, max "
-					"segs %d\n", seg_idx,
-					XBB_MAX_SEGMENTS_PER_REQLIST));
-			
-				xbb_sg->first_sect = sg->first_sect;
-				xbb_sg->last_sect  = sg->last_sect;
-				xbb_sg->nsect =
-				    (int8_t)(sg->last_sect -
-				    sg->first_sect + 1);
+		while (sg < last_block_sg) {
+			KASSERT(seg_idx <
+				XBB_MAX_SEGMENTS_PER_REQLIST,
+				("seg_idx %d is too large, max "
+				"segs %d\n", seg_idx,
+				XBB_MAX_SEGMENTS_PER_REQLIST));
 
-				if ((sg->last_sect >= (PAGE_SIZE >> 9))
-				 || (xbb_sg->nsect <= 0)) {
-					reqlist->status = BLKIF_RSP_ERROR;
-					goto send_response;
-				}
+			xbb_sg->first_sect = sg->first_sect;
+			xbb_sg->last_sect  = sg->last_sect;
+			xbb_sg->nsect =
+			    (int8_t)(sg->last_sect -
+			    sg->first_sect + 1);
 
-				nr_sects += xbb_sg->nsect;
-				map->host_addr = xbb_get_gntaddr(reqlist,
-							seg_idx, /*sector*/0);
-				KASSERT(map->host_addr + PAGE_SIZE <=
-					xbb->ring_config.gnt_addr,
-					("Host address %#jx len %d overlaps "
-					 "ring address %#jx\n",
-					(uintmax_t)map->host_addr, PAGE_SIZE,
-					(uintmax_t)xbb->ring_config.gnt_addr));
-					
-				map->flags     = GNTMAP_host_map;
-				map->ref       = sg->gref;
-				map->dom       = xbb->otherend_id;
-				if (operation == BIO_WRITE)
-					map->flags |= GNTMAP_readonly;
-				sg++;
-				map++;
-				xbb_sg++;
-				seg_idx++;
-				req_seg_idx++;
+			if ((sg->last_sect >= (PAGE_SIZE >> 9))
+			 || (xbb_sg->nsect <= 0)) {
+				reqlist->status = BLKIF_RSP_ERROR;
+				goto send_response;
 			}
 
-			block_segs = MIN(nseg - req_seg_idx,
-					 BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
-			if (block_segs == 0)
-				break;
+			nr_sects += xbb_sg->nsect;
+			map->host_addr = xbb_get_gntaddr(reqlist,
+						seg_idx, /*sector*/0);
+			KASSERT(map->host_addr + PAGE_SIZE <=
+				xbb->ring_config.gnt_addr,
+				("Host address %#jx len %d overlaps "
+				 "ring address %#jx\n",
+				(uintmax_t)map->host_addr, PAGE_SIZE,
+				(uintmax_t)xbb->ring_config.gnt_addr));
 
-			/*
-			 * Fetch the next request block full of SG elements.
-			 * For now, only the spacing between entries is
-			 * different in the different ABIs, not the sg entry
-			 * layout.
-			 */
-			req_ring_idx++;
-			switch (xbb->abi) {
-			case BLKIF_PROTOCOL_NATIVE:
-				sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.native,
-							   req_ring_idx);
-				break;
-			case BLKIF_PROTOCOL_X86_32:
-			{
-				sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_32,
-							   req_ring_idx);
-				break;
-			}
-			case BLKIF_PROTOCOL_X86_64:
-			{
-				sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_64,
-							   req_ring_idx);
-				break;
-			}
-			default:
-				panic("Unexpected blkif protocol ABI.");
-				/* NOTREACHED */
-			} 
-			last_block_sg = sg + block_segs;
+			map->flags     = GNTMAP_host_map;
+			map->ref       = sg->gref;
+			map->dom       = xbb->otherend_id;
+			if (operation == BIO_WRITE)
+				map->flags |= GNTMAP_readonly;
+			sg++;
+			map++;
+			xbb_sg++;
+			seg_idx++;
+			req_seg_idx++;
 		}
 
 		/* Convert to the disk's sector size */
@@ -1734,7 +1690,7 @@
 	for (seg_idx = 0, map = xbb->maps; seg_idx < reqlist->nr_segments;
 	     seg_idx++, map++){
 
-		if (unlikely(map->status != 0)) {
+		if (__predict_false(map->status != 0)) {
 			DPRINTF("invalid buffer -- could not remap "
 			        "it (%d)\n", map->status);
 			DPRINTF("Mapping(%d): Host Addr 0x%lx, flags "
@@ -1958,8 +1914,7 @@
 			 * response be generated before we make room in 
 			 * the queue for that response.
 			 */
-			xbb->rings.common.req_cons +=
-			    BLKIF_SEGS_TO_BLOCKS(ring_req->nr_segments);
+			xbb->rings.common.req_cons++;
 			xbb->reqs_received++;
 
 			cur_size = xbb_count_sects(ring_req);
@@ -2026,21 +1981,23 @@
 * \param arg  Callback argument registered during event channel
  *             binding - the xbb_softc for this instance.
  */
-static void
-xbb_intr(void *arg)
+static int
+xbb_filter(void *arg)
 {
 	struct xbb_softc *xbb;
 
-	/* Defer to kernel thread. */
+	/* Defer to taskqueue thread. */
 	xbb = (struct xbb_softc *)arg;
 	taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); 
+
+	return (FILTER_HANDLED);
 }
 
 SDT_PROVIDER_DEFINE(xbb);
-SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_dev, flush, flush, "int");
-SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, read, read, "int", "uint64_t",
+SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_dev, flush, "int");
+SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, read, "int", "uint64_t",
 		  "uint64_t");
-SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, write, write, "int",
+SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, write, "int",
 		  "uint64_t", "uint64_t");
 
 /*----------------------------- Backend Handlers -----------------------------*/
@@ -2061,7 +2018,6 @@
 {
 	struct xbb_dev_data *dev_data;
 	struct bio          *bios[XBB_MAX_SEGMENTS_PER_REQLIST];
-	struct xbb_xen_req  *nreq;
 	off_t                bio_offset;
 	struct bio          *bio;
 	struct xbb_sg       *xbb_sg;
@@ -2079,9 +2035,8 @@
 	bio_idx    = 0;
 
 	if (operation == BIO_FLUSH) {
-		nreq = STAILQ_FIRST(&reqlist->contig_req_list);
 		bio = g_new_bio();
-		if (unlikely(bio == NULL)) {
+		if (__predict_false(bio == NULL)) {
 			DPRINTF("Unable to allocate bio for BIO_FLUSH\n");
 			error = ENOMEM;
 			return (error);
@@ -2093,10 +2048,10 @@
 		bio->bio_offset	 = 0;
 		bio->bio_data	 = 0;
 		bio->bio_done	 = xbb_bio_done;
-		bio->bio_caller1 = nreq;
+		bio->bio_caller1 = reqlist;
 		bio->bio_pblkno	 = 0;
 
-		nreq->pendcnt	 = 1;
+		reqlist->pendcnt = 1;
 
 		SDT_PROBE1(xbb, kernel, xbb_dispatch_dev, flush,
 			   device_get_unit(xbb->dev));
@@ -2143,7 +2098,7 @@
 			}
 
 			bio = bios[nbio++] = g_new_bio();
-			if (unlikely(bio == NULL)) {
+			if (__predict_false(bio == NULL)) {
 				error = ENOMEM;
 				goto fail_free_bios;
 			}
@@ -2218,10 +2173,10 @@
 	return (error);
 }
 
-SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_file, flush, flush, "int");
-SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, read, read, "int", "uint64_t",
+SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_file, flush, "int");
+SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, read, "int", "uint64_t",
 		  "uint64_t");
-SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, write, write, "int",
+SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, write, "int",
 		  "uint64_t", "uint64_t");
 
 /**
@@ -2250,7 +2205,6 @@
 	void                **p_vaddr;
 	int                   saved_uio_iovcnt;
 #endif /* XBB_USE_BOUNCE_BUFFERS */
-	int                   vfs_is_locked;
 	int                   error;
 
 	file_data = &xbb->backend.file;
@@ -2271,8 +2225,6 @@
 		SDT_PROBE1(xbb, kernel, xbb_dispatch_file, flush,
 			   device_get_unit(xbb->dev));
 
-		vfs_is_locked = VFS_LOCK_GIANT(xbb->vn->v_mount);
-
 		(void) vn_start_write(xbb->vn, &mountpoint, V_WAIT);
 
 		vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY);
@@ -2281,8 +2233,6 @@
 
 		vn_finished_write(mountpoint);
 
-		VFS_UNLOCK_GIANT(vfs_is_locked);
-
 		goto bailout_send_response;
 		/* NOTREACHED */
 	}
@@ -2366,7 +2316,6 @@
 	}
 #endif /* XBB_USE_BOUNCE_BUFFERS */
 
-	vfs_is_locked = VFS_LOCK_GIANT(xbb->vn->v_mount);
 	switch (operation) {
 	case BIO_READ:
 
@@ -2442,7 +2391,6 @@
 		panic("invalid operation %d", operation);
 		/* NOTREACHED */
 	}
-	VFS_UNLOCK_GIANT(vfs_is_locked);
 
 #ifdef XBB_USE_BOUNCE_BUFFERS
 	/* We only need to copy here for read operations */
@@ -2489,7 +2437,6 @@
 	DPRINTF("closing dev=%s\n", xbb->dev_name);
 	if (xbb->vn) {
 		int flags = FREAD;
-		int vfs_is_locked = 0;
 
 		if ((xbb->flags & XBBF_READ_ONLY) == 0)
 			flags |= FWRITE;
@@ -2504,7 +2451,6 @@
 			}
 			break;
 		case XBB_TYPE_FILE:
-			vfs_is_locked = VFS_LOCK_GIANT(xbb->vn->v_mount);
 			break;
 		case XBB_TYPE_NONE:
 		default:
@@ -2519,7 +2465,6 @@
 		case XBB_TYPE_DISK:
 			break;
 		case XBB_TYPE_FILE:
-			VFS_UNLOCK_GIANT(vfs_is_locked);
 			if (xbb->backend.file.cred != NULL) {
 				crfree(xbb->backend.file.cred);
 				xbb->backend.file.cred = NULL;
@@ -2684,7 +2629,6 @@
 	struct nameidata nd;
 	int		 flags;
 	int		 error;
-	int		 vfs_is_locked;
 
 	flags = FREAD;
 	error = 0;
@@ -2744,8 +2688,6 @@
 		return (error);
 	}
 
-	vfs_is_locked = NDHASGIANT(&nd);
-
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 		
 	xbb->vn = nd.ni_vp;
@@ -2761,7 +2703,6 @@
 				 "or file", xbb->dev_name);
 	}
 	VOP_UNLOCK(xbb->vn, 0);
-	VFS_UNLOCK_GIANT(vfs_is_locked);
 
 	if (error != 0) {
 		xbb_close_backend(xbb);
@@ -2789,7 +2730,7 @@
 {
 	if (xbb->kva != 0) {
 #ifndef XENHVM
-		kmem_free(kernel_map, xbb->kva, xbb->kva_size);
+		kva_free(xbb->kva, xbb->kva_size);
 #else
 		if (xbb->pseudo_phys_res != NULL) {
 			bus_release_resource(xbb->dev, SYS_RES_MEMORY,
@@ -2825,10 +2766,7 @@
 	if ((xbb->flags & XBBF_RING_CONNECTED) == 0)
 		return (0);
 
-	if (xbb->irq != 0) {
-		unbind_from_irqhandler(xbb->irq);
-		xbb->irq = 0;
-	}
+	xen_intr_unbind(&xbb->xen_intr_handle);
 
 	mtx_unlock(&xbb->lock);
 	taskqueue_drain(xbb->io_taskqueue, &xbb->io_task); 
@@ -2980,13 +2918,14 @@
 
 	xbb->flags |= XBBF_RING_CONNECTED;
 
-	error =
-	    bind_interdomain_evtchn_to_irqhandler(xbb->otherend_id,
-						  xbb->ring_config.evtchn,
-						  device_get_nameunit(xbb->dev),
-						  xbb_intr, /*arg*/xbb,
-						  INTR_TYPE_BIO | INTR_MPSAFE,
-						  &xbb->irq);
+	error = xen_intr_bind_remote_port(xbb->dev,
+					  xbb->otherend_id,
+					  xbb->ring_config.evtchn,
+					  xbb_filter,
+					  /*ithread_handler*/NULL,
+					  /*arg*/xbb,
+					  INTR_TYPE_BIO | INTR_MPSAFE,
+					  &xbb->xen_intr_handle);
 	if (error) {
 		(void)xbb_disconnect(xbb);
 		xenbus_dev_fatal(xbb->dev, error, "binding event channel");
@@ -3028,7 +2967,7 @@
 		device_get_nameunit(xbb->dev), xbb->kva_size,
 		xbb->reqlist_kva_size);
 #ifndef XENHVM
-	xbb->kva = kmem_alloc_nofault(kernel_map, xbb->kva_size);
+	xbb->kva = kva_alloc(xbb->kva_size);
 	if (xbb->kva == 0)
 		return (ENOMEM);
 	xbb->gnt_base_addr = xbb->kva;
@@ -3079,7 +3018,7 @@
 	 * Protocol defaults valid even if all negotiation fails.
 	 */
 	xbb->ring_config.ring_pages = 1;
-	xbb->max_request_segments   = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
+	xbb->max_request_segments   = BLKIF_MAX_SEGMENTS_PER_REQUEST;
 	xbb->max_request_size	    = xbb->max_request_segments * PAGE_SIZE;
 
 	/*
@@ -3110,60 +3049,23 @@
 	 *       fields.
 	 */
 	ring_page_order = 0;
+	xbb->max_requests = 32;
+
 	(void)xs_scanf(XST_NIL, otherend_path,
 		       "ring-page-order", NULL, "%u",
 		       &ring_page_order);
 	xbb->ring_config.ring_pages = 1 << ring_page_order;
-	(void)xs_scanf(XST_NIL, otherend_path,
-		       "num-ring-pages", NULL, "%u",
-		       &xbb->ring_config.ring_pages);
 	ring_size = PAGE_SIZE * xbb->ring_config.ring_pages;
 	xbb->max_requests = BLKIF_MAX_RING_REQUESTS(ring_size);
 
-	(void)xs_scanf(XST_NIL, otherend_path,
-		       "max-requests", NULL, "%u",
-		       &xbb->max_requests);
-
-	(void)xs_scanf(XST_NIL, otherend_path,
-		       "max-request-segments", NULL, "%u",
-		       &xbb->max_request_segments);
-
-	(void)xs_scanf(XST_NIL, otherend_path,
-		       "max-request-size", NULL, "%u",
-		       &xbb->max_request_size);
-
 	if (xbb->ring_config.ring_pages	> XBB_MAX_RING_PAGES) {
 		xenbus_dev_fatal(xbb->dev, EINVAL,
 				 "Front-end specified ring-pages of %u "
-				 "exceeds backend limit of %zu.  "
+				 "exceeds backend limit of %u.  "
 				 "Unable to connect.",
 				 xbb->ring_config.ring_pages,
 				 XBB_MAX_RING_PAGES);
 		return (EINVAL);
-	} else if (xbb->max_requests > XBB_MAX_REQUESTS) {
-		xenbus_dev_fatal(xbb->dev, EINVAL,
-				 "Front-end specified max_requests of %u "
-				 "exceeds backend limit of %u.  "
-				 "Unable to connect.",
-				 xbb->max_requests,
-				 XBB_MAX_REQUESTS);
-		return (EINVAL);
-	} else if (xbb->max_request_segments > XBB_MAX_SEGMENTS_PER_REQUEST) {
-		xenbus_dev_fatal(xbb->dev, EINVAL,
-				 "Front-end specified max_requests_segments "
-				 "of %u exceeds backend limit of %u.  "
-				 "Unable to connect.",
-				 xbb->max_request_segments,
-				 XBB_MAX_SEGMENTS_PER_REQUEST);
-		return (EINVAL);
-	} else if (xbb->max_request_size > XBB_MAX_REQUEST_SIZE) {
-		xenbus_dev_fatal(xbb->dev, EINVAL,
-				 "Front-end specified max_request_size "
-				 "of %u exceeds backend limit of %u.  "
-				 "Unable to connect.",
-				 xbb->max_request_size,
-				 XBB_MAX_REQUEST_SIZE);
-		return (EINVAL);
 	}
 
 	if (xbb->ring_config.ring_pages	== 1) {
@@ -3701,18 +3603,6 @@
 		return (error);
 	}
 
-	/*
-	 * Amazon EC2 client compatility.  They refer to max-ring-pages
-	 * instead of to max-ring-page-order.
-	 */
-	error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
-			  "max-ring-pages", "%zu", XBB_MAX_RING_PAGES);
-	if (error) {
-		xbb_attach_failed(xbb, error, "writing %s/max-ring-pages",
-				  xenbus_get_node(xbb->dev));
-		return (error);
-	}
-
 	max_ring_page_order = flsl(XBB_MAX_RING_PAGES) - 1;
 	error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
 			  "max-ring-page-order", "%u", max_ring_page_order);
@@ -3722,32 +3612,6 @@
 		return (error);
 	}
 
-	error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
-			  "max-requests", "%u", XBB_MAX_REQUESTS);
-	if (error) {
-		xbb_attach_failed(xbb, error, "writing %s/max-requests",
-				  xenbus_get_node(xbb->dev));
-		return (error);
-	}
-
-	error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
-			  "max-request-segments", "%u",
-			  XBB_MAX_SEGMENTS_PER_REQUEST);
-	if (error) {
-		xbb_attach_failed(xbb, error, "writing %s/max-request-segments",
-				  xenbus_get_node(xbb->dev));
-		return (error);
-	}
-
-	error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
-			  "max-request-size", "%u",
-			  XBB_MAX_REQUEST_SIZE);
-	if (error) {
-		xbb_attach_failed(xbb, error, "writing %s/max-request-size",
-				  xenbus_get_node(xbb->dev));
-		return (error);
-	}
-
 	/* Collect physical device information. */
 	error = xs_gather(XST_NIL, xenbus_get_otherend_path(xbb->dev),
 			  "device-type", NULL, &xbb->dev_type,
@@ -3805,9 +3669,10 @@
 	 * Create a taskqueue for doing work that must occur from a
 	 * thread context.
 	 */
-	xbb->io_taskqueue = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
-					     taskqueue_thread_enqueue,
-					     /*context*/&xbb->io_taskqueue);
+	xbb->io_taskqueue = taskqueue_create_fast(device_get_nameunit(dev),
+						  M_NOWAIT,
+						  taskqueue_thread_enqueue,
+						  /*context*/&xbb->io_taskqueue);
 	if (xbb->io_taskqueue == NULL) {
 		xbb_attach_failed(xbb, error, "Unable to create taskqueue");
 		return (ENOMEM);

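The blkback.c hunks above stop hard-coding XBB_MAX_REQUESTS at 256 and instead derive it from the ring space via __CONST_RING_SIZE(blkif, PAGE_SIZE * XBB_MAX_RING_PAGES). A rough sketch of that sizing arithmetic, assuming the usual Xen ring convention of dividing the space left after the shared-ring header by the entry size and rounding the count down to a power of two (the header and entry sizes below are made-up placeholders, not the real blkif ABI values):

#include <stdio.h>

#define PAGE_SIZE		4096
#define XBB_MAX_RING_PAGES	32

/* Round v down to the nearest power of two (v must be > 0). */
static unsigned int
rounddown_pow2(unsigned int v)
{
	unsigned int r = 1;

	while ((r << 1) <= v)
		r <<= 1;
	return (r);
}

int
main(void)
{
	/* Placeholder sizes; the real ones come from the blkif headers. */
	unsigned int sring_hdr = 64;	/* producer/consumer indexes, pad */
	unsigned int entry_sz = 112;	/* union of request/response */
	unsigned int ring_bytes = PAGE_SIZE * XBB_MAX_RING_PAGES;
	unsigned int nreqs;

	nreqs = rounddown_pow2((ring_bytes - sring_hdr) / entry_sz);
	printf("%u ring bytes -> %u requests\n", ring_bytes, nreqs);
	return (0);
}
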
Modified: trunk/sys/dev/xen/blkfront/blkfront.c
===================================================================
--- trunk/sys/dev/xen/blkfront/blkfront.c	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/blkfront/blkfront.c	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,6 +1,8 @@
+/* $MidnightBSD$ */
 /*
  * XenBSD block device driver
  *
+ * Copyright (c) 2010-2013 Spectra Logic Corporation
  * Copyright (c) 2009 Scott Long, Yahoo!
  * Copyright (c) 2009 Frank Suchomel, Citrix
  * Copyright (c) 2009 Doug F. Rabson, Citrix
@@ -28,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/blkfront/blkfront.c 315676 2017-03-21 09:38:59Z royger $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -50,19 +52,17 @@
 #include <machine/vmparam.h>
 #include <sys/bus_dma.h>
 
-#include <machine/_inttypes.h>
-#include <machine/xen/xen-os.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-
+#include <xen/xen-os.h>
 #include <xen/hypervisor.h>
 #include <xen/xen_intr.h>
-#include <xen/evtchn.h>
 #include <xen/gnttab.h>
 #include <xen/interface/grant_table.h>
 #include <xen/interface/io/protocols.h>
 #include <xen/xenbus/xenbusvar.h>
 
+#include <machine/_inttypes.h>
+#include <machine/xen/xenvar.h>
+
 #include <geom/geom_disk.h>
 
 #include <dev/xen/blkfront/block.h>
@@ -69,214 +69,398 @@
 
 #include "xenbus_if.h"
 
-/* prototypes */
-static void xb_free_command(struct xb_command *cm);
-static void xb_startio(struct xb_softc *sc);
-static void blkfront_connect(struct xb_softc *);
-static void blkfront_closing(device_t);
-static int blkfront_detach(device_t);
-static int setup_blkring(struct xb_softc *);
-static void blkif_int(void *);
-static void blkfront_initialize(struct xb_softc *);
-static int blkif_completion(struct xb_command *);
-static void blkif_free(struct xb_softc *);
-static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int);
+/*--------------------------- Forward Declarations ---------------------------*/
+static void xbd_closing(device_t);
+static void xbd_startio(struct xbd_softc *sc);
 
+/*---------------------------------- Macros ----------------------------------*/
+#if 0
+#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
+#else
+#define DPRINTK(fmt, args...) 
+#endif
+
+#define XBD_SECTOR_SHFT		9
+
+/*---------------------------- Global Static Data ----------------------------*/
 static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");
 
-#define GRANT_INVALID_REF 0
+static int xbd_enable_indirect = 1;
+SYSCTL_NODE(_hw, OID_AUTO, xbd, CTLFLAG_RD, 0, "xbd driver parameters");
+SYSCTL_INT(_hw_xbd, OID_AUTO, xbd_enable_indirect, CTLFLAG_RDTUN,
+    &xbd_enable_indirect, 0, "Enable xbd indirect segments");
 
-/* Control whether runtime update of vbds is enabled. */
-#define ENABLE_VBD_UPDATE 0
+/*---------------------------- Command Processing ----------------------------*/
+static void
+xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag)
+{
+	if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) != 0)
+		return;
 
-#if ENABLE_VBD_UPDATE
-static void vbd_update(void);
-#endif
+	sc->xbd_flags |= xbd_flag;
+	sc->xbd_qfrozen_cnt++;
+}
 
-#define BLKIF_STATE_DISCONNECTED 0
-#define BLKIF_STATE_CONNECTED    1
-#define BLKIF_STATE_SUSPENDED    2
+static void
+xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag)
+{
+	if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) == 0)
+		return;
 
-#ifdef notyet
-static char *blkif_state_name[] = {
-	[BLKIF_STATE_DISCONNECTED] = "disconnected",
-	[BLKIF_STATE_CONNECTED]    = "connected",
-	[BLKIF_STATE_SUSPENDED]    = "closed",
-};
+	if (sc->xbd_qfrozen_cnt == 0)
+		panic("%s: Thaw with flag 0x%x while not frozen.",
+		    __func__, xbd_flag);
 
-static char * blkif_status_name[] = {
-	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
-	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
-	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
-	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
-};
-#endif
+	sc->xbd_flags &= ~xbd_flag;
+	sc->xbd_qfrozen_cnt--;
+}
 
-#if 0
-#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
-#else
-#define DPRINTK(fmt, args...) 
-#endif
+static void
+xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag)
+{
+	if ((cm->cm_flags & XBDCF_FROZEN) != 0)
+		return;
 
-static int blkif_open(struct disk *dp);
-static int blkif_close(struct disk *dp);
-static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
-static int blkif_queue_request(struct xb_softc *sc, struct xb_command *cm);
-static void xb_strategy(struct bio *bp);
+	cm->cm_flags |= XBDCF_FROZEN|cm_flag;
+	xbd_freeze(sc, XBDF_NONE);
+}
 
-// In order to quiesce the device during kernel dumps, outstanding requests to
-// DOM0 for disk reads/writes need to be accounted for.
-static	int	xb_dump(void *, void *, vm_offset_t, off_t, size_t);
+static void
+xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm)
+{
+	if ((cm->cm_flags & XBDCF_FROZEN) == 0)
+		return;
 
-/* XXX move to xb_vbd.c when VBD update support is added */
-#define MAX_VBDS 64
+	cm->cm_flags &= ~XBDCF_FROZEN;
+	xbd_thaw(sc, XBDF_NONE);
+}
 
-#define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
-#define XBD_SECTOR_SHFT		9
+static inline void 
+xbd_flush_requests(struct xbd_softc *sc)
+{
+	int notify;
 
-/*
- * Translate Linux major/minor to an appropriate name and unit
- * number. For HVM guests, this allows us to use the same drive names
- * with blkfront as the emulated drives, easing transition slightly.
- */
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->xbd_ring, notify);
+
+	if (notify)
+		xen_intr_signal(sc->xen_intr_handle);
+}
+
 static void
-blkfront_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name)
+xbd_free_command(struct xbd_command *cm)
 {
-	static struct vdev_info {
-		int major;
-		int shift;
-		int base;
-		const char *name;
-	} info[] = {
-		{3,	6,	0,	"ad"},	/* ide0 */
-		{22,	6,	2,	"ad"},	/* ide1 */
-		{33,	6,	4,	"ad"},	/* ide2 */
-		{34,	6,	6,	"ad"},	/* ide3 */
-		{56,	6,	8,	"ad"},	/* ide4 */
-		{57,	6,	10,	"ad"},	/* ide5 */
-		{88,	6,	12,	"ad"},	/* ide6 */
-		{89,	6,	14,	"ad"},	/* ide7 */
-		{90,	6,	16,	"ad"},	/* ide8 */
-		{91,	6,	18,	"ad"},	/* ide9 */
 
-		{8,	4,	0,	"da"},	/* scsi disk0 */
-		{65,	4,	16,	"da"},	/* scsi disk1 */
-		{66,	4,	32,	"da"},	/* scsi disk2 */
-		{67,	4,	48,	"da"},	/* scsi disk3 */
-		{68,	4,	64,	"da"},	/* scsi disk4 */
-		{69,	4,	80,	"da"},	/* scsi disk5 */
-		{70,	4,	96,	"da"},	/* scsi disk6 */
-		{71,	4,	112,	"da"},	/* scsi disk7 */
-		{128,	4,	128,	"da"},	/* scsi disk8 */
-		{129,	4,	144,	"da"},	/* scsi disk9 */
-		{130,	4,	160,	"da"},	/* scsi disk10 */
-		{131,	4,	176,	"da"},	/* scsi disk11 */
-		{132,	4,	192,	"da"},	/* scsi disk12 */
-		{133,	4,	208,	"da"},	/* scsi disk13 */
-		{134,	4,	224,	"da"},	/* scsi disk14 */
-		{135,	4,	240,	"da"},	/* scsi disk15 */
+	KASSERT((cm->cm_flags & XBDCF_Q_MASK) == XBD_Q_NONE,
+	    ("Freeing command that is still on queue %d.",
+	    cm->cm_flags & XBDCF_Q_MASK));
 
-		{202,	4,	0,	"xbd"},	/* xbd */
+	cm->cm_flags = XBDCF_INITIALIZER;
+	cm->cm_bp = NULL;
+	cm->cm_complete = NULL;
+	xbd_enqueue_cm(cm, XBD_Q_FREE);
+	xbd_thaw(cm->cm_sc, XBDF_CM_SHORTAGE);
+}
 
-		{0,	0,	0,	NULL},
-	};
-	int major = vdevice >> 8;
-	int minor = vdevice & 0xff;
-	int i;
+static void
+xbd_mksegarray(bus_dma_segment_t *segs, int nsegs,
+    grant_ref_t * gref_head, int otherend_id, int readonly,
+    grant_ref_t * sg_ref, blkif_request_segment_t * sg)
+{
+	struct blkif_request_segment *last_block_sg = sg + nsegs;
+	vm_paddr_t buffer_ma;
+	uint64_t fsect, lsect;
+	int ref;
 
-	if (vdevice & (1 << 28)) {
-		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
-		*name = "xbd";
+	while (sg < last_block_sg) {
+		buffer_ma = segs->ds_addr;
+		fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
+		lsect = fsect + (segs->ds_len  >> XBD_SECTOR_SHFT) - 1;
+
+		KASSERT(lsect <= 7, ("XEN disk driver data cannot "
+		    "cross a page boundary"));
+
+		/* install a grant reference. */
+		ref = gnttab_claim_grant_reference(gref_head);
+
+		/*
+		 * GNTTAB_LIST_END == 0xffffffff, but it is private
+		 * to gnttab.c.
+		 */
+		KASSERT(ref != ~0, ("grant_reference failed"));
+
+		gnttab_grant_foreign_access_ref(
+		    ref,
+		    otherend_id,
+		    buffer_ma >> PAGE_SHIFT,
+		    readonly);
+
+		*sg_ref = ref;
+		*sg = (struct blkif_request_segment) {
+			.gref       = ref,
+			.first_sect = fsect, 
+			.last_sect  = lsect
+		};
+		sg++;
+		sg_ref++;
+		segs++;
+	}
+}
+
+static void
+xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
+{
+	struct xbd_softc *sc;
+	struct xbd_command *cm;
+	int op;
+
+	cm = arg;
+	sc = cm->cm_sc;
+
+	if (error) {
+		cm->cm_bp->bio_error = EIO;
+		biodone(cm->cm_bp);
+		xbd_free_command(cm);
 		return;
 	}
 
-	for (i = 0; info[i].major; i++) {
-		if (info[i].major == major) {
-			*unit = info[i].base + (minor >> info[i].shift);
-			*name = info[i].name;
-			return;
-		}
+	KASSERT(nsegs <= sc->xbd_max_request_segments,
+	    ("Too many segments in a blkfront I/O"));
+
+	if (nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+		blkif_request_t	*ring_req;
+
+		/* Fill out a blkif_request_t structure. */
+		ring_req = (blkif_request_t *)
+		    RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
+		sc->xbd_ring.req_prod_pvt++;
+		ring_req->id = cm->cm_id;
+		ring_req->operation = cm->cm_operation;
+		ring_req->sector_number = cm->cm_sector_number;
+		ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
+		ring_req->nr_segments = nsegs;
+		cm->cm_nseg = nsegs;
+		xbd_mksegarray(segs, nsegs, &cm->cm_gref_head,
+		    xenbus_get_otherend_id(sc->xbd_dev),
+		    cm->cm_operation == BLKIF_OP_WRITE,
+		    cm->cm_sg_refs, ring_req->seg);
+	} else {
+		blkif_request_indirect_t *ring_req;
+
+		/* Fill out a blkif_request_indirect_t structure. */
+		ring_req = (blkif_request_indirect_t *)
+		    RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
+		sc->xbd_ring.req_prod_pvt++;
+		ring_req->id = cm->cm_id;
+		ring_req->operation = BLKIF_OP_INDIRECT;
+		ring_req->indirect_op = cm->cm_operation;
+		ring_req->sector_number = cm->cm_sector_number;
+		ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
+		ring_req->nr_segments = nsegs;
+		cm->cm_nseg = nsegs;
+		xbd_mksegarray(segs, nsegs, &cm->cm_gref_head,
+		    xenbus_get_otherend_id(sc->xbd_dev),
+		    cm->cm_operation == BLKIF_OP_WRITE,
+		    cm->cm_sg_refs, cm->cm_indirectionpages);
+		memcpy(ring_req->indirect_grefs, &cm->cm_indirectionrefs,
+		    sizeof(grant_ref_t) * sc->xbd_max_request_indirectpages);
 	}
 
-	*unit = minor >> 4;
-	*name = "xbd";
+	if (cm->cm_operation == BLKIF_OP_READ)
+		op = BUS_DMASYNC_PREREAD;
+	else if (cm->cm_operation == BLKIF_OP_WRITE)
+		op = BUS_DMASYNC_PREWRITE;
+	else
+		op = 0;
+	bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op);
+
+	gnttab_free_grant_references(cm->cm_gref_head);
+
+	xbd_enqueue_cm(cm, XBD_Q_BUSY);
+
+	/*
+	 * If bus dma had to asynchronously call us back to dispatch
+	 * this command, we are no longer executing in the context of 
+	 * xbd_startio().  Thus we cannot rely on xbd_startio()'s call to
+	 * xbd_flush_requests() to publish this command to the backend
+	 * along with any other commands that it could batch.
+	 */
+	if ((cm->cm_flags & XBDCF_ASYNC_MAPPING) != 0)
+		xbd_flush_requests(sc);
+
+	return;
 }
 
-int
-xlvbd_add(struct xb_softc *sc, blkif_sector_t sectors,
-    int vdevice, uint16_t vdisk_info, unsigned long sector_size)
+static int
+xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm)
 {
-	int	unit, error = 0;
-	const char *name;
+	int error;
 
-	blkfront_vdevice_to_unit(vdevice, &unit, &name);
+	error = bus_dmamap_load(sc->xbd_io_dmat, cm->cm_map, cm->cm_data,
+	    cm->cm_datalen, xbd_queue_cb, cm, 0);
+	if (error == EINPROGRESS) {
+		/*
+		 * Maintain queuing order by freezing the queue.  The next
+		 * command may not require as many resources as the command
+		 * we just attempted to map, so we can't rely on bus dma
+		 * blocking for it too.
+		 */
+		xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING);
+		return (0);
+	}
 
-	sc->xb_unit = unit;
+	return (error);
+}
 
-	if (strcmp(name, "xbd"))
-		device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit);
+static void
+xbd_restart_queue_callback(void *arg)
+{
+	struct xbd_softc *sc = arg;
 
-	sc->xb_disk = disk_alloc();
-	sc->xb_disk->d_unit = sc->xb_unit;
-	sc->xb_disk->d_open = blkif_open;
-	sc->xb_disk->d_close = blkif_close;
-	sc->xb_disk->d_ioctl = blkif_ioctl;
-	sc->xb_disk->d_strategy = xb_strategy;
-	sc->xb_disk->d_dump = xb_dump;
-	sc->xb_disk->d_name = name;
-	sc->xb_disk->d_drv1 = sc;
-	sc->xb_disk->d_sectorsize = sector_size;
+	mtx_lock(&sc->xbd_io_lock);
 
-	sc->xb_disk->d_mediasize = sectors * sector_size;
-	sc->xb_disk->d_maxsize = sc->max_request_size;
-	sc->xb_disk->d_flags = 0;
-	disk_create(sc->xb_disk, DISK_VERSION);
+	xbd_thaw(sc, XBDF_GNT_SHORTAGE);
 
-	return error;
+	xbd_startio(sc);
+
+	mtx_unlock(&sc->xbd_io_lock);
 }
 
-/************************ end VBD support *****************/
+static struct xbd_command *
+xbd_bio_command(struct xbd_softc *sc)
+{
+	struct xbd_command *cm;
+	struct bio *bp;
 
+	if (__predict_false(sc->xbd_state != XBD_STATE_CONNECTED))
+		return (NULL);
+
+	bp = xbd_dequeue_bio(sc);
+	if (bp == NULL)
+		return (NULL);
+
+	if ((cm = xbd_dequeue_cm(sc, XBD_Q_FREE)) == NULL) {
+		xbd_freeze(sc, XBDF_CM_SHORTAGE);
+		xbd_requeue_bio(sc, bp);
+		return (NULL);
+	}
+
+	if (gnttab_alloc_grant_references(sc->xbd_max_request_segments,
+	    &cm->cm_gref_head) != 0) {
+		gnttab_request_free_callback(&sc->xbd_callback,
+		    xbd_restart_queue_callback, sc,
+		    sc->xbd_max_request_segments);
+		xbd_freeze(sc, XBDF_GNT_SHORTAGE);
+		xbd_requeue_bio(sc, bp);
+		xbd_enqueue_cm(cm, XBD_Q_FREE);
+		return (NULL);
+	}
+
+	cm->cm_bp = bp;
+	cm->cm_data = bp->bio_data;
+	cm->cm_datalen = bp->bio_bcount;
+	cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno;
+
+	switch (bp->bio_cmd) {
+	case BIO_READ:
+		cm->cm_operation = BLKIF_OP_READ;
+		break;
+	case BIO_WRITE:
+		cm->cm_operation = BLKIF_OP_WRITE;
+		if ((bp->bio_flags & BIO_ORDERED) != 0) {
+			if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
+				cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
+			} else {
+				/*
+				 * Single step this command.
+				 */
+				cm->cm_flags |= XBDCF_Q_FREEZE;
+				if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
+					/*
+					 * Wait for in-flight requests to
+					 * finish.
+					 */
+					xbd_freeze(sc, XBDF_WAIT_IDLE);
+					xbd_requeue_cm(cm, XBD_Q_READY);
+					return (NULL);
+				}
+			}
+		}
+		break;
+	case BIO_FLUSH:
+		if ((sc->xbd_flags & XBDF_FLUSH) != 0)
+			cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE;
+		else if ((sc->xbd_flags & XBDF_BARRIER) != 0)
+			cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
+		else
+			panic("flush request, but no flush support available");
+		break;
+	default:
+		panic("unknown bio command %d", bp->bio_cmd);
+	}
+
+	return (cm);
+}
+
 /*
- * Read/write routine for a buffer.  Finds the proper unit, place it on
- * the sortq and kick the controller.
+ * Dequeue buffers and place them in the shared communication ring.
+ * Return when no more requests can be accepted or all buffers have 
+ * been queued.
+ *
+ * Signal XEN once the ring has been filled out.
  */
 static void
-xb_strategy(struct bio *bp)
+xbd_startio(struct xbd_softc *sc)
 {
-	struct xb_softc	*sc = (struct xb_softc *)bp->bio_disk->d_drv1;
+	struct xbd_command *cm;
+	int error, queued = 0;
 
-	/* bogus disk? */
-	if (sc == NULL) {
-		bp->bio_error = EINVAL;
-		bp->bio_flags |= BIO_ERROR;
-		bp->bio_resid = bp->bio_bcount;
-		biodone(bp);
+	mtx_assert(&sc->xbd_io_lock, MA_OWNED);
+
+	if (sc->xbd_state != XBD_STATE_CONNECTED)
 		return;
-	}
 
-	/*
-	 * Place it in the queue of disk activities for this disk
-	 */
-	mtx_lock(&sc->xb_io_lock);
+	while (!RING_FULL(&sc->xbd_ring)) {
 
-	xb_enqueue_bio(sc, bp);
-	xb_startio(sc);
+		if (sc->xbd_qfrozen_cnt != 0)
+			break;
 
-	mtx_unlock(&sc->xb_io_lock);
-	return;
+		cm = xbd_dequeue_cm(sc, XBD_Q_READY);
+
+		if (cm == NULL)
+			cm = xbd_bio_command(sc);
+
+		if (cm == NULL)
+			break;
+
+		if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) {
+			/*
+			 * Single step command.  Future work is
+			 * held off until this command completes.
+			 */
+			xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE);
+		}
+
+		if ((error = xbd_queue_request(sc, cm)) != 0) {
+			printf("xbd_queue_request returned %d\n", error);
+			break;
+		}
+		queued++;
+	}
+
+	if (queued != 0) 
+		xbd_flush_requests(sc);
 }
 
 static void
-xb_bio_complete(struct xb_softc *sc, struct xb_command *cm)
+xbd_bio_complete(struct xbd_softc *sc, struct xbd_command *cm)
 {
 	struct bio *bp;
 
-	bp = cm->bp;
+	bp = cm->cm_bp;
 
-	if ( unlikely(cm->status != BLKIF_RSP_OKAY) ) {
+	if (__predict_false(cm->cm_status != BLKIF_RSP_OKAY)) {
 		disk_err(bp, "disk error" , -1, 0);
-		printf(" status: %x\n", cm->status);
+		printf(" status: %x\n", cm->cm_status);
 		bp->bio_flags |= BIO_ERROR;
 	}
 
@@ -285,24 +469,107 @@
 	else
 		bp->bio_resid = 0;
 
-	xb_free_command(cm);
+	xbd_free_command(cm);
 	biodone(bp);
 }
 
-// Quiesce the disk writes for a dump file before allowing the next buffer.
 static void
-xb_quiesce(struct xb_softc *sc)
+xbd_int(void *xsc)
 {
-	int		mtd;
+	struct xbd_softc *sc = xsc;
+	struct xbd_command *cm;
+	blkif_response_t *bret;
+	RING_IDX i, rp;
+	int op;
 
+	mtx_lock(&sc->xbd_io_lock);
+
+	if (__predict_false(sc->xbd_state == XBD_STATE_DISCONNECTED)) {
+		mtx_unlock(&sc->xbd_io_lock);
+		return;
+	}
+
+ again:
+	rp = sc->xbd_ring.sring->rsp_prod;
+	rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+	for (i = sc->xbd_ring.rsp_cons; i != rp;) {
+		bret = RING_GET_RESPONSE(&sc->xbd_ring, i);
+		cm   = &sc->xbd_shadow[bret->id];
+
+		xbd_remove_cm(cm, XBD_Q_BUSY);
+		gnttab_end_foreign_access_references(cm->cm_nseg,
+		    cm->cm_sg_refs);
+		i++;
+
+		if (cm->cm_operation == BLKIF_OP_READ)
+			op = BUS_DMASYNC_POSTREAD;
+		else if (cm->cm_operation == BLKIF_OP_WRITE ||
+		    cm->cm_operation == BLKIF_OP_WRITE_BARRIER)
+			op = BUS_DMASYNC_POSTWRITE;
+		else
+			op = 0;
+		bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op);
+		bus_dmamap_unload(sc->xbd_io_dmat, cm->cm_map);
+
+		/*
+		 * Release any hold this command has on future command
+		 * dispatch. 
+		 */
+		xbd_cm_thaw(sc, cm);
+
+		/*
+		 * Directly call the i/o complete routine to save an
+		 * an indirection in the common case.
+		 */
+		cm->cm_status = bret->status;
+		if (cm->cm_bp)
+			xbd_bio_complete(sc, cm);
+		else if (cm->cm_complete != NULL)
+			cm->cm_complete(cm);
+		else
+			xbd_free_command(cm);
+	}
+
+	sc->xbd_ring.rsp_cons = i;
+
+	if (i != sc->xbd_ring.req_prod_pvt) {
+		int more_to_do;
+		RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, more_to_do);
+		if (more_to_do)
+			goto again;
+	} else {
+		sc->xbd_ring.sring->rsp_event = i + 1;
+	}
+
+	if (xbd_queue_length(sc, XBD_Q_BUSY) == 0)
+		xbd_thaw(sc, XBDF_WAIT_IDLE);
+
+	xbd_startio(sc);
+
+	if (__predict_false(sc->xbd_state == XBD_STATE_SUSPENDED))
+		wakeup(&sc->xbd_cm_q[XBD_Q_BUSY]);
+
+	mtx_unlock(&sc->xbd_io_lock);
+}
+
+/*------------------------------- Dump Support -------------------------------*/
+/**
+ * Quiesce the disk writes for a dump file before allowing the next buffer.
+ */
+static void
+xbd_quiesce(struct xbd_softc *sc)
+{
+	int mtd;
+
 	// While there are outstanding requests
-	while (!TAILQ_EMPTY(&sc->cm_busy)) {
-		RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, mtd);
+	while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
+		RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd);
 		if (mtd) {
 			/* Received request completions, update queue. */
-			blkif_int(sc);
+			xbd_int(sc);
 		}
-		if (!TAILQ_EMPTY(&sc->cm_busy)) {
+		if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
 			/*
 			 * Still pending requests, wait for the disk i/o
 			 * to complete.
@@ -314,60 +581,60 @@
 
 /* Kernel dump function for a paravirtualized disk device */
 static void
-xb_dump_complete(struct xb_command *cm)
+xbd_dump_complete(struct xbd_command *cm)
 {
 
-	xb_enqueue_complete(cm);
+	xbd_enqueue_cm(cm, XBD_Q_COMPLETE);
 }
 
 static int
-xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
-        size_t length)
+xbd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
+    size_t length)
 {
-	struct	disk   	*dp = arg;
-	struct xb_softc	*sc = (struct xb_softc *) dp->d_drv1;
-	struct xb_command *cm;
-	size_t		chunk;
-	int		sbp;
-	int		rc = 0;
+	struct disk *dp = arg;
+	struct xbd_softc *sc = dp->d_drv1;
+	struct xbd_command *cm;
+	size_t chunk;
+	int sbp;
+	int rc = 0;
 
 	if (length <= 0)
 		return (rc);
 
-	xb_quiesce(sc);	/* All quiet on the western front. */
+	xbd_quiesce(sc);	/* All quiet on the western front. */
 
 	/*
 	 * If this lock is held, then this module is failing, and a
 	 * successful kernel dump is highly unlikely anyway.
 	 */
-	mtx_lock(&sc->xb_io_lock);
+	mtx_lock(&sc->xbd_io_lock);
 
 	/* Split the 64KB block as needed */
 	for (sbp=0; length > 0; sbp++) {
-		cm = xb_dequeue_free(sc);
+		cm = xbd_dequeue_cm(sc, XBD_Q_FREE);
 		if (cm == NULL) {
-			mtx_unlock(&sc->xb_io_lock);
-			device_printf(sc->xb_dev, "dump: no more commands?\n");
+			mtx_unlock(&sc->xbd_io_lock);
+			device_printf(sc->xbd_dev, "dump: no more commands?\n");
 			return (EBUSY);
 		}
 
-		if (gnttab_alloc_grant_references(sc->max_request_segments,
-						  &cm->gref_head) != 0) {
-			xb_free_command(cm);
-			mtx_unlock(&sc->xb_io_lock);
-			device_printf(sc->xb_dev, "no more grant allocs?\n");
+		if (gnttab_alloc_grant_references(sc->xbd_max_request_segments,
+		    &cm->cm_gref_head) != 0) {
+			xbd_free_command(cm);
+			mtx_unlock(&sc->xbd_io_lock);
+			device_printf(sc->xbd_dev, "no more grant allocs?\n");
 			return (EBUSY);
 		}
 
-		chunk = length > sc->max_request_size
-		      ? sc->max_request_size : length;
-		cm->data = virtual;
-		cm->datalen = chunk;
-		cm->operation = BLKIF_OP_WRITE;
-		cm->sector_number = offset / dp->d_sectorsize;
-		cm->cm_complete = xb_dump_complete;
+		chunk = length > sc->xbd_max_request_size ?
+		    sc->xbd_max_request_size : length;
+		cm->cm_data = virtual;
+		cm->cm_datalen = chunk;
+		cm->cm_operation = BLKIF_OP_WRITE;
+		cm->cm_sector_number = offset / dp->d_sectorsize;
+		cm->cm_complete = xbd_dump_complete;
 
-		xb_enqueue_ready(cm);
+		xbd_enqueue_cm(cm, XBD_Q_READY);
 
 		length -= chunk;
 		offset += chunk;
@@ -375,175 +642,462 @@
 	}
 
 	/* Tell DOM0 to do the I/O */
-	xb_startio(sc);
-	mtx_unlock(&sc->xb_io_lock);
+	xbd_startio(sc);
+	mtx_unlock(&sc->xbd_io_lock);
 
 	/* Poll for the completion. */
-	xb_quiesce(sc);	/* All quite on the eastern front */
+	xbd_quiesce(sc);	/* All quiet on the eastern front */
 
 	/* If there were any errors, bail out... */
-	while ((cm = xb_dequeue_complete(sc)) != NULL) {
-		if (cm->status != BLKIF_RSP_OKAY) {
-			device_printf(sc->xb_dev,
+	while ((cm = xbd_dequeue_cm(sc, XBD_Q_COMPLETE)) != NULL) {
+		if (cm->cm_status != BLKIF_RSP_OKAY) {
+			device_printf(sc->xbd_dev,
 			    "Dump I/O failed at sector %jd\n",
-			    cm->sector_number);
+			    cm->cm_sector_number);
 			rc = EIO;
 		}
-		xb_free_command(cm);
+		xbd_free_command(cm);
 	}
 
 	return (rc);
 }
 
+/*----------------------------- Disk Entrypoints -----------------------------*/
+static int
+xbd_open(struct disk *dp)
+{
+	struct xbd_softc *sc = dp->d_drv1;
 
+	if (sc == NULL) {
+		printf("xb%d: not found", sc->xbd_unit);
+		return (ENXIO);
+	}
+
+	sc->xbd_flags |= XBDF_OPEN;
+	sc->xbd_users++;
+	return (0);
+}
+
 static int
-blkfront_probe(device_t dev)
+xbd_close(struct disk *dp)
 {
+	struct xbd_softc *sc = dp->d_drv1;
 
-	if (!strcmp(xenbus_get_type(dev), "vbd")) {
-		device_set_desc(dev, "Virtual Block Device");
-		device_quiet(dev);
-		return (0);
+	if (sc == NULL)
+		return (ENXIO);
+	sc->xbd_flags &= ~XBDF_OPEN;
+	if (--(sc->xbd_users) == 0) {
+		/*
+		 * Check whether we have been instructed to close.  We will
+		 * have ignored this request initially, as the device was
+		 * still mounted.
+		 */
+		if (xenbus_get_otherend_state(sc->xbd_dev) ==
+		    XenbusStateClosing)
+			xbd_closing(sc->xbd_dev);
 	}
+	return (0);
+}
 
-	return (ENXIO);
+static int
+xbd_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
+{
+	struct xbd_softc *sc = dp->d_drv1;
+
+	if (sc == NULL)
+		return (ENXIO);
+
+	return (ENOTTY);
 }
 
+/*
+ * Read/write routine for a buffer.  Finds the proper unit, place it on
+ * the sortq and kick the controller.
+ */
 static void
-xb_setup_sysctl(struct xb_softc *xb)
+xbd_strategy(struct bio *bp)
 {
+	struct xbd_softc *sc = bp->bio_disk->d_drv1;
+
+	/* bogus disk? */
+	if (sc == NULL) {
+		bp->bio_error = EINVAL;
+		bp->bio_flags |= BIO_ERROR;
+		bp->bio_resid = bp->bio_bcount;
+		biodone(bp);
+		return;
+	}
+
+	/*
+	 * Place it in the queue of disk activities for this disk
+	 */
+	mtx_lock(&sc->xbd_io_lock);
+
+	xbd_enqueue_bio(sc, bp);
+	xbd_startio(sc);
+
+	mtx_unlock(&sc->xbd_io_lock);
+	return;
+}
+
+/*------------------------------ Ring Management -----------------------------*/
+static int 
+xbd_alloc_ring(struct xbd_softc *sc)
+{
+	blkif_sring_t *sring;
+	uintptr_t sring_page_addr;
+	int error;
+	int i;
+
+	sring = malloc(sc->xbd_ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
+	    M_NOWAIT|M_ZERO);
+	if (sring == NULL) {
+		xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "allocating shared ring");
+		return (ENOMEM);
+	}
+	SHARED_RING_INIT(sring);
+	FRONT_RING_INIT(&sc->xbd_ring, sring, sc->xbd_ring_pages * PAGE_SIZE);
+
+	for (i = 0, sring_page_addr = (uintptr_t)sring;
+	     i < sc->xbd_ring_pages;
+	     i++, sring_page_addr += PAGE_SIZE) {
+
+		error = xenbus_grant_ring(sc->xbd_dev,
+		    (vtomach(sring_page_addr) >> PAGE_SHIFT),
+		    &sc->xbd_ring_ref[i]);
+		if (error) {
+			xenbus_dev_fatal(sc->xbd_dev, error,
+			    "granting ring_ref(%d)", i);
+			return (error);
+		}
+	}
+	if (sc->xbd_ring_pages == 1) {
+		error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev),
+		    "ring-ref", "%u", sc->xbd_ring_ref[0]);
+		if (error) {
+			xenbus_dev_fatal(sc->xbd_dev, error,
+			    "writing %s/ring-ref",
+			    xenbus_get_node(sc->xbd_dev));
+			return (error);
+		}
+	} else {
+		for (i = 0; i < sc->xbd_ring_pages; i++) {
+			char ring_ref_name[] = "ring_refXX";
+
+			snprintf(ring_ref_name, sizeof(ring_ref_name),
+			    "ring-ref%u", i);
+			error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev),
+			     ring_ref_name, "%u", sc->xbd_ring_ref[i]);
+			if (error) {
+				xenbus_dev_fatal(sc->xbd_dev, error,
+				    "writing %s/%s",
+				    xenbus_get_node(sc->xbd_dev),
+				    ring_ref_name);
+				return (error);
+			}
+		}
+	}
+
+	error = xen_intr_alloc_and_bind_local_port(sc->xbd_dev,
+	    xenbus_get_otherend_id(sc->xbd_dev), NULL, xbd_int, sc,
+	    INTR_TYPE_BIO | INTR_MPSAFE, &sc->xen_intr_handle);
+	if (error) {
+		xenbus_dev_fatal(sc->xbd_dev, error,
+		    "xen_intr_alloc_and_bind_local_port failed");
+		return (error);
+	}
+
+	return (0);
+}
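
For a multi-page ring, the front end publishes one grant reference per
page under its own xenstore node.  A hypothetical 4-page negotiation
(grant references and event-channel port are illustrative; the protocol
string assumes an amd64 guest) would leave entries like:

    ring-ref0 = "8"
    ring-ref1 = "9"
    ring-ref2 = "10"
    ring-ref3 = "11"
    num-ring-pages = "4"       <- written by xbd_initialize() below
    ring-page-order = "2"
    event-channel = "7"
    protocol = "x86_64-abi"
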
+
+static void
+xbd_free_ring(struct xbd_softc *sc)
+{
+	int i;
+
+	if (sc->xbd_ring.sring == NULL)
+		return;
+
+	for (i = 0; i < sc->xbd_ring_pages; i++) {
+		if (sc->xbd_ring_ref[i] != GRANT_REF_INVALID) {
+			gnttab_end_foreign_access_ref(sc->xbd_ring_ref[i]);
+			sc->xbd_ring_ref[i] = GRANT_REF_INVALID;
+		}
+	}
+	free(sc->xbd_ring.sring, M_XENBLOCKFRONT);
+	sc->xbd_ring.sring = NULL;
+}
+
+/*-------------------------- Initialization/Teardown -------------------------*/
+static int
+xbd_feature_string(struct xbd_softc *sc, char *features, size_t len)
+{
+	struct sbuf sb;
+	int feature_cnt;
+
+	sbuf_new(&sb, features, len, SBUF_FIXEDLEN);
+
+	feature_cnt = 0;
+	if ((sc->xbd_flags & XBDF_FLUSH) != 0) {
+		sbuf_printf(&sb, "flush");
+		feature_cnt++;
+	}
+
+	if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
+		if (feature_cnt != 0)
+			sbuf_printf(&sb, ", ");
+		sbuf_printf(&sb, "write_barrier");
+		feature_cnt++;
+	}
+
+	if ((sc->xbd_flags & XBDF_DISCARD) != 0) {
+		if (feature_cnt != 0)
+			sbuf_printf(&sb, ", ");
+		sbuf_printf(&sb, "discard");
+		feature_cnt++;
+	}
+
+	if ((sc->xbd_flags & XBDF_PERSISTENT) != 0) {
+		if (feature_cnt != 0)
+			sbuf_printf(&sb, ", ");
+		sbuf_printf(&sb, "persistent_grants");
+		feature_cnt++;
+	}
+
+	(void) sbuf_finish(&sb);
+	return (sbuf_len(&sb));
+}
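
A minimal usage sketch for the helper above (this mirrors the call made
from xbd_instance_create() later in this file):

    char features[80];

    if (xbd_feature_string(sc, features, sizeof(features)) > 0)
        device_printf(sc->xbd_dev, "features: %s\n", features);
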
+
+static int
+xbd_sysctl_features(SYSCTL_HANDLER_ARGS)
+{
+	char features[80];
+	struct xbd_softc *sc = arg1;
+	int error;
+	int len;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+
+	len = xbd_feature_string(sc, features, sizeof(features));
+
+	/* len is -1 on error, which will make the SYSCTL_OUT a no-op. */
+	return (SYSCTL_OUT(req, features, len + 1/*NUL*/));
+}
+
+static void
+xbd_setup_sysctl(struct xbd_softc *xbd)
+{
 	struct sysctl_ctx_list *sysctl_ctx = NULL;
-	struct sysctl_oid      *sysctl_tree = NULL;
+	struct sysctl_oid *sysctl_tree = NULL;
+	struct sysctl_oid_list *children;
 	
-	sysctl_ctx = device_get_sysctl_ctx(xb->xb_dev);
+	sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev);
 	if (sysctl_ctx == NULL)
 		return;
 
-	sysctl_tree = device_get_sysctl_tree(xb->xb_dev);
+	sysctl_tree = device_get_sysctl_tree(xbd->xbd_dev);
 	if (sysctl_tree == NULL)
 		return;
 
-	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
-		        "max_requests", CTLFLAG_RD, &xb->max_requests, -1,
-		        "maximum outstanding requests (negotiated)");
+	children = SYSCTL_CHILDREN(sysctl_tree);
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
+	    "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1,
+	    "maximum outstanding requests (negotiated)");
 
-	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
-		        "max_request_segments", CTLFLAG_RD,
-		        &xb->max_request_segments, 0,
-		        "maximum number of pages per requests (negotiated)");
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
+	    "max_request_segments", CTLFLAG_RD,
+	    &xbd->xbd_max_request_segments, 0,
+	    "maximum number of pages per requests (negotiated)");
 
-	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
-		        "max_request_size", CTLFLAG_RD,
-		        &xb->max_request_size, 0,
-		        "maximum size in bytes of a request (negotiated)");
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
+	    "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0,
+	    "maximum size in bytes of a request (negotiated)");
 
-	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
-		        "ring_pages", CTLFLAG_RD,
-		        &xb->ring_pages, 0,
-		        "communication channel pages (negotiated)");
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
+	    "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0,
+	    "communication channel pages (negotiated)");
+
+	SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO,
+	    "features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0,
+	    xbd_sysctl_features, "A", "protocol features (negotiated)");
 }
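
With these nodes registered, the negotiated limits can be inspected from
userland with sysctl(8).  Assuming the standard dev.<driver>.<unit>
naming and unit 0, the output would look something like the following
(values illustrative):

    # sysctl dev.xbd.0.max_requests dev.xbd.0.ring_pages dev.xbd.0.features
    dev.xbd.0.max_requests: 256
    dev.xbd.0.ring_pages: 4
    dev.xbd.0.features: flush, write_barrier
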
 
 /*
- * Setup supplies the backend dir, virtual device.  We place an event
- * channel and shared frame entries.  We watch backend to wait if it's
- * ok.
+ * Translate Linux major/minor to an appropriate name and unit
+ * number. For HVM guests, this allows us to use the same drive names
+ * with blkfront as the emulated drives, easing transition slightly.
  */
-static int
-blkfront_attach(device_t dev)
+static void
+xbd_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name)
 {
-	struct xb_softc *sc;
-	const char *name;
-	uint32_t vdevice;
-	int error;
+	static struct vdev_info {
+		int major;
+		int shift;
+		int base;
+		const char *name;
+	} info[] = {
+		{3,	6,	0,	"ada"},	/* ide0 */
+		{22,	6,	2,	"ada"},	/* ide1 */
+		{33,	6,	4,	"ada"},	/* ide2 */
+		{34,	6,	6,	"ada"},	/* ide3 */
+		{56,	6,	8,	"ada"},	/* ide4 */
+		{57,	6,	10,	"ada"},	/* ide5 */
+		{88,	6,	12,	"ada"},	/* ide6 */
+		{89,	6,	14,	"ada"},	/* ide7 */
+		{90,	6,	16,	"ada"},	/* ide8 */
+		{91,	6,	18,	"ada"},	/* ide9 */
+
+		{8,	4,	0,	"da"},	/* scsi disk0 */
+		{65,	4,	16,	"da"},	/* scsi disk1 */
+		{66,	4,	32,	"da"},	/* scsi disk2 */
+		{67,	4,	48,	"da"},	/* scsi disk3 */
+		{68,	4,	64,	"da"},	/* scsi disk4 */
+		{69,	4,	80,	"da"},	/* scsi disk5 */
+		{70,	4,	96,	"da"},	/* scsi disk6 */
+		{71,	4,	112,	"da"},	/* scsi disk7 */
+		{128,	4,	128,	"da"},	/* scsi disk8 */
+		{129,	4,	144,	"da"},	/* scsi disk9 */
+		{130,	4,	160,	"da"},	/* scsi disk10 */
+		{131,	4,	176,	"da"},	/* scsi disk11 */
+		{132,	4,	192,	"da"},	/* scsi disk12 */
+		{133,	4,	208,	"da"},	/* scsi disk13 */
+		{134,	4,	224,	"da"},	/* scsi disk14 */
+		{135,	4,	240,	"da"},	/* scsi disk15 */
+
+		{202,	4,	0,	"xbd"},	/* xbd */
+
+		{0,	0,	0,	NULL},
+	};
+	int major = vdevice >> 8;
+	int minor = vdevice & 0xff;
 	int i;
-	int unit;
 
-	/* FIXME: Use dynamic device id if this is not set. */
-	error = xs_scanf(XST_NIL, xenbus_get_node(dev),
-	    "virtual-device", NULL, "%" PRIu32, &vdevice);
-	if (error) {
-		xenbus_dev_fatal(dev, error, "reading virtual-device");
-		device_printf(dev, "Couldn't determine virtual device.\n");
-		return (error);
+	if (vdevice & (1 << 28)) {
+		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
+		*name = "xbd";
+		return;
 	}
 
-	blkfront_vdevice_to_unit(vdevice, &unit, &name);
-	if (!strcmp(name, "xbd"))
-		device_set_unit(dev, unit);
+	for (i = 0; info[i].major; i++) {
+		if (info[i].major == major) {
+			*unit = info[i].base + (minor >> info[i].shift);
+			*name = info[i].name;
+			return;
+		}
+	}
 
-	sc = device_get_softc(dev);
-	mtx_init(&sc->xb_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
-	xb_initq_free(sc);
-	xb_initq_busy(sc);
-	xb_initq_ready(sc);
-	xb_initq_complete(sc);
-	xb_initq_bio(sc);
-	for (i = 0; i < XBF_MAX_RING_PAGES; i++)
-		sc->ring_ref[i] = GRANT_INVALID_REF;
+	*unit = minor >> 4;
+	*name = "xbd";
+}
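
Worked examples of the translation above (unit/name values follow the
table; the last case exercises the extended-ID (1 << 28) branch):

    int unit;
    const char *name;

    xbd_vdevice_to_unit(0x0800, &unit, &name); /* major 8, minor 0: da0 */
    xbd_vdevice_to_unit(0x0841, &unit, &name); /* major 8, minor 65: da4 */
    xbd_vdevice_to_unit((1 << 28) | (3 << 8), &unit, &name); /* xbd3 */
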
 
-	sc->xb_dev = dev;
-	sc->vdevice = vdevice;
-	sc->connected = BLKIF_STATE_DISCONNECTED;
+int
+xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
+    int vdevice, uint16_t vdisk_info, unsigned long sector_size,
+    unsigned long phys_sector_size)
+{
+	char features[80];
+	int unit, error = 0;
+	const char *name;
 
-	xb_setup_sysctl(sc);
+	xbd_vdevice_to_unit(vdevice, &unit, &name);
 
-	/* Wait for backend device to publish its protocol capabilities. */
-	xenbus_set_state(dev, XenbusStateInitialising);
+	sc->xbd_unit = unit;
 
-	return (0);
-}
+	if (strcmp(name, "xbd") != 0)
+		device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit);
 
-static int
-blkfront_suspend(device_t dev)
-{
-	struct xb_softc *sc = device_get_softc(dev);
-	int retval;
-	int saved_state;
+	if (xbd_feature_string(sc, features, sizeof(features)) > 0) {
+		device_printf(sc->xbd_dev, "features: %s\n",
+		    features);
+	}
 
-	/* Prevent new requests being issued until we fix things up. */
-	mtx_lock(&sc->xb_io_lock);
-	saved_state = sc->connected;
-	sc->connected = BLKIF_STATE_SUSPENDED;
+	sc->xbd_disk = disk_alloc();
+	sc->xbd_disk->d_unit = sc->xbd_unit;
+	sc->xbd_disk->d_open = xbd_open;
+	sc->xbd_disk->d_close = xbd_close;
+	sc->xbd_disk->d_ioctl = xbd_ioctl;
+	sc->xbd_disk->d_strategy = xbd_strategy;
+	sc->xbd_disk->d_dump = xbd_dump;
+	sc->xbd_disk->d_name = name;
+	sc->xbd_disk->d_drv1 = sc;
+	sc->xbd_disk->d_sectorsize = sector_size;
+	sc->xbd_disk->d_stripesize = phys_sector_size;
+	sc->xbd_disk->d_stripeoffset = 0;
 
-	/* Wait for outstanding I/O to drain. */
-	retval = 0;
-	while (TAILQ_EMPTY(&sc->cm_busy) == 0) {
-		if (msleep(&sc->cm_busy, &sc->xb_io_lock,
-			   PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
-			retval = EBUSY;
-			break;
-		}
+	sc->xbd_disk->d_mediasize = sectors * sector_size;
+	sc->xbd_disk->d_maxsize = sc->xbd_max_request_size;
+	sc->xbd_disk->d_flags = 0;
+	if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) {
+		sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
+		device_printf(sc->xbd_dev,
+		    "synchronize cache commands enabled.\n");
 	}
-	mtx_unlock(&sc->xb_io_lock);
+	disk_create(sc->xbd_disk, DISK_VERSION);
 
-	if (retval != 0)
-		sc->connected = saved_state;
-
-	return (retval);
+	return (error);
 }
 
-static int
-blkfront_resume(device_t dev)
+static void 
+xbd_free(struct xbd_softc *sc)
 {
-	struct xb_softc *sc = device_get_softc(dev);
+	int i;
+	
+	/* Prevent new requests being issued until we fix things up. */
+	mtx_lock(&sc->xbd_io_lock);
+	sc->xbd_state = XBD_STATE_DISCONNECTED; 
+	mtx_unlock(&sc->xbd_io_lock);
 
-	DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));
+	/* Free resources associated with old device channel. */
+	xbd_free_ring(sc);
+	if (sc->xbd_shadow) {
 
-	blkif_free(sc);
-	blkfront_initialize(sc);
-	return (0);
+		for (i = 0; i < sc->xbd_max_requests; i++) {
+			struct xbd_command *cm;
+
+			cm = &sc->xbd_shadow[i];
+			if (cm->cm_sg_refs != NULL) {
+				free(cm->cm_sg_refs, M_XENBLOCKFRONT);
+				cm->cm_sg_refs = NULL;
+			}
+
+			if (cm->cm_indirectionpages != NULL) {
+				gnttab_end_foreign_access_references(
+				    sc->xbd_max_request_indirectpages,
+				    &cm->cm_indirectionrefs[0]);
+				contigfree(cm->cm_indirectionpages, PAGE_SIZE *
+				    sc->xbd_max_request_indirectpages,
+				    M_XENBLOCKFRONT);
+				cm->cm_indirectionpages = NULL;
+			}
+
+			bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map);
+		}
+		free(sc->xbd_shadow, M_XENBLOCKFRONT);
+		sc->xbd_shadow = NULL;
+
+		bus_dma_tag_destroy(sc->xbd_io_dmat);
+		
+		xbd_initq_cm(sc, XBD_Q_FREE);
+		xbd_initq_cm(sc, XBD_Q_READY);
+		xbd_initq_cm(sc, XBD_Q_COMPLETE);
+	}
+		
+	xen_intr_unbind(&sc->xen_intr_handle);
+
 }
 
+/*--------------------------- State Change Handlers --------------------------*/
 static void
-blkfront_initialize(struct xb_softc *sc)
+xbd_initialize(struct xbd_softc *sc)
 {
 	const char *otherend_path;
 	const char *node_path;
 	uint32_t max_ring_page_order;
 	int error;
-	int i;
 
-	if (xenbus_get_state(sc->xb_dev) != XenbusStateInitialising) {
+	if (xenbus_get_state(sc->xbd_dev) != XenbusStateInitialising) {
 		/* Initialization has already been performed. */
 		return;
 	}
@@ -553,10 +1107,7 @@
 	 * setting fails.
 	 */
 	max_ring_page_order = 0;
-	sc->ring_pages = 1;
-	sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
-	sc->max_request_size = XBF_SEGS_TO_SIZE(sc->max_request_segments);
-	sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);
+	sc->xbd_ring_pages = 1;
 
 	/*
 	 * Protocol negotiation.
@@ -569,334 +1120,120 @@
 	 * \note xs_scanf() does not update variables for unmatched
 	 *	 fields.
 	 */
-	otherend_path = xenbus_get_otherend_path(sc->xb_dev);
-	node_path = xenbus_get_node(sc->xb_dev);
+	otherend_path = xenbus_get_otherend_path(sc->xbd_dev);
+	node_path = xenbus_get_node(sc->xbd_dev);
 
 	/* Support both backend schemes for relaying ring page limits. */
 	(void)xs_scanf(XST_NIL, otherend_path,
-		       "max-ring-page-order", NULL, "%" PRIu32,
-		       &max_ring_page_order);
-	sc->ring_pages = 1 << max_ring_page_order;
+	    "max-ring-page-order", NULL, "%" PRIu32,
+	    &max_ring_page_order);
+	sc->xbd_ring_pages = 1 << max_ring_page_order;
 	(void)xs_scanf(XST_NIL, otherend_path,
-		       "max-ring-pages", NULL, "%" PRIu32,
-		       &sc->ring_pages);
-	if (sc->ring_pages < 1)
-		sc->ring_pages = 1;
+	    "max-ring-pages", NULL, "%" PRIu32,
+	    &sc->xbd_ring_pages);
+	if (sc->xbd_ring_pages < 1)
+		sc->xbd_ring_pages = 1;
 
-	sc->max_requests = BLKIF_MAX_RING_REQUESTS(sc->ring_pages * PAGE_SIZE);
-	(void)xs_scanf(XST_NIL, otherend_path,
-		       "max-requests", NULL, "%" PRIu32,
-		       &sc->max_requests);
-
-	(void)xs_scanf(XST_NIL, otherend_path,
-		       "max-request-segments", NULL, "%" PRIu32,
-		       &sc->max_request_segments);
-
-	(void)xs_scanf(XST_NIL, otherend_path,
-		       "max-request-size", NULL, "%" PRIu32,
-		       &sc->max_request_size);
-
-	if (sc->ring_pages > XBF_MAX_RING_PAGES) {
-		device_printf(sc->xb_dev, "Back-end specified ring-pages of "
-			      "%u limited to front-end limit of %zu.\n",
-			      sc->ring_pages, XBF_MAX_RING_PAGES);
-		sc->ring_pages = XBF_MAX_RING_PAGES;
+	if (sc->xbd_ring_pages > XBD_MAX_RING_PAGES) {
+		device_printf(sc->xbd_dev,
+		    "Back-end specified ring-pages of %u "
+		    "limited to front-end limit of %u.\n",
+		    sc->xbd_ring_pages, XBD_MAX_RING_PAGES);
+		sc->xbd_ring_pages = XBD_MAX_RING_PAGES;
 	}
 
-	if (powerof2(sc->ring_pages) == 0) {
+	if (powerof2(sc->xbd_ring_pages) == 0) {
 		uint32_t new_page_limit;
 
-		new_page_limit = 0x01 << (fls(sc->ring_pages) - 1);
-		device_printf(sc->xb_dev, "Back-end specified ring-pages of "
-			      "%u is not a power of 2. Limited to %u.\n",
-			      sc->ring_pages, new_page_limit);
-		sc->ring_pages = new_page_limit;
+		new_page_limit = 0x01 << (fls(sc->xbd_ring_pages) - 1);
+		device_printf(sc->xbd_dev,
+		    "Back-end specified ring-pages of %u "
+		    "is not a power of 2. Limited to %u.\n",
+		    sc->xbd_ring_pages, new_page_limit);
+		sc->xbd_ring_pages = new_page_limit;
 	}
 
-	if (sc->max_requests > XBF_MAX_REQUESTS) {
-		device_printf(sc->xb_dev, "Back-end specified max_requests of "
-			      "%u limited to front-end limit of %u.\n",
-			      sc->max_requests, XBF_MAX_REQUESTS);
-		sc->max_requests = XBF_MAX_REQUESTS;
+	sc->xbd_max_requests =
+	    BLKIF_MAX_RING_REQUESTS(sc->xbd_ring_pages * PAGE_SIZE);
+	if (sc->xbd_max_requests > XBD_MAX_REQUESTS) {
+		device_printf(sc->xbd_dev,
+		    "Back-end specified max_requests of %u "
+		    "limited to front-end limit of %zu.\n",
+		    sc->xbd_max_requests, XBD_MAX_REQUESTS);
+		sc->xbd_max_requests = XBD_MAX_REQUESTS;
 	}
 
-	if (sc->max_request_segments > XBF_MAX_SEGMENTS_PER_REQUEST) {
-		device_printf(sc->xb_dev, "Back-end specified "
-			      "max_request_segments of %u limited to "
-			      "front-end limit of %u.\n",
-			      sc->max_request_segments,
-			      XBF_MAX_SEGMENTS_PER_REQUEST);
-		sc->max_request_segments = XBF_MAX_SEGMENTS_PER_REQUEST;
-	}
-
-	if (sc->max_request_size > XBF_MAX_REQUEST_SIZE) {
-		device_printf(sc->xb_dev, "Back-end specified "
-			      "max_request_size of %u limited to front-end "
-			      "limit of %u.\n", sc->max_request_size,
-			      XBF_MAX_REQUEST_SIZE);
-		sc->max_request_size = XBF_MAX_REQUEST_SIZE;
-	}
- 
- 	if (sc->max_request_size > XBF_SEGS_TO_SIZE(sc->max_request_segments)) {
- 		device_printf(sc->xb_dev, "Back-end specified "
- 			      "max_request_size of %u limited to front-end "
- 			      "limit of %u.  (Too few segments.)\n",
- 			      sc->max_request_size,
- 			      XBF_SEGS_TO_SIZE(sc->max_request_segments));
- 		sc->max_request_size =
- 		    XBF_SEGS_TO_SIZE(sc->max_request_segments);
- 	}
-
-	sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);
-
-	/* Allocate datastructures based on negotiated values. */
-	error = bus_dma_tag_create(bus_get_dma_tag(sc->xb_dev),	/* parent */
-				   512, PAGE_SIZE,	/* algnmnt, boundary */
-				   BUS_SPACE_MAXADDR,	/* lowaddr */
-				   BUS_SPACE_MAXADDR,	/* highaddr */
-				   NULL, NULL,		/* filter, filterarg */
-				   sc->max_request_size,
-				   sc->max_request_segments,
-				   PAGE_SIZE,		/* maxsegsize */
-				   BUS_DMA_ALLOCNOW,	/* flags */
-				   busdma_lock_mutex,	/* lockfunc */
-				   &sc->xb_io_lock,	/* lockarg */
-				   &sc->xb_io_dmat);
-	if (error != 0) {
-		xenbus_dev_fatal(sc->xb_dev, error,
-				 "Cannot allocate parent DMA tag\n");
+	if (xbd_alloc_ring(sc) != 0)
 		return;
-	}
 
-	/* Per-transaction data allocation. */
-	sc->shadow = malloc(sizeof(*sc->shadow) * sc->max_requests,
-			    M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
-	if (sc->shadow == NULL) {
-		bus_dma_tag_destroy(sc->xb_io_dmat);
-		xenbus_dev_fatal(sc->xb_dev, error,
-				 "Cannot allocate request structures\n");
-		return;
-	}
-
-	for (i = 0; i < sc->max_requests; i++) {
-		struct xb_command *cm;
-
-		cm = &sc->shadow[i];
-		cm->sg_refs = malloc(sizeof(grant_ref_t)
-				   * sc->max_request_segments,
-				     M_XENBLOCKFRONT, M_NOWAIT);
-		if (cm->sg_refs == NULL)
-			break;
-		cm->id = i;
-		cm->cm_sc = sc;
-		if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0)
-			break;
-		xb_free_command(cm);
-	}
-
-	if (setup_blkring(sc) != 0)
-		return;
-
 	/* Support both backend schemes for relaying ring page limits. */
-	if (sc->ring_pages > 1) {
+	if (sc->xbd_ring_pages > 1) {
 		error = xs_printf(XST_NIL, node_path,
-				 "num-ring-pages","%u", sc->ring_pages);
+		    "num-ring-pages","%u",
+		    sc->xbd_ring_pages);
 		if (error) {
-			xenbus_dev_fatal(sc->xb_dev, error,
-					 "writing %s/num-ring-pages",
-					 node_path);
+			xenbus_dev_fatal(sc->xbd_dev, error,
+			    "writing %s/num-ring-pages",
+			    node_path);
 			return;
 		}
 
 		error = xs_printf(XST_NIL, node_path,
-				 "ring-page-order", "%u",
-				 fls(sc->ring_pages) - 1);
+		    "ring-page-order", "%u",
+		    fls(sc->xbd_ring_pages) - 1);
 		if (error) {
-			xenbus_dev_fatal(sc->xb_dev, error,
-					 "writing %s/ring-page-order",
-					 node_path);
+			xenbus_dev_fatal(sc->xbd_dev, error,
+			    "writing %s/ring-page-order",
+			    node_path);
 			return;
 		}
 	}
 
-	error = xs_printf(XST_NIL, node_path,
-			 "max-requests","%u", sc->max_requests);
-	if (error) {
-		xenbus_dev_fatal(sc->xb_dev, error,
-				 "writing %s/max-requests",
-				 node_path);
-		return;
-	}
-
-	error = xs_printf(XST_NIL, node_path,
-			 "max-request-segments","%u", sc->max_request_segments);
-	if (error) {
-		xenbus_dev_fatal(sc->xb_dev, error,
-				 "writing %s/max-request-segments",
-				 node_path);
-		return;
-	}
-
-	error = xs_printf(XST_NIL, node_path,
-			 "max-request-size","%u", sc->max_request_size);
-	if (error) {
-		xenbus_dev_fatal(sc->xb_dev, error,
-				 "writing %s/max-request-size",
-				 node_path);
-		return;
-	}
-
 	error = xs_printf(XST_NIL, node_path, "event-channel",
-			  "%u", irq_to_evtchn_port(sc->irq));
+	    "%u", xen_intr_port(sc->xen_intr_handle));
 	if (error) {
-		xenbus_dev_fatal(sc->xb_dev, error,
-				 "writing %s/event-channel",
-				 node_path);
+		xenbus_dev_fatal(sc->xbd_dev, error,
+		    "writing %s/event-channel",
+		    node_path);
 		return;
 	}
 
-	error = xs_printf(XST_NIL, node_path,
-			  "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
+	error = xs_printf(XST_NIL, node_path, "protocol",
+	    "%s", XEN_IO_PROTO_ABI_NATIVE);
 	if (error) {
-		xenbus_dev_fatal(sc->xb_dev, error,
-				 "writing %s/protocol",
-				 node_path);
+		xenbus_dev_fatal(sc->xbd_dev, error,
+		    "writing %s/protocol",
+		    node_path);
 		return;
 	}
 
-	xenbus_set_state(sc->xb_dev, XenbusStateInitialised);
+	xenbus_set_state(sc->xbd_dev, XenbusStateInitialised);
 }
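
To make the ring-size handshake concrete: a backend advertising a
max-ring-page-order of 2 yields the arithmetic below (a sketch of the
code above, not additional driver logic):

    uint32_t max_ring_page_order = 2;               /* from the backend */
    uint32_t ring_pages = 1 << max_ring_page_order; /* 4 pages */
    uint32_t reply_order = fls(ring_pages) - 1;     /* "ring-page-order" = 2 */
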
 
-static int 
-setup_blkring(struct xb_softc *sc)
-{
-	blkif_sring_t *sring;
-	uintptr_t sring_page_addr;
-	int error;
-	int i;
-
-	sring = malloc(sc->ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
-		       M_NOWAIT|M_ZERO);
-	if (sring == NULL) {
-		xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring");
-		return (ENOMEM);
-	}
-	SHARED_RING_INIT(sring);
-	FRONT_RING_INIT(&sc->ring, sring, sc->ring_pages * PAGE_SIZE);
-
-	for (i = 0, sring_page_addr = (uintptr_t)sring;
-	     i < sc->ring_pages;
-	     i++, sring_page_addr += PAGE_SIZE) {
-
-		error = xenbus_grant_ring(sc->xb_dev,
-		    (vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->ring_ref[i]);
-		if (error) {
-			xenbus_dev_fatal(sc->xb_dev, error,
-					 "granting ring_ref(%d)", i);
-			return (error);
-		}
-	}
-	if (sc->ring_pages == 1) {
-		error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
-				  "ring-ref", "%u", sc->ring_ref[0]);
-		if (error) {
-			xenbus_dev_fatal(sc->xb_dev, error,
-					 "writing %s/ring-ref",
-					 xenbus_get_node(sc->xb_dev));
-			return (error);
-		}
-	} else {
-		for (i = 0; i < sc->ring_pages; i++) {
-			char ring_ref_name[]= "ring_refXX";
-
-			snprintf(ring_ref_name, sizeof(ring_ref_name),
-				 "ring-ref%u", i);
-			error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
-					 ring_ref_name, "%u", sc->ring_ref[i]);
-			if (error) {
-				xenbus_dev_fatal(sc->xb_dev, error,
-						 "writing %s/%s",
-						 xenbus_get_node(sc->xb_dev),
-						 ring_ref_name);
-				return (error);
-			}
-		}
-	}
-
-	error = bind_listening_port_to_irqhandler(
-	    xenbus_get_otherend_id(sc->xb_dev),
-	    "xbd", (driver_intr_t *)blkif_int, sc,
-	    INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq);
-	if (error) {
-		xenbus_dev_fatal(sc->xb_dev, error,
-		    "bind_evtchn_to_irqhandler failed");
-		return (error);
-	}
-
-	return (0);
-}
-
-/**
- * Callback received when the backend's state changes.
+/* 
+ * Invoked when the backend is finally 'ready' (and has published
+ * the details about the physical device - #sectors, size, etc). 
  */
-static void
-blkfront_backend_changed(device_t dev, XenbusState backend_state)
-{
-	struct xb_softc *sc = device_get_softc(dev);
-
-	DPRINTK("backend_state=%d\n", backend_state);
-
-	switch (backend_state) {
-	case XenbusStateUnknown:
-	case XenbusStateInitialising:
-	case XenbusStateReconfigured:
-	case XenbusStateReconfiguring:
-	case XenbusStateClosed:
-		break;
-
-	case XenbusStateInitWait:
-	case XenbusStateInitialised:
-		blkfront_initialize(sc);
-		break;
-
-	case XenbusStateConnected:
-		blkfront_initialize(sc);
-		blkfront_connect(sc);
-		break;
-
-	case XenbusStateClosing:
-		if (sc->users > 0)
-			xenbus_dev_error(dev, -EBUSY,
-					 "Device in use; refusing to close");
-		else
-			blkfront_closing(dev);
-		break;	
-	}
-}
-
-/* 
-** Invoked when the backend is finally 'ready' (and has published
-** the details about the physical device - #sectors, size, etc). 
-*/
 static void 
-blkfront_connect(struct xb_softc *sc)
+xbd_connect(struct xbd_softc *sc)
 {
-	device_t dev = sc->xb_dev;
-	unsigned long sectors, sector_size;
+	device_t dev = sc->xbd_dev;
+	unsigned long sectors, sector_size, phys_sector_size;
 	unsigned int binfo;
-	int err, feature_barrier;
+	int err, feature_barrier, feature_flush;
+	int i, j;
 
-	if( (sc->connected == BLKIF_STATE_CONNECTED) || 
-	    (sc->connected == BLKIF_STATE_SUSPENDED) )
+	if (sc->xbd_state == XBD_STATE_CONNECTED || 
+	    sc->xbd_state == XBD_STATE_SUSPENDED)
 		return;
 
 	DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));
 
 	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
-			"sectors", "%lu", &sectors,
-			"info", "%u", &binfo,
-			"sector-size", "%lu", &sector_size,
-			NULL);
+	    "sectors", "%lu", &sectors,
+	    "info", "%u", &binfo,
+	    "sector-size", "%lu", &sector_size,
+	    NULL);
 	if (err) {
 		xenbus_dev_fatal(dev, err,
 		    "reading backend fields at %s",
@@ -903,13 +1240,114 @@
 		    xenbus_get_otherend_path(dev));
 		return;
 	}
+	if ((sectors == 0) || (sector_size == 0)) {
+		xenbus_dev_fatal(dev, 0,
+		    "invalid parameters from %s:"
+		    " sectors = %lu, sector_size = %lu",
+		    xenbus_get_otherend_path(dev),
+		    sectors, sector_size);
+		return;
+	}
 	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
-			"feature-barrier", "%lu", &feature_barrier,
-			NULL);
-	if (!err || feature_barrier)
-		sc->xb_flags |= XB_BARRIER;
+	     "physical-sector-size", "%lu", &phys_sector_size,
+	     NULL);
+	if (err || phys_sector_size <= sector_size)
+		phys_sector_size = 0;
+	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
+	     "feature-barrier", "%d", &feature_barrier,
+	     NULL);
+	if (err == 0 && feature_barrier != 0)
+		sc->xbd_flags |= XBDF_BARRIER;
 
-	if (sc->xb_disk == NULL) {
+	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
+	     "feature-flush-cache", "%d", &feature_flush,
+	     NULL);
+	if (err == 0 && feature_flush != 0)
+		sc->xbd_flags |= XBDF_FLUSH;
+
+	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
+	    "feature-max-indirect-segments", "%" PRIu32,
+	    &sc->xbd_max_request_segments, NULL);
+	if ((err != 0) || (xbd_enable_indirect == 0))
+		sc->xbd_max_request_segments = 0;
+	if (sc->xbd_max_request_segments > XBD_MAX_INDIRECT_SEGMENTS)
+		sc->xbd_max_request_segments = XBD_MAX_INDIRECT_SEGMENTS;
+	if (sc->xbd_max_request_segments > XBD_SIZE_TO_SEGS(MAXPHYS))
+		sc->xbd_max_request_segments = XBD_SIZE_TO_SEGS(MAXPHYS);
+	sc->xbd_max_request_indirectpages =
+	    XBD_INDIRECT_SEGS_TO_PAGES(sc->xbd_max_request_segments);
+	if (sc->xbd_max_request_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST)
+		sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	sc->xbd_max_request_size =
+	    XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments);
+
+	/* Allocate datastructures based on negotiated values. */
+	err = bus_dma_tag_create(
+	    bus_get_dma_tag(sc->xbd_dev),	/* parent */
+	    512, PAGE_SIZE,			/* algnmnt, boundary */
+	    BUS_SPACE_MAXADDR,			/* lowaddr */
+	    BUS_SPACE_MAXADDR,			/* highaddr */
+	    NULL, NULL,				/* filter, filterarg */
+	    sc->xbd_max_request_size,
+	    sc->xbd_max_request_segments,
+	    PAGE_SIZE,				/* maxsegsize */
+	    BUS_DMA_ALLOCNOW,			/* flags */
+	    busdma_lock_mutex,			/* lockfunc */
+	    &sc->xbd_io_lock,			/* lockarg */
+	    &sc->xbd_io_dmat);
+	if (err != 0) {
+		xenbus_dev_fatal(sc->xbd_dev, err,
+		    "Cannot allocate parent DMA tag\n");
+		return;
+	}
+
+	/* Per-transaction data allocation. */
+	sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests,
+	    M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
+	if (sc->xbd_shadow == NULL) {
+		bus_dma_tag_destroy(sc->xbd_io_dmat);
+		xenbus_dev_fatal(sc->xbd_dev, ENOMEM,
+		    "Cannot allocate request structures\n");
+		return;
+	}
+
+	for (i = 0; i < sc->xbd_max_requests; i++) {
+		struct xbd_command *cm;
+		void *indirectpages;
+
+		cm = &sc->xbd_shadow[i];
+		cm->cm_sg_refs = malloc(
+		    sizeof(grant_ref_t) * sc->xbd_max_request_segments,
+		    M_XENBLOCKFRONT, M_NOWAIT);
+		if (cm->cm_sg_refs == NULL)
+			break;
+		cm->cm_id = i;
+		cm->cm_flags = XBDCF_INITIALIZER;
+		cm->cm_sc = sc;
+		if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0)
+			break;
+		if (sc->xbd_max_request_indirectpages > 0) {
+			indirectpages = contigmalloc(
+			    PAGE_SIZE * sc->xbd_max_request_indirectpages,
+			    M_XENBLOCKFRONT, M_ZERO, 0, ~0, PAGE_SIZE, 0);
+		} else {
+			indirectpages = NULL;
+		}
+		for (j = 0; j < sc->xbd_max_request_indirectpages; j++) {
+			if (gnttab_grant_foreign_access(
+			    xenbus_get_otherend_id(sc->xbd_dev),
+			    (vtomach(indirectpages) >> PAGE_SHIFT) + j,
+			    1 /* grant read-only access */,
+			    &cm->cm_indirectionrefs[j]))
+				break;
+		}
+		if (j < sc->xbd_max_request_indirectpages)
+			break;
+		cm->cm_indirectionpages = indirectpages;
+		xbd_free_command(cm);
+	}
+
+	if (sc->xbd_disk == NULL) {
 		device_printf(dev, "%juMB <%s> at %s",
 		    (uintmax_t) sectors / (1048576 / sector_size),
 		    device_get_desc(dev),
@@ -916,17 +1354,18 @@
 		    xenbus_get_node(dev));
 		bus_print_child_footer(device_get_parent(dev), dev);
 
-		xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size);
+		xbd_instance_create(sc, sectors, sc->xbd_vdevice, binfo,
+		    sector_size, phys_sector_size);
 	}
 
 	(void)xenbus_set_state(dev, XenbusStateConnected); 
 
 	/* Kick pending requests. */
-	mtx_lock(&sc->xb_io_lock);
-	sc->connected = BLKIF_STATE_CONNECTED;
-	xb_startio(sc);
-	sc->xb_flags |= XB_READY;
-	mtx_unlock(&sc->xb_io_lock);
+	mtx_lock(&sc->xbd_io_lock);
+	sc->xbd_state = XBD_STATE_CONNECTED;
+	xbd_startio(sc);
+	sc->xbd_flags |= XBDF_READY;
+	mtx_unlock(&sc->xbd_io_lock);
 }
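
If the backend advertises, say, feature-max-indirect-segments = 256, the
clamping above reduces that to the front end's I/O limit.  A worked
sketch, assuming 4 KiB pages, an 8-byte struct blkif_request_segment,
and MAXPHYS of 128 KiB:

    segs_per_page  = 4096 / 8;                        /* 512 per page */
    max_segs       = XBD_SIZE_TO_SEGS(128 * 1024);    /* 128K/4K + 1 = 33 */
    indirect_pages = XBD_INDIRECT_SEGS_TO_PAGES(33);  /* (33 + 511)/512 = 1 */
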
 
 /**
@@ -936,493 +1375,236 @@
  * acknowledgement.
  */
 static void
-blkfront_closing(device_t dev)
+xbd_closing(device_t dev)
 {
-	struct xb_softc *sc = device_get_softc(dev);
+	struct xbd_softc *sc = device_get_softc(dev);
 
 	xenbus_set_state(dev, XenbusStateClosing);
 
-	DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));
+	DPRINTK("xbd_closing: %s removed\n", xenbus_get_node(dev));
 
-	if (sc->xb_disk != NULL) {
-		disk_destroy(sc->xb_disk);
-		sc->xb_disk = NULL;
+	if (sc->xbd_disk != NULL) {
+		disk_destroy(sc->xbd_disk);
+		sc->xbd_disk = NULL;
 	}
 
 	xenbus_set_state(dev, XenbusStateClosed); 
 }
 
-
+/*---------------------------- NewBus Entrypoints ----------------------------*/
 static int
-blkfront_detach(device_t dev)
+xbd_probe(device_t dev)
 {
-	struct xb_softc *sc = device_get_softc(dev);
+	if (strcmp(xenbus_get_type(dev), "vbd") != 0)
+		return (ENXIO);
 
-	DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));
+#ifdef XENHVM
+	if (xen_disable_pv_disks != 0)
+		return (ENXIO);
+#endif
 
-	blkif_free(sc);
-	mtx_destroy(&sc->xb_io_lock);
+	if (xen_hvm_domain()) {
+		int error;
+		char *type;
 
-	return 0;
-}
+		/*
+		 * When running in an HVM domain, IDE disk emulation is
+		 * disabled early in boot so that native drivers will
+		 * not see emulated hardware.  However, CDROM device
+		 * emulation cannot be disabled.
+		 *
+		 * Through use of FreeBSD's vm_guest and xen_hvm_domain()
+		 * APIs, we could modify the native CDROM driver to fail its
+		 * probe when running under Xen.  Unfortunately, the PV
+		 * CDROM support in XenServer (up through at least version
+		 * 6.2) isn't functional, so we instead rely on the emulated
+		 * CDROM instance, and fail to attach the PV one here in
+		 * the blkfront driver.
+		 */
+		error = xs_read(XST_NIL, xenbus_get_node(dev),
+		    "device-type", NULL, (void **) &type);
+		if (error)
+			return (ENXIO);
 
+		if (strncmp(type, "cdrom", 5) == 0) {
+			free(type, M_XENSTORE);
+			return (ENXIO);
+		}
+		free(type, M_XENSTORE);
+	}
 
-static inline void 
-flush_requests(struct xb_softc *sc)
-{
-	int notify;
-
-	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify);
-
-	if (notify)
-		notify_remote_via_irq(sc->irq);
+	device_set_desc(dev, "Virtual Block Device");
+	device_quiet(dev);
+	return (0);
 }
 
-static void
-blkif_restart_queue_callback(void *arg)
+/*
+ * Setup supplies the backend dir, virtual device.  We place an event
+ * channel and shared frame entries.  We watch backend to wait if it's
+ * ok.
+ */
+static int
+xbd_attach(device_t dev)
 {
-	struct xb_softc *sc = arg;
+	struct xbd_softc *sc;
+	const char *name;
+	uint32_t vdevice;
+	int error;
+	int i;
+	int unit;
 
-	mtx_lock(&sc->xb_io_lock);
+	/* FIXME: Use dynamic device id if this is not set. */
+	error = xs_scanf(XST_NIL, xenbus_get_node(dev),
+	    "virtual-device", NULL, "%" PRIu32, &vdevice);
+	if (error)
+		error = xs_scanf(XST_NIL, xenbus_get_node(dev),
+		    "virtual-device-ext", NULL, "%" PRIu32, &vdevice);
+	if (error) {
+		xenbus_dev_fatal(dev, error, "reading virtual-device");
+		device_printf(dev, "Couldn't determine virtual device.\n");
+		return (error);
+	}
 
-	xb_startio(sc);
+	xbd_vdevice_to_unit(vdevice, &unit, &name);
+	if (!strcmp(name, "xbd"))
+		device_set_unit(dev, unit);
 
-	mtx_unlock(&sc->xb_io_lock);
-}
+	sc = device_get_softc(dev);
+	mtx_init(&sc->xbd_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
+	xbd_initqs(sc);
+	for (i = 0; i < XBD_MAX_RING_PAGES; i++)
+		sc->xbd_ring_ref[i] = GRANT_REF_INVALID;
 
-static int
-blkif_open(struct disk *dp)
-{
-	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;
+	sc->xbd_dev = dev;
+	sc->xbd_vdevice = vdevice;
+	sc->xbd_state = XBD_STATE_DISCONNECTED;
 
-	if (sc == NULL) {
-		printf("xb%d: not found", sc->xb_unit);
-		return (ENXIO);
-	}
+	xbd_setup_sysctl(sc);
 
-	sc->xb_flags |= XB_OPEN;
-	sc->users++;
-	return (0);
-}
+	/* Wait for backend device to publish its protocol capabilities. */
+	xenbus_set_state(dev, XenbusStateInitialising);
 
-static int
-blkif_close(struct disk *dp)
-{
-	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;
-
-	if (sc == NULL)
-		return (ENXIO);
-	sc->xb_flags &= ~XB_OPEN;
-	if (--(sc->users) == 0) {
-		/*
-		 * Check whether we have been instructed to close.  We will
-		 * have ignored this request initially, as the device was
-		 * still mounted.
-		 */
-		if (xenbus_get_otherend_state(sc->xb_dev) == XenbusStateClosing)
-			blkfront_closing(sc->xb_dev);
-	}
 	return (0);
 }
 
 static int
-blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
+xbd_detach(device_t dev)
 {
-	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;
+	struct xbd_softc *sc = device_get_softc(dev);
 
-	if (sc == NULL)
-		return (ENXIO);
+	DPRINTK("%s: %s removed\n", __func__, xenbus_get_node(dev));
 
-	return (ENOTTY);
-}
+	xbd_free(sc);
+	mtx_destroy(&sc->xbd_io_lock);
 
-static void
-xb_free_command(struct xb_command *cm)
-{
-
-	KASSERT((cm->cm_flags & XB_ON_XBQ_MASK) == 0,
-	    ("Freeing command that is still on a queue\n"));
-
-	cm->cm_flags = 0;
-	cm->bp = NULL;
-	cm->cm_complete = NULL;
-	xb_enqueue_free(cm);
+	return 0;
 }
 
-/*
- * blkif_queue_request
- *
- * request block io
- * 
- * id: for guest use only.
- * operation: BLKIF_OP_{READ,WRITE,PROBE}
- * buffer: buffer to read/write into. this should be a
- *   virtual address in the guest os.
- */
-static struct xb_command *
-xb_bio_command(struct xb_softc *sc)
+static int
+xbd_suspend(device_t dev)
 {
-	struct xb_command *cm;
-	struct bio *bp;
+	struct xbd_softc *sc = device_get_softc(dev);
+	int retval;
+	int saved_state;
 
-	if (unlikely(sc->connected != BLKIF_STATE_CONNECTED))
-		return (NULL);
+	/* Prevent new requests being issued until we fix things up. */
+	mtx_lock(&sc->xbd_io_lock);
+	saved_state = sc->xbd_state;
+	sc->xbd_state = XBD_STATE_SUSPENDED;
 
-	bp = xb_dequeue_bio(sc);
-	if (bp == NULL)
-		return (NULL);
-
-	if ((cm = xb_dequeue_free(sc)) == NULL) {
-		xb_requeue_bio(sc, bp);
-		return (NULL);
+	/* Wait for outstanding I/O to drain. */
+	retval = 0;
+	while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
+		if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock,
+		    PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
+			retval = EBUSY;
+			break;
+		}
 	}
+	mtx_unlock(&sc->xbd_io_lock);
 
-	if (gnttab_alloc_grant_references(sc->max_request_segments,
-	    &cm->gref_head) != 0) {
-		gnttab_request_free_callback(&sc->callback,
-			blkif_restart_queue_callback, sc,
-			sc->max_request_segments);
-		xb_requeue_bio(sc, bp);
-		xb_enqueue_free(cm);
-		sc->xb_flags |= XB_FROZEN;
-		return (NULL);
-	}
+	if (retval != 0)
+		sc->xbd_state = saved_state;
 
-	cm->bp = bp;
-	cm->data = bp->bio_data;
-	cm->datalen = bp->bio_bcount;
-	cm->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
-	    BLKIF_OP_WRITE;
-	cm->sector_number = (blkif_sector_t)bp->bio_pblkno;
-
-	return (cm);
+	return (retval);
 }
 
 static int
-blkif_queue_request(struct xb_softc *sc, struct xb_command *cm)
+xbd_resume(device_t dev)
 {
-	int	error;
+	struct xbd_softc *sc = device_get_softc(dev);
 
-	error = bus_dmamap_load(sc->xb_io_dmat, cm->map, cm->data, cm->datalen,
-	    blkif_queue_cb, cm, 0);
-	if (error == EINPROGRESS) {
-		printf("EINPROGRESS\n");
-		sc->xb_flags |= XB_FROZEN;
-		cm->cm_flags |= XB_CMD_FROZEN;
+	if (xen_suspend_cancelled) {
+		sc->xbd_state = XBD_STATE_CONNECTED;
 		return (0);
 	}
 
-	return (error);
-}
+	DPRINTK("xbd_resume: %s\n", xenbus_get_node(dev));
 
-static void
-blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
-{
-	struct xb_softc *sc;
-	struct xb_command *cm;
-	blkif_request_t	*ring_req;
-	struct blkif_request_segment *sg;
-	struct blkif_request_segment *last_block_sg;
-	grant_ref_t *sg_ref;
-	vm_paddr_t buffer_ma;
-	uint64_t fsect, lsect;
-	int ref;
-	int op;
-	int block_segs;
-
-	cm = arg;
-	sc = cm->cm_sc;
-
-//printf("%s: Start\n", __func__);
-	if (error) {
-		printf("error %d in blkif_queue_cb\n", error);
-		cm->bp->bio_error = EIO;
-		biodone(cm->bp);
-		xb_free_command(cm);
-		return;
-	}
-
-	/* Fill out a communications ring structure. */
-	ring_req = RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
-	sc->ring.req_prod_pvt++;
-	ring_req->id = cm->id;
-	ring_req->operation = cm->operation;
-	ring_req->sector_number = cm->sector_number;
-	ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
-	ring_req->nr_segments = nsegs;
-	cm->nseg = nsegs;
-
-	block_segs    = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK);
-	sg            = ring_req->seg;
-	last_block_sg = sg + block_segs;
-	sg_ref        = cm->sg_refs;
-
-	while (1) {
-
-		while (sg < last_block_sg) {
-			buffer_ma = segs->ds_addr;
-			fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
-			lsect = fsect + (segs->ds_len  >> XBD_SECTOR_SHFT) - 1;
-
-			KASSERT(lsect <= 7, ("XEN disk driver data cannot "
-				"cross a page boundary"));
-
-			/* install a grant reference. */
-			ref = gnttab_claim_grant_reference(&cm->gref_head);
-
-			/*
-			 * GNTTAB_LIST_END == 0xffffffff, but it is private
-			 * to gnttab.c.
-			 */
-			KASSERT(ref != ~0, ("grant_reference failed"));
-
-			gnttab_grant_foreign_access_ref(
-				ref,
-				xenbus_get_otherend_id(sc->xb_dev),
-				buffer_ma >> PAGE_SHIFT,
-				ring_req->operation == BLKIF_OP_WRITE);
-
-			*sg_ref = ref;
-			*sg = (struct blkif_request_segment) {
-				.gref       = ref,
-				.first_sect = fsect, 
-				.last_sect  = lsect };
-			sg++;
-			sg_ref++;
-			segs++;
-			nsegs--;
-		}
-		block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
-		if (block_segs == 0)
-			break;
-
-		sg = BLKRING_GET_SEG_BLOCK(&sc->ring, sc->ring.req_prod_pvt);
-		sc->ring.req_prod_pvt++;
-		last_block_sg = sg + block_segs;
-	}
-
-	if (cm->operation == BLKIF_OP_READ)
-		op = BUS_DMASYNC_PREREAD;
-	else if (cm->operation == BLKIF_OP_WRITE)
-		op = BUS_DMASYNC_PREWRITE;
-	else
-		op = 0;
-	bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
-
-	gnttab_free_grant_references(cm->gref_head);
-
-	xb_enqueue_busy(cm);
-
-	/*
-	 * This flag means that we're probably executing in the busdma swi
-	 * instead of in the startio context, so an explicit flush is needed.
-	 */
-	if (cm->cm_flags & XB_CMD_FROZEN)
-		flush_requests(sc);
-
-//printf("%s: Done\n", __func__);
-	return;
+	xbd_free(sc);
+	xbd_initialize(sc);
+	return (0);
 }
 
-/*
- * Dequeue buffers and place them in the shared communication ring.
- * Return when no more requests can be accepted or all buffers have 
- * been queued.
- *
- * Signal XEN once the ring has been filled out.
+/**
+ * Callback received when the backend's state changes.
  */
 static void
-xb_startio(struct xb_softc *sc)
+xbd_backend_changed(device_t dev, XenbusState backend_state)
 {
-	struct xb_command *cm;
-	int error, queued = 0;
+	struct xbd_softc *sc = device_get_softc(dev);
 
-	mtx_assert(&sc->xb_io_lock, MA_OWNED);
+	DPRINTK("backend_state=%d\n", backend_state);
 
-	if (sc->connected != BLKIF_STATE_CONNECTED)
-		return;
+	switch (backend_state) {
+	case XenbusStateUnknown:
+	case XenbusStateInitialising:
+	case XenbusStateReconfigured:
+	case XenbusStateReconfiguring:
+	case XenbusStateClosed:
+		break;
 
-	while (RING_FREE_REQUESTS(&sc->ring) >= sc->max_request_blocks) {
-		if (sc->xb_flags & XB_FROZEN)
-			break;
+	case XenbusStateInitWait:
+	case XenbusStateInitialised:
+		xbd_initialize(sc);
+		break;
 
-		cm = xb_dequeue_ready(sc);
+	case XenbusStateConnected:
+		xbd_initialize(sc);
+		xbd_connect(sc);
+		break;
 
-		if (cm == NULL)
-		    cm = xb_bio_command(sc);
-
-		if (cm == NULL)
-			break;
-
-		if ((error = blkif_queue_request(sc, cm)) != 0) {
-			printf("blkif_queue_request returned %d\n", error);
-			break;
-		}
-		queued++;
-	}
-
-	if (queued != 0) 
-		flush_requests(sc);
-}
-
-static void
-blkif_int(void *xsc)
-{
-	struct xb_softc *sc = xsc;
-	struct xb_command *cm;
-	blkif_response_t *bret;
-	RING_IDX i, rp;
-	int op;
-
-	mtx_lock(&sc->xb_io_lock);
-
-	if (unlikely(sc->connected == BLKIF_STATE_DISCONNECTED)) {
-		mtx_unlock(&sc->xb_io_lock);
-		return;
-	}
-
- again:
-	rp = sc->ring.sring->rsp_prod;
-	rmb(); /* Ensure we see queued responses up to 'rp'. */
-
-	for (i = sc->ring.rsp_cons; i != rp;) {
-		bret = RING_GET_RESPONSE(&sc->ring, i);
-		cm   = &sc->shadow[bret->id];
-
-		xb_remove_busy(cm);
-		i += blkif_completion(cm);
-
-		if (cm->operation == BLKIF_OP_READ)
-			op = BUS_DMASYNC_POSTREAD;
-		else if (cm->operation == BLKIF_OP_WRITE)
-			op = BUS_DMASYNC_POSTWRITE;
+	case XenbusStateClosing:
+		if (sc->xbd_users > 0)
+			xenbus_dev_error(dev, -EBUSY,
+			    "Device in use; refusing to close");
 		else
-			op = 0;
-		bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
-		bus_dmamap_unload(sc->xb_io_dmat, cm->map);
-
-		/*
-		 * If commands are completing then resources are probably
-		 * being freed as well.  It's a cheap assumption even when
-		 * wrong.
-		 */
-		sc->xb_flags &= ~XB_FROZEN;
-
-		/*
-		 * Directly call the i/o complete routine to save an
-		 * an indirection in the common case.
-		 */
-		cm->status = bret->status;
-		if (cm->bp)
-			xb_bio_complete(sc, cm);
-		else if (cm->cm_complete)
-			(cm->cm_complete)(cm);
-		else
-			xb_free_command(cm);
+			xbd_closing(dev);
+		break;	
 	}
-
-	sc->ring.rsp_cons = i;
-
-	if (i != sc->ring.req_prod_pvt) {
-		int more_to_do;
-		RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, more_to_do);
-		if (more_to_do)
-			goto again;
-	} else {
-		sc->ring.sring->rsp_event = i + 1;
-	}
-
-	xb_startio(sc);
-
-	if (unlikely(sc->connected == BLKIF_STATE_SUSPENDED))
-		wakeup(&sc->cm_busy);
-
-	mtx_unlock(&sc->xb_io_lock);
 }
 
-static void 
-blkif_free(struct xb_softc *sc)
-{
-	uint8_t *sring_page_ptr;
-	int i;
-	
-	/* Prevent new requests being issued until we fix things up. */
-	mtx_lock(&sc->xb_io_lock);
-	sc->connected = BLKIF_STATE_DISCONNECTED; 
-	mtx_unlock(&sc->xb_io_lock);
-
-	/* Free resources associated with old device channel. */
-	if (sc->ring.sring != NULL) {
-		sring_page_ptr = (uint8_t *)sc->ring.sring;
-		for (i = 0; i < sc->ring_pages; i++) {
-			if (sc->ring_ref[i] != GRANT_INVALID_REF) {
-				gnttab_end_foreign_access_ref(sc->ring_ref[i]);
-				sc->ring_ref[i] = GRANT_INVALID_REF;
-			}
-			sring_page_ptr += PAGE_SIZE;
-		}
-		free(sc->ring.sring, M_XENBLOCKFRONT);
-		sc->ring.sring = NULL;
-	}
-
-	if (sc->shadow) {
-
-		for (i = 0; i < sc->max_requests; i++) {
-			struct xb_command *cm;
-
-			cm = &sc->shadow[i];
-			if (cm->sg_refs != NULL) {
-				free(cm->sg_refs, M_XENBLOCKFRONT);
-				cm->sg_refs = NULL;
-			}
-
-			bus_dmamap_destroy(sc->xb_io_dmat, cm->map);
-		}
-		free(sc->shadow, M_XENBLOCKFRONT);
-		sc->shadow = NULL;
-
-		bus_dma_tag_destroy(sc->xb_io_dmat);
-		
-		xb_initq_free(sc);
-		xb_initq_ready(sc);
-		xb_initq_complete(sc);
-	}
-		
-	if (sc->irq) {
-		unbind_from_irqhandler(sc->irq);
-		sc->irq = 0;
-	}
-}
-
-static int
-blkif_completion(struct xb_command *s)
-{
-//printf("%s: Req %p(%d)\n", __func__, s, s->nseg);
-	gnttab_end_foreign_access_references(s->nseg, s->sg_refs);
-	return (BLKIF_SEGS_TO_BLOCKS(s->nseg));
-}
-
-/* ** Driver registration ** */
-static device_method_t blkfront_methods[] = { 
+/*---------------------------- NewBus Registration ---------------------------*/
+static device_method_t xbd_methods[] = { 
 	/* Device interface */ 
-	DEVMETHOD(device_probe,         blkfront_probe), 
-	DEVMETHOD(device_attach,        blkfront_attach), 
-	DEVMETHOD(device_detach,        blkfront_detach), 
+	DEVMETHOD(device_probe,         xbd_probe), 
+	DEVMETHOD(device_attach,        xbd_attach), 
+	DEVMETHOD(device_detach,        xbd_detach), 
 	DEVMETHOD(device_shutdown,      bus_generic_shutdown), 
-	DEVMETHOD(device_suspend,       blkfront_suspend), 
-	DEVMETHOD(device_resume,        blkfront_resume), 
+	DEVMETHOD(device_suspend,       xbd_suspend), 
+	DEVMETHOD(device_resume,        xbd_resume), 
  
 	/* Xenbus interface */
-	DEVMETHOD(xenbus_otherend_changed, blkfront_backend_changed),
+	DEVMETHOD(xenbus_otherend_changed, xbd_backend_changed),
 
 	{ 0, 0 } 
 }; 
 
-static driver_t blkfront_driver = { 
+static driver_t xbd_driver = { 
 	"xbd", 
-	blkfront_methods, 
-	sizeof(struct xb_softc),                      
+	xbd_methods, 
+	sizeof(struct xbd_softc),                      
 }; 
-devclass_t blkfront_devclass; 
+devclass_t xbd_devclass; 
  
-DRIVER_MODULE(xbd, xenbusb_front, blkfront_driver, blkfront_devclass, 0, 0); 
+DRIVER_MODULE(xbd, xenbusb_front, xbd_driver, xbd_devclass, 0, 0); 

Modified: trunk/sys/dev/xen/blkfront/block.h
===================================================================
--- trunk/sys/dev/xen/blkfront/block.h	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/blkfront/block.h	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,6 +1,8 @@
+/* $MidnightBSD$ */
 /*
  * XenBSD block device driver
  *
+ * Copyright (c) 2010-2013 Spectra Logic Corporation
  * Copyright (c) 2009 Scott Long, Yahoo!
  * Copyright (c) 2009 Frank Suchomel, Citrix
  * Copyright (c) 2009 Doug F. Rabson, Citrix
@@ -26,12 +28,11 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/xen/blkfront/block.h 298960 2016-05-03 07:52:06Z mav $
  */
 
-
-#ifndef __XEN_DRIVERS_BLOCK_H__
-#define __XEN_DRIVERS_BLOCK_H__
+#ifndef __XEN_BLKFRONT_BLOCK_H__
+#define __XEN_BLKFRONT_BLOCK_H__
 #include <xen/blkif.h>
 
 /**
@@ -44,7 +45,7 @@
  *       guarantee we can handle an unaligned transfer without the need to
  *       use a bounce buffer.
  */
-#define	XBF_SEGS_TO_SIZE(segs)						\
+#define	XBD_SEGS_TO_SIZE(segs)						\
 	(((segs) - 1) * PAGE_SIZE)
 
 /**
@@ -57,264 +58,301 @@
  * \note We reserve a segment to guarantee we can handle an unaligned
  *       transfer without the need to use a bounce buffer.
  */
-#define	XBF_SIZE_TO_SEGS(size)						\
+#define	XBD_SIZE_TO_SEGS(size)						\
 	((size / PAGE_SIZE) + 1)
 
 /**
- * The maximum number of outstanding requests blocks (request headers plus
- * additional segment blocks) we will allow in a negotiated block-front/back
- * communication channel.
+ * The maximum number of shared memory ring pages we will allow in a
+ * negotiated block-front/back communication channel.  Allow enough
+ * ring space for all requests to be XBD_MAX_REQUEST_SIZE'd.
  */
-#define XBF_MAX_REQUESTS		256
+#define XBD_MAX_RING_PAGES		32
 
 /**
- * The maximum mapped region size per request we will allow in a negotiated
+ * The maximum number of outstanding requests we will allow in a negotiated
  * block-front/back communication channel.
  */
-#define	XBF_MAX_REQUEST_SIZE						\
-	MIN(MAXPHYS, XBF_SEGS_TO_SIZE(BLKIF_MAX_SEGMENTS_PER_REQUEST))
+#define XBD_MAX_REQUESTS						\
+	__CONST_RING_SIZE(blkif, PAGE_SIZE * XBD_MAX_RING_PAGES)
 
 /**
- * The maximum number of segments (within a request header and accompanying
- * segment blocks) per request we will allow in a negotiated block-front/back
- * communication channel.
+ * The maximum number of blkif segments which can be provided per indirect
+ * page in an indirect request.
  */
-#define	XBF_MAX_SEGMENTS_PER_REQUEST					\
-	(MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST,				\
-	     XBF_SIZE_TO_SEGS(XBF_MAX_REQUEST_SIZE)))
+#define XBD_MAX_SEGMENTS_PER_PAGE					\
+	(PAGE_SIZE / sizeof(struct blkif_request_segment))
 
 /**
- * The maximum number of shared memory ring pages we will allow in a
- * negotiated block-front/back communication channel.  Allow enough
- * ring space for all requests to be  XBF_MAX_REQUEST_SIZE'd.
+ * The maximum number of blkif segments which can be provided in an indirect
+ * request.
  */
-#define XBF_MAX_RING_PAGES						    \
-	BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBF_MAX_SEGMENTS_PER_REQUEST) \
-		       * XBF_MAX_REQUESTS)
+#define XBD_MAX_INDIRECT_SEGMENTS					\
+	(BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST * XBD_MAX_SEGMENTS_PER_PAGE)
 
-struct xlbd_type_info
-{
-	int partn_shift;
-	int disks_per_major;
-	char *devname;
-	char *diskname;
-};
+/**
+ * Compute the number of indirect segment pages required for an I/O with the
+ * specified number of indirect segments.
+ */
+#define XBD_INDIRECT_SEGS_TO_PAGES(segs)				\
+	((segs + XBD_MAX_SEGMENTS_PER_PAGE - 1) / XBD_MAX_SEGMENTS_PER_PAGE)
 
-struct xlbd_major_info
-{
-	int major;
-	int index;
-	int usage;
-	struct xlbd_type_info *type;
-};
+typedef enum {
+	XBDCF_Q_MASK		= 0xFF,
+	/* This command has contributed to xbd_qfrozen_cnt. */
+	XBDCF_FROZEN		= 1<<8,
+	/* Freeze the command queue on dispatch (i.e. single step command). */
+	XBDCF_Q_FREEZE		= 1<<9,
+	/* Bus DMA returned EINPROGRESS for this command. */
+	XBDCF_ASYNC_MAPPING	= 1<<10,
+	XBDCF_INITIALIZER	= XBDCF_Q_MASK
+} xbdc_flag_t;
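
The low byte of cm_flags doubles as the index of the queue a command
currently occupies (XBD_Q_NONE, aliased to XBDCF_Q_MASK just below, when
it is on none), so a command's location can be recovered without a
search; a minimal sketch:

    /* Which queue is this command on? */
    xbd_q_index_t idx = cm->cm_flags & XBDCF_Q_MASK;
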
 
-struct xb_command {
-	TAILQ_ENTRY(xb_command)	cm_link;
-	struct xb_softc		*cm_sc;
-	u_int			cm_flags;
-#define XB_CMD_FROZEN		(1<<0)
-#define XB_CMD_POLLED		(1<<1)
-#define XB_ON_XBQ_FREE		(1<<2)
-#define XB_ON_XBQ_READY		(1<<3)
-#define XB_ON_XBQ_BUSY		(1<<4)
-#define XB_ON_XBQ_COMPLETE	(1<<5)
-#define XB_ON_XBQ_MASK		((1<<2)|(1<<3)|(1<<4)|(1<<5))
-	bus_dmamap_t		map;
-	uint64_t		id;
-	grant_ref_t		*sg_refs;
-	struct bio		*bp;
-	grant_ref_t		gref_head;
-	void			*data;
-	size_t			datalen;
-	u_int			nseg;
-	int			operation;
-	blkif_sector_t		sector_number;
-	int			status;
-	void			(* cm_complete)(struct xb_command *);
+struct xbd_command;
+typedef void xbd_cbcf_t(struct xbd_command *);
+
+struct xbd_command {
+	TAILQ_ENTRY(xbd_command) cm_link;
+	struct xbd_softc	*cm_sc;
+	xbdc_flag_t		 cm_flags;
+	bus_dmamap_t		 cm_map;
+	uint64_t		 cm_id;
+	grant_ref_t		*cm_sg_refs;
+	struct bio		*cm_bp;
+	grant_ref_t		 cm_gref_head;
+	void			*cm_data;
+	size_t			 cm_datalen;
+	u_int			 cm_nseg;
+	int			 cm_operation;
+	blkif_sector_t		 cm_sector_number;
+	int			 cm_status;
+	xbd_cbcf_t		*cm_complete;
+	void			*cm_indirectionpages;
+	grant_ref_t		 cm_indirectionrefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
 };
 
-#define XBQ_FREE	0
-#define XBQ_BIO		1
-#define XBQ_READY	2
-#define XBQ_BUSY	3
-#define XBQ_COMPLETE	4
-#define XBQ_COUNT	5
+typedef enum {
+	XBD_Q_FREE,
+	XBD_Q_READY,
+	XBD_Q_BUSY,
+	XBD_Q_COMPLETE,
+	XBD_Q_BIO,
+	XBD_Q_COUNT,
+	XBD_Q_NONE = XBDCF_Q_MASK
+} xbd_q_index_t;
 
-struct xb_qstat {
-	uint32_t	q_length;
-	uint32_t	q_max;
-};
+typedef struct xbd_cm_q {
+	TAILQ_HEAD(, xbd_command) q_tailq;
+	uint32_t		  q_length;
+	uint32_t		  q_max;
+} xbd_cm_q_t;
 
-union xb_statrequest {
-	uint32_t		ms_item;
-	struct xb_qstat		ms_qstat;
-};
+typedef enum {
+	XBD_STATE_DISCONNECTED,
+	XBD_STATE_CONNECTED,
+	XBD_STATE_SUSPENDED
+} xbd_state_t;
 
+typedef enum {
+	XBDF_NONE	  = 0,
+	XBDF_OPEN	  = 1 << 0, /* drive is open (can't shut down) */
+	XBDF_BARRIER	  = 1 << 1, /* backend supports barriers */
+	XBDF_FLUSH	  = 1 << 2, /* backend supports flush */
+	XBDF_READY	  = 1 << 3, /* Is ready */
+	XBDF_CM_SHORTAGE  = 1 << 4, /* Free cm resource shortage active. */
+	XBDF_GNT_SHORTAGE = 1 << 5, /* Grant ref resource shortage active */
+	XBDF_WAIT_IDLE	  = 1 << 6,  /*
+				     * No new work until outstanding work
+				     * completes.
+				     */
+	XBDF_DISCARD	  = 1 << 7, /* backend supports discard */
+	XBDF_PERSISTENT	  = 1 << 8  /* backend supports persistent grants */
+} xbd_flag_t;
+
 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.
  */
-struct xb_softc {
-	device_t		xb_dev;
-	struct disk		*xb_disk;		/* disk params */
-	struct bio_queue_head   xb_bioq;		/* sort queue */
-	int			xb_unit;
-	int			xb_flags;
-#define XB_OPEN		(1<<0)		/* drive is open (can't shut down) */
-#define XB_BARRIER	(1 << 1)	/* backend supports barriers */
-#define XB_READY	(1 << 2)	/* Is ready */
-#define XB_FROZEN	(1 << 3)	/* Waiting for resources */
-	int			vdevice;
-	int			connected;
-	u_int			ring_pages;
-	uint32_t		max_requests;
-	uint32_t		max_request_segments;
-	uint32_t		max_request_blocks;
-	uint32_t		max_request_size;
-	grant_ref_t		ring_ref[XBF_MAX_RING_PAGES];
-	blkif_front_ring_t	ring;
-	unsigned int		irq;
-	struct gnttab_free_callback	callback;
-	TAILQ_HEAD(,xb_command)	cm_free;
-	TAILQ_HEAD(,xb_command)	cm_ready;
-	TAILQ_HEAD(,xb_command)	cm_busy;
-	TAILQ_HEAD(,xb_command)	cm_complete;
-	struct xb_qstat		xb_qstat[XBQ_COUNT];
-	bus_dma_tag_t		xb_io_dmat;
+struct xbd_softc {
+	device_t			 xbd_dev;
+	struct disk			*xbd_disk;	/* disk params */
+	struct bio_queue_head 		 xbd_bioq;	/* sort queue */
+	int				 xbd_unit;
+	xbd_flag_t			 xbd_flags;
+	int				 xbd_qfrozen_cnt;
+	int				 xbd_vdevice;
+	xbd_state_t			 xbd_state;
+	u_int				 xbd_ring_pages;
+	uint32_t			 xbd_max_requests;
+	uint32_t			 xbd_max_request_segments;
+	uint32_t			 xbd_max_request_size;
+	uint32_t			 xbd_max_request_indirectpages;
+	grant_ref_t			 xbd_ring_ref[XBD_MAX_RING_PAGES];
+	blkif_front_ring_t		 xbd_ring;
+	xen_intr_handle_t		 xen_intr_handle;
+	struct gnttab_free_callback	 xbd_callback;
+	xbd_cm_q_t			 xbd_cm_q[XBD_Q_COUNT];
+	bus_dma_tag_t			 xbd_io_dmat;
 
 	/**
 	 * The number of people holding this device open.  We won't allow a
 	 * hot-unplug unless this is 0.
 	 */
-	int			users;
-	struct mtx		xb_io_lock;
+	int				 xbd_users;
+	struct mtx			 xbd_io_lock;
 
-	struct xb_command      *shadow;
+	struct xbd_command		*xbd_shadow;
 };
 
-int xlvbd_add(struct xb_softc *, blkif_sector_t sectors, int device,
-	      uint16_t vdisk_info, unsigned long sector_size);
-void xlvbd_del(struct xb_softc *);
+int xbd_instance_create(struct xbd_softc *, blkif_sector_t sectors, int device,
+			uint16_t vdisk_info, unsigned long sector_size,
+			unsigned long phys_sector_size);
 
-#define XBQ_ADD(sc, qname)					\
-	do {							\
-		struct xb_qstat *qs;				\
-								\
-		qs = &(sc)->xb_qstat[qname];			\
-		qs->q_length++;					\
-		if (qs->q_length > qs->q_max)			\
-			qs->q_max = qs->q_length;		\
-	} while (0)
+static inline void
+xbd_added_qentry(struct xbd_softc *sc, xbd_q_index_t index)
+{
+	struct xbd_cm_q *cmq;
 
-#define XBQ_REMOVE(sc, qname)	(sc)->xb_qstat[qname].q_length--
+	cmq = &sc->xbd_cm_q[index];
+	cmq->q_length++;
+	if (cmq->q_length > cmq->q_max)
+		cmq->q_max = cmq->q_length;
+}
 
-#define XBQ_INIT(sc, qname)					\
-	do {							\
-		sc->xb_qstat[qname].q_length = 0;		\
-		sc->xb_qstat[qname].q_max = 0;			\
-	} while (0)
+static inline void
+xbd_removed_qentry(struct xbd_softc *sc, xbd_q_index_t index)
+{
+	sc->xbd_cm_q[index].q_length--;
+}
 
-#define XBQ_COMMAND_QUEUE(name, index)					\
-	static __inline void						\
-	xb_initq_ ## name (struct xb_softc *sc)				\
-	{								\
-		TAILQ_INIT(&sc->cm_ ## name);				\
-		XBQ_INIT(sc, index);					\
-	}								\
-	static __inline void						\
-	xb_enqueue_ ## name (struct xb_command *cm)			\
-	{								\
-		if ((cm->cm_flags & XB_ON_XBQ_MASK) != 0) {		\
-			printf("command %p is on another queue, "	\
-			    "flags = %#x\n", cm, cm->cm_flags);		\
-			panic("command is on another queue");		\
-		}							\
-		TAILQ_INSERT_TAIL(&cm->cm_sc->cm_ ## name, cm, cm_link); \
-		cm->cm_flags |= XB_ON_ ## index;			\
-		XBQ_ADD(cm->cm_sc, index);				\
-	}								\
-	static __inline void						\
-	xb_requeue_ ## name (struct xb_command *cm)			\
-	{								\
-		if ((cm->cm_flags & XB_ON_XBQ_MASK) != 0) {		\
-			printf("command %p is on another queue, "	\
-			    "flags = %#x\n", cm, cm->cm_flags);		\
-			panic("command is on another queue");		\
-		}							\
-		TAILQ_INSERT_HEAD(&cm->cm_sc->cm_ ## name, cm, cm_link); \
-		cm->cm_flags |= XB_ON_ ## index;			\
-		XBQ_ADD(cm->cm_sc, index);				\
-	}								\
-	static __inline struct xb_command *				\
-	xb_dequeue_ ## name (struct xb_softc *sc)			\
-	{								\
-		struct xb_command *cm;					\
-									\
-		if ((cm = TAILQ_FIRST(&sc->cm_ ## name)) != NULL) {	\
-			if ((cm->cm_flags & XB_ON_XBQ_MASK) !=		\
-			     XB_ON_ ## index) {				\
-				printf("command %p not in queue, "	\
-				    "flags = %#x, bit = %#x\n", cm,	\
-				    cm->cm_flags, XB_ON_ ## index);	\
-				panic("command not in queue");		\
-			}						\
-			TAILQ_REMOVE(&sc->cm_ ## name, cm, cm_link);	\
-			cm->cm_flags &= ~XB_ON_ ## index;		\
-			XBQ_REMOVE(sc, index);				\
-		}							\
-		return (cm);						\
-	}								\
-	static __inline void						\
-	xb_remove_ ## name (struct xb_command *cm)			\
-	{								\
-		if ((cm->cm_flags & XB_ON_XBQ_MASK) != XB_ON_ ## index){\
-			printf("command %p not in queue, flags = %#x, " \
-			    "bit = %#x\n", cm, cm->cm_flags,		\
-			    XB_ON_ ## index);				\
-			panic("command not in queue");			\
-		}							\
-		TAILQ_REMOVE(&cm->cm_sc->cm_ ## name, cm, cm_link);	\
-		cm->cm_flags &= ~XB_ON_ ## index;			\
-		XBQ_REMOVE(cm->cm_sc, index);				\
-	}								\
-struct hack
+static inline uint32_t
+xbd_queue_length(struct xbd_softc *sc, xbd_q_index_t index)
+{
+	return (sc->xbd_cm_q[index].q_length);
+}
 
-XBQ_COMMAND_QUEUE(free, XBQ_FREE);
-XBQ_COMMAND_QUEUE(ready, XBQ_READY);
-XBQ_COMMAND_QUEUE(busy, XBQ_BUSY);
-XBQ_COMMAND_QUEUE(complete, XBQ_COMPLETE);
+static inline void
+xbd_initq_cm(struct xbd_softc *sc, xbd_q_index_t index)
+{
+	struct xbd_cm_q *cmq;
 
-static __inline void
-xb_initq_bio(struct xb_softc *sc)
+	cmq = &sc->xbd_cm_q[index];
+	TAILQ_INIT(&cmq->q_tailq);
+	cmq->q_length = 0;
+	cmq->q_max = 0;
+}
+
+static inline void
+xbd_enqueue_cm(struct xbd_command *cm, xbd_q_index_t index)
 {
-	bioq_init(&sc->xb_bioq);
-	XBQ_INIT(sc, XBQ_BIO);
+	KASSERT(index != XBD_Q_BIO,
+	    ("%s: Commands cannot access the bio queue.", __func__));
+	if ((cm->cm_flags & XBDCF_Q_MASK) != XBD_Q_NONE)
+		panic("%s: command %p is already on queue %d.",
+		    __func__, cm, cm->cm_flags & XBDCF_Q_MASK);
+	TAILQ_INSERT_TAIL(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link);
+	cm->cm_flags &= ~XBDCF_Q_MASK;
+	cm->cm_flags |= index;
+	xbd_added_qentry(cm->cm_sc, index);
 }
 
-static __inline void
-xb_enqueue_bio(struct xb_softc *sc, struct bio *bp)
+static inline void
+xbd_requeue_cm(struct xbd_command *cm, xbd_q_index_t index)
 {
-	bioq_insert_tail(&sc->xb_bioq, bp);
-	XBQ_ADD(sc, XBQ_BIO);
+	KASSERT(index != XBD_Q_BIO,
+	    ("%s: Commands cannot access the bio queue.", __func__));
+	if ((cm->cm_flags & XBDCF_Q_MASK) != XBD_Q_NONE)
+		panic("%s: command %p is already on queue %d.",
+		    __func__, cm, cm->cm_flags & XBDCF_Q_MASK);
+	TAILQ_INSERT_HEAD(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link);
+	cm->cm_flags &= ~XBDCF_Q_MASK;
+	cm->cm_flags |= index;
+	xbd_added_qentry(cm->cm_sc, index);
 }
 
-static __inline void
-xb_requeue_bio(struct xb_softc *sc, struct bio *bp)
+static inline struct xbd_command *
+xbd_dequeue_cm(struct xbd_softc *sc, xbd_q_index_t index)
 {
-	bioq_insert_head(&sc->xb_bioq, bp);
-	XBQ_ADD(sc, XBQ_BIO);
+	struct xbd_command *cm;
+
+	KASSERT(index != XBD_Q_BIO,
+	    ("%s: Commands cannot access the bio queue.", __func__));
+
+	if ((cm = TAILQ_FIRST(&sc->xbd_cm_q[index].q_tailq)) != NULL) {
+		if ((cm->cm_flags & XBDCF_Q_MASK) != index) {
+			panic("%s: command %p is on queue %d, "
+			    "not specified queue %d",
+			    __func__, cm,
+			    cm->cm_flags & XBDCF_Q_MASK,
+			    index);
+		}
+		TAILQ_REMOVE(&sc->xbd_cm_q[index].q_tailq, cm, cm_link);
+		cm->cm_flags &= ~XBDCF_Q_MASK;
+		cm->cm_flags |= XBD_Q_NONE;
+		xbd_removed_qentry(cm->cm_sc, index);
+	}
+	return (cm);
 }
 
-static __inline struct bio *
-xb_dequeue_bio(struct xb_softc *sc)
+static inline void
+xbd_remove_cm(struct xbd_command *cm, xbd_q_index_t expected_index)
 {
+	xbd_q_index_t index;
+
+	index = cm->cm_flags & XBDCF_Q_MASK;
+
+	KASSERT(index != XBD_Q_BIO,
+	    ("%s: Commands cannot access the bio queue.", __func__));
+
+	if (index != expected_index) {
+		panic("%s: command %p is on queue %d, not specified queue %d",
+		    __func__, cm, index, expected_index);
+	}
+	TAILQ_REMOVE(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link);
+	cm->cm_flags &= ~XBDCF_Q_MASK;
+	cm->cm_flags |= XBD_Q_NONE;
+	xbd_removed_qentry(cm->cm_sc, index);
+}
+
+static inline void
+xbd_initq_bio(struct xbd_softc *sc)
+{
+	bioq_init(&sc->xbd_bioq);
+}
+
+static inline void
+xbd_enqueue_bio(struct xbd_softc *sc, struct bio *bp)
+{
+	bioq_insert_tail(&sc->xbd_bioq, bp);
+	xbd_added_qentry(sc, XBD_Q_BIO);
+}
+
+static inline void
+xbd_requeue_bio(struct xbd_softc *sc, struct bio *bp)
+{
+	bioq_insert_head(&sc->xbd_bioq, bp);
+	xbd_added_qentry(sc, XBD_Q_BIO);
+}
+
+static inline struct bio *
+xbd_dequeue_bio(struct xbd_softc *sc)
+{
 	struct bio *bp;
 
-	if ((bp = bioq_first(&sc->xb_bioq)) != NULL) {
-		bioq_remove(&sc->xb_bioq, bp);
-		XBQ_REMOVE(sc, XBQ_BIO);
+	if ((bp = bioq_first(&sc->xbd_bioq)) != NULL) {
+		bioq_remove(&sc->xbd_bioq, bp);
+		xbd_removed_qentry(sc, XBD_Q_BIO);
 	}
 	return (bp);
 }
 
-#endif /* __XEN_DRIVERS_BLOCK_H__ */
+static inline void
+xbd_initqs(struct xbd_softc *sc)
+{
+	u_int index;
 
+	for (index = 0; index < XBD_Q_COUNT; index++)
+		xbd_initq_cm(sc, index);
+
+	xbd_initq_bio(sc);
+}
+
+#endif /* __XEN_BLKFRONT_BLOCK_H__ */
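
The block.h rework above replaces the XBQ_COMMAND_QUEUE macro expansions
with ordinary inline functions keyed by an xbd_q_index_t, folding the
per-queue statistics into struct xbd_cm_q.  A minimal userspace model of
the same length/high-water accounting (the names here are illustrative,
not the driver's):

#include <sys/queue.h>
#include <stdint.h>
#include <stdio.h>

struct item {
	TAILQ_ENTRY(item) link;
};

struct cm_q {
	TAILQ_HEAD(, item) q_tailq;
	uint32_t q_length;	/* current depth */
	uint32_t q_max;		/* high-water mark */
};

static void
q_enqueue(struct cm_q *q, struct item *it)
{
	TAILQ_INSERT_TAIL(&q->q_tailq, it, link);
	if (++q->q_length > q->q_max)
		q->q_max = q->q_length;
}

static struct item *
q_dequeue(struct cm_q *q)
{
	struct item *it;

	if ((it = TAILQ_FIRST(&q->q_tailq)) != NULL) {
		TAILQ_REMOVE(&q->q_tailq, it, link);
		q->q_length--;
	}
	return (it);
}

int
main(void)
{
	struct cm_q q = { TAILQ_HEAD_INITIALIZER(q.q_tailq), 0, 0 };
	struct item items[3];
	int i;

	for (i = 0; i < 3; i++)
		q_enqueue(&q, &items[i]);
	while (q_dequeue(&q) != NULL)
		;
	printf("depth %u, high-water %u\n", q.q_length, q.q_max);
	return (0);
}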

Modified: trunk/sys/dev/xen/console/console.c
===================================================================
--- trunk/sys/dev/xen/console/console.c	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/console/console.c	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/console/console.c 265999 2014-05-14 01:35:43Z ian $");
 
 #include <sys/param.h>
 #include <sys/module.h>
@@ -15,7 +16,7 @@
 #include <sys/kernel.h>
 #include <sys/bus.h>
 #include <machine/stdarg.h>
-#include <machine/xen/xen-os.h>
+#include <xen/xen-os.h>
 #include <xen/hypervisor.h>
 #include <xen/xen_intr.h>
 #include <sys/cons.h>
@@ -71,6 +72,8 @@
 static int rc, rp;
 static unsigned int cnsl_evt_reg;
 static unsigned int wc, wp; /* write_cons, write_prod */
+xen_intr_handle_t xen_intr_handle;
+device_t xencons_dev;
 
 #ifdef KDB
 static int	xc_altbrk;
@@ -224,7 +227,7 @@
 xc_probe(device_t dev)
 {
 
-	return (0);
+	return (BUS_PROBE_NOWILDCARD);
 }
 
 static int
@@ -232,6 +235,7 @@
 {
 	int error;
 
+	xencons_dev = dev;
 	xccons = tty_alloc(&xc_ttydevsw, NULL);
 	tty_makedev(xccons, NULL, "xc%r", 0);
 
@@ -243,15 +247,10 @@
 	callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons);
     
 	if (xen_start_info->flags & SIF_INITDOMAIN) {
-			error = bind_virq_to_irqhandler(
-				 VIRQ_CONSOLE,
-				 0,
-				 "console",
-				 NULL,
-				 xencons_priv_interrupt, NULL,
-				 INTR_TYPE_TTY, NULL);
-		
-				KASSERT(error >= 0, ("can't register console interrupt"));
+		error = xen_intr_bind_virq(dev, VIRQ_CONSOLE, 0, NULL,
+		                           xencons_priv_interrupt, NULL,
+		                           INTR_TYPE_TTY, &xen_intr_handle);
+		KASSERT(error >= 0, ("can't register console interrupt"));
 	}
 
 	/* register handler to flush console on shutdown */
@@ -358,6 +357,7 @@
 	xen_console_up = 0;
 }
 
+#if 0
 static inline int 
 __xencons_put_char(int ch)
 {
@@ -367,6 +367,7 @@
 	wbuf[WBUF_MASK(wp++)] = _ch;
 	return 1;
 }
+#endif
 
 
 static void
@@ -410,7 +411,8 @@
 	DEVMETHOD(device_identify, xc_identify),
 	DEVMETHOD(device_probe, xc_probe),
 	DEVMETHOD(device_attach, xc_attach),
-	{0, 0}
+
+	DEVMETHOD_END
 };
 
 static driver_t xc_driver = {
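
Two newbus idioms recur throughout this sync: probe routines now return
BUS_PROBE_NOWILDCARD instead of 0, so a driver no longer claims wildcard
(unnamed) bus entries, and method tables end with DEVMETHOD_END rather
than a bare {0, 0} sentinel.  The shape, as a sketch only (the mydrv_*
names are placeholders, and the attach routine is assumed to be defined
elsewhere):

static int	mydrv_attach(device_t dev);

static int
mydrv_probe(device_t dev)
{

	/* Attach only when the parent bus names this driver explicitly. */
	return (BUS_PROBE_NOWILDCARD);
}

static device_method_t mydrv_methods[] = {
	DEVMETHOD(device_probe, mydrv_probe),
	DEVMETHOD(device_attach, mydrv_attach),

	DEVMETHOD_END
};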

Modified: trunk/sys/dev/xen/console/xencons_ring.c
===================================================================
--- trunk/sys/dev/xen/console/xencons_ring.c	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/console/xencons_ring.c	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/console/xencons_ring.c 255040 2013-08-29 19:52:18Z gibbs $");
 
 #include <sys/param.h>
 #include <sys/module.h>
@@ -16,7 +17,8 @@
 #include <sys/cons.h>
 
 #include <machine/stdarg.h>
-#include <machine/xen/xen-os.h>
+
+#include <xen/xen-os.h>
 #include <xen/hypervisor.h>
 #include <xen/xen_intr.h>
 #include <sys/cons.h>
@@ -30,9 +32,10 @@
 #include <xen/interface/io/console.h>
 
 #define console_evtchn	console.domU.evtchn
-static unsigned int console_irq;
+xen_intr_handle_t console_handle;
 extern char *console_page;
 extern struct mtx              cn_mtx;
+extern device_t xencons_dev;
 
 static inline struct xencons_interface *
 xencons_interface(void)
@@ -74,7 +77,7 @@
 	wmb();
 	intf->out_prod = prod;
 
-	notify_remote_via_evtchn(xen_start_info->console_evtchn);
+	xen_intr_signal(console_handle);
 
 	return sent;
 
@@ -106,7 +109,7 @@
 	intf->in_cons = cons;
 
 	CN_LOCK(cn_mtx);
-	notify_remote_via_evtchn(xen_start_info->console_evtchn);
+	xen_intr_signal(console_handle);
 
 	xencons_tx();
 	CN_UNLOCK(cn_mtx);
@@ -126,9 +129,9 @@
 	if (!xen_start_info->console_evtchn)
 		return 0;
 
-	err = bind_caller_port_to_irqhandler(xen_start_info->console_evtchn,
-		"xencons", xencons_handle_input, NULL,
-		INTR_TYPE_MISC | INTR_MPSAFE, &console_irq);
+	err = xen_intr_bind_local_port(xencons_dev,
+	    xen_start_info->console_evtchn, NULL, xencons_handle_input, NULL,
+	    INTR_TYPE_MISC | INTR_MPSAFE, &console_handle);
 	if (err) {
 		return err;
 	}
@@ -146,7 +149,7 @@
 	if (!xen_start_info->console_evtchn)
 		return;
 
-	unbind_from_irqhandler(console_irq);
+	xen_intr_unbind(&console_handle);
 }
 
 void 

Modified: trunk/sys/dev/xen/console/xencons_ring.h
===================================================================
--- trunk/sys/dev/xen/console/xencons_ring.h	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/console/xencons_ring.h	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 /*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/xen/console/xencons_ring.h 192003 2009-05-11 22:55:49Z kmacy $
  *
  */
 #ifndef _XENCONS_RING_H

Modified: trunk/sys/dev/xen/control/control.c
===================================================================
--- trunk/sys/dev/xen/control/control.c	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/control/control.c	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2010 Justin T. Gibbs, Spectra Logic Corporation
  * All rights reserved.
@@ -89,7 +90,7 @@
  * SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/control/control.c 315676 2017-03-21 09:38:59Z royger $");
 
 /**
  * \file control.c
@@ -119,31 +120,39 @@
 #include <sys/taskqueue.h>
 #include <sys/types.h>
 #include <sys/vnode.h>
-
-#ifndef XENHVM
 #include <sys/sched.h>
 #include <sys/smp.h>
-#endif
+#include <sys/eventhandler.h>
 
 #include <geom/geom.h>
 
 #include <machine/_inttypes.h>
-#include <machine/xen/xen-os.h>
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 
+#include <xen/xen-os.h>
 #include <xen/blkif.h>
 #include <xen/evtchn.h>
 #include <xen/gnttab.h>
 #include <xen/xen_intr.h>
 
+#ifdef XENHVM
+#include <xen/hvm.h>
+#endif
+
 #include <xen/interface/event_channel.h>
 #include <xen/interface/grant_table.h>
 
 #include <xen/xenbus/xenbusvar.h>
 
+#include <machine/xen/xenvar.h>
+#include <machine/xen/xenfunc.h>
+
+bool xen_suspend_cancelled;
 /*--------------------------- Forward Declarations --------------------------*/
 /** Function signature for shutdown event handlers. */
 typedef	void (xctrl_shutdown_handler_t)(void);
@@ -152,7 +161,6 @@
 static xctrl_shutdown_handler_t xctrl_reboot;
 static xctrl_shutdown_handler_t xctrl_suspend;
 static xctrl_shutdown_handler_t xctrl_crash;
-static xctrl_shutdown_handler_t xctrl_halt;
 
 /*-------------------------- Private Data Structures -------------------------*/
 /** Element type for lookup table of event name to handler. */
@@ -167,7 +175,7 @@
 	{ "reboot",   xctrl_reboot   },
 	{ "suspend",  xctrl_suspend  },
 	{ "crash",    xctrl_crash    },
-	{ "halt",     xctrl_halt     },
+	{ "halt",     xctrl_poweroff },
 };
 
 struct xctrl_softc {
@@ -195,7 +203,7 @@
 static void
 xctrl_suspend()
 {
-	int i, j, k, fpp;
+	int i, j, k, fpp, suspend_cancelled;
 	unsigned long max_pfn, start_info_mfn;
 
 	EVENTHANDLER_INVOKE(power_suspend);
@@ -242,6 +250,7 @@
 
 	xencons_suspend();
 	gnttab_suspend();
+	intr_suspend();
 
 	max_pfn = HYPERVISOR_shared_info->arch.max_pfn;
 
@@ -259,7 +268,7 @@
 	 */
 	start_info_mfn = VTOMFN(xen_start_info);
 	pmap_suspend();
-	HYPERVISOR_suspend(start_info_mfn);
+	suspend_cancelled = HYPERVISOR_suspend(start_info_mfn);
 	pmap_resume();
 
 	pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
@@ -282,7 +291,7 @@
 	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
 
 	gnttab_resume();
-	irq_resume();
+	intr_resume(suspend_cancelled != 0);
 	local_irq_enable();
 	xencons_resume();
 
@@ -326,17 +335,35 @@
 }
 
 #else
-extern void xenpci_resume(void);
 
 /* HVM mode suspension. */
 static void
 xctrl_suspend()
 {
-	int suspend_cancelled;
+#ifdef SMP
+	cpuset_t cpu_suspend_map;
+#endif
 
+	EVENTHANDLER_INVOKE(power_suspend_early);
+	xs_lock();
+	stop_all_proc();
+	xs_unlock();
 	EVENTHANDLER_INVOKE(power_suspend);
 
+	if (smp_started) {
+		thread_lock(curthread);
+		sched_bind(curthread, 0);
+		thread_unlock(curthread);
+	}
+	KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0"));
+
 	/*
+	 * Clear our XenStore node so the toolstack knows we are
+	 * responding to the suspend request.
+	 */
+	xs_write(XST_NIL, "control", "shutdown", "");
+
+	/*
 	 * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
 	 * drivers need this.
 	 */
@@ -348,33 +375,76 @@
 	}
 	mtx_unlock(&Giant);
 
+#ifdef SMP
+	CPU_ZERO(&cpu_suspend_map);	/* silence gcc */
+	if (smp_started) {
+		/*
+		 * Suspend other CPUs. This prevents IPIs while we
+		 * are resuming, and will allow us to reset per-cpu
+		 * vcpu_info on resume.
+		 */
+		cpu_suspend_map = all_cpus;
+		CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map);
+		if (!CPU_EMPTY(&cpu_suspend_map))
+			suspend_cpus(cpu_suspend_map);
+	}
+#endif
+
 	/*
 	 * Prevent any races with evtchn_interrupt() handler.
 	 */
 	disable_intr();
-	irq_suspend();
+	intr_suspend();
+	xen_hvm_suspend();
 
-	suspend_cancelled = HYPERVISOR_suspend(0);
-	if (suspend_cancelled)
-		irq_resume();
-	else
-		xenpci_resume();
+	xen_suspend_cancelled = !!HYPERVISOR_suspend(0);
 
+	if (!xen_suspend_cancelled) {
+		xen_hvm_resume(false);
+	}
+	intr_resume(xen_suspend_cancelled != 0);
+	enable_intr();
+
 	/*
-	 * Re-enable interrupts and put the scheduler back to normal.
+	 * Reset grant table info.
 	 */
-	enable_intr();
+	if (!xen_suspend_cancelled) {
+		gnttab_resume();
+	}
 
+#ifdef SMP
+	/* Send an IPI_BITMAP in case there are pending bitmap IPIs. */
+	lapic_ipi_vectored(IPI_BITMAP_VECTOR, APIC_IPI_DEST_ALL);
+	if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) {
+		/*
+		 * Now that event channels have been initialized,
+		 * resume CPUs.
+		 */
+		resume_cpus(cpu_suspend_map);
+	}
+#endif
+
 	/*
 	 * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
 	 * similar.
 	 */
 	mtx_lock(&Giant);
-	if (!suspend_cancelled)
-		DEVICE_RESUME(root_bus);
+	DEVICE_RESUME(root_bus);
 	mtx_unlock(&Giant);
 
+	if (smp_started) {
+		thread_lock(curthread);
+		sched_unbind(curthread);
+		thread_unlock(curthread);
+	}
+
+	resume_all_proc();
+
 	EVENTHANDLER_INVOKE(power_resume);
+
+	if (bootverbose)
+		printf("System resumed after suspension\n");
+
 }
 #endif
 
@@ -384,12 +454,6 @@
 	panic("Xen directed crash");
 }
 
-static void
-xctrl_halt()
-{
-	shutdown_nice(RB_HALT);
-}
-
 /*------------------------------ Event Reception -----------------------------*/
 static void
 xctrl_on_watch_event(struct xs_watch *watch, const char **vec, unsigned int len)
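
The control driver's event dispatch is a simple name-to-handler lookup
table; this sync drops xctrl_halt() and points the "halt" event at
xctrl_poweroff instead.  A standalone model of the same dispatch (the
handler bodies here just print and are not the driver's):

#include <stdio.h>
#include <string.h>

typedef void handler_t(void);

static void do_poweroff(void) { printf("poweroff\n"); }
static void do_reboot(void)   { printf("reboot\n"); }
static void do_suspend(void)  { printf("suspend\n"); }

static const struct {
	const char *name;
	handler_t  *handler;
} reasons[] = {
	{ "poweroff", do_poweroff },
	{ "reboot",   do_reboot   },
	{ "suspend",  do_suspend  },
	{ "halt",     do_poweroff },	/* halt now shares poweroff */
};

static void
dispatch(const char *event)
{
	size_t i;

	for (i = 0; i < sizeof(reasons) / sizeof(reasons[0]); i++) {
		if (strcmp(event, reasons[i].name) == 0) {
			reasons[i].handler();
			return;
		}
	}
}

int
main(void)
{
	dispatch("halt");	/* prints "poweroff" */
	return (0);
}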

Modified: trunk/sys/dev/xen/netback/netback.c
===================================================================
--- trunk/sys/dev/xen/netback/netback.c	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/netback/netback.c	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009-2011 Spectra Logic Corporation
  * All rights reserved.
@@ -33,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/netback/netback.c 319222 2017-05-30 16:15:52Z asomers $");
 
 /**
  * \file netback.c
@@ -42,6 +43,7 @@
  * 	  from this FreeBSD domain to other domains.
  */
 #include "opt_inet.h"
+#include "opt_inet6.h"
 #include "opt_global.h"
 
 #include "opt_sctp.h"
@@ -79,14 +81,15 @@
 #include <vm/vm_kern.h>
 
 #include <machine/_inttypes.h>
-#include <machine/xen/xen-os.h>
-#include <machine/xen/xenvar.h>
 
-#include <xen/evtchn.h>
+#include <xen/xen-os.h>
+#include <xen/hypervisor.h>
 #include <xen/xen_intr.h>
 #include <xen/interface/io/netif.h>
 #include <xen/xenbus/xenbusvar.h>
 
+#include <machine/xen/xenvar.h>
+
 /*--------------------------- Compile-time Tunables --------------------------*/
 
 /*---------------------------------- Macros ----------------------------------*/
@@ -182,7 +185,6 @@
 static int	xnb_rxpkt2rsp(const struct xnb_pkt *pkt,
 			      const gnttab_copy_table gnttab, int n_entries,
 			      netif_rx_back_ring_t *ring);
-static void	xnb_add_mbuf_cksum(struct mbuf *mbufc);
 static void	xnb_stop(struct xnb_softc*);
 static int	xnb_ioctl(struct ifnet*, u_long, caddr_t);
 static void	xnb_start_locked(struct ifnet*);
@@ -193,6 +195,9 @@
 static int	xnb_unit_test_main(SYSCTL_HANDLER_ARGS);
 static int	xnb_dump_rings(SYSCTL_HANDLER_ARGS);
 #endif
+#if defined(INET) || defined(INET6)
+static void	xnb_add_mbuf_cksum(struct mbuf *mbufc);
+#endif
 /*------------------------------ Data Structures -----------------------------*/
 
 
@@ -433,8 +438,8 @@
 	/** Xen device handle.*/
 	long 			handle;
 
-	/** IRQ mapping for the communication ring event channel. */
-	int			irq;
+	/** Handle to the communication ring event channel. */
+	xen_intr_handle_t	xen_intr_handle;
 
 	/**
 	 * \brief Cached value of the front-end's domain id.
@@ -587,14 +592,14 @@
 	if (m->m_flags & M_PKTHDR) {
 		printf("    flowid=%10d, csum_flags=%#8x, csum_data=%#8x, "
 		       "tso_segsz=%5hd\n",
-		       m->m_pkthdr.flowid, m->m_pkthdr.csum_flags,
+		       m->m_pkthdr.flowid, (int)m->m_pkthdr.csum_flags,
 		       m->m_pkthdr.csum_data, m->m_pkthdr.tso_segsz);
-		printf("    rcvif=%16p,  header=%18p, len=%19d\n",
-		       m->m_pkthdr.rcvif, m->m_pkthdr.header, m->m_pkthdr.len);
+		printf("    rcvif=%16p,  len=%19d\n",
+		       m->m_pkthdr.rcvif, m->m_pkthdr.len);
 	}
 	printf("    m_next=%16p, m_nextpk=%16p, m_data=%16p\n",
 	       m->m_next, m->m_nextpkt, m->m_data);
-	printf("    m_len=%17d, m_flags=%#15x, m_type=%18hd\n",
+	printf("    m_len=%17d, m_flags=%#15x, m_type=%18u\n",
 	       m->m_len, m->m_flags, m->m_type);
 
 	len = m->m_len;
@@ -621,7 +626,7 @@
 {
 	if (xnb->kva != 0) {
 #ifndef XENHVM
-		kmem_free(kernel_map, xnb->kva, xnb->kva_size);
+		kva_free(xnb->kva, xnb->kva_size);
 #else
 		if (xnb->pseudo_phys_res != NULL) {
 			bus_release_resource(xnb->dev, SYS_RES_MEMORY,
@@ -647,10 +652,7 @@
 	int error;
 	int i;
 
-	if (xnb->irq != 0) {
-		unbind_from_irqhandler(xnb->irq);
-		xnb->irq = 0;
-	}
+	xen_intr_unbind(xnb->xen_intr_handle);
 
 	/*
 	 * We may still have another thread currently processing requests.  We
@@ -773,13 +775,13 @@
 
 	xnb->flags |= XNBF_RING_CONNECTED;
 
-	error =
-	    bind_interdomain_evtchn_to_irqhandler(xnb->otherend_id,
-						  xnb->evtchn,
-						  device_get_nameunit(xnb->dev),
-						  xnb_intr, /*arg*/xnb,
-						  INTR_TYPE_BIO | INTR_MPSAFE,
-						  &xnb->irq);
+	error = xen_intr_bind_remote_port(xnb->dev,
+					  xnb->otherend_id,
+					  xnb->evtchn,
+					  /*filter*/NULL,
+					  xnb_intr, /*arg*/xnb,
+					  INTR_TYPE_BIO | INTR_MPSAFE,
+					  &xnb->xen_intr_handle);
 	if (error != 0) {
 		(void)xnb_disconnect(xnb);
 		xenbus_dev_fatal(xnb->dev, error, "binding event channel");
@@ -811,7 +813,7 @@
 		xnb->kva_size += xnb->ring_configs[i].ring_pages * PAGE_SIZE;
 	}
 #ifndef XENHVM
-	xnb->kva = kmem_alloc_nofault(kernel_map, xnb->kva_size);
+	xnb->kva = kva_alloc(xnb->kva_size);
 	if (xnb->kva == 0)
 		return (ENOMEM);
 	xnb->gnt_base_addr = xnb->kva;
@@ -1110,14 +1112,13 @@
 	xs_vprintf(XST_NIL, xenbus_get_node(xnb->dev),
 		  "hotplug-error", fmt, ap_hotplug);
 	va_end(ap_hotplug);
-	xs_printf(XST_NIL, xenbus_get_node(xnb->dev),
+	(void)xs_printf(XST_NIL, xenbus_get_node(xnb->dev),
 		  "hotplug-status", "error");
 
 	xenbus_dev_vfatal(xnb->dev, err, fmt, ap);
 	va_end(ap);
 
-	xs_printf(XST_NIL, xenbus_get_node(xnb->dev),
-		  "online", "0");
+	(void)xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "online", "0");
 	xnb_detach(xnb->dev);
 }
 
@@ -1448,7 +1449,7 @@
 
 		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(txb, notify);
 		if (notify != 0)
-			notify_remote_via_irq(xnb->irq);
+			xen_intr_signal(xnb->xen_intr_handle);
 
 		txb->sring->req_event = txb->req_cons + 1;
 		xen_mb();
@@ -1780,7 +1781,9 @@
 	}
 	mbufc->m_pkthdr.len = total_size;
 
+#if defined(INET) || defined(INET6)
 	xnb_add_mbuf_cksum(mbufc);
+#endif
 }
 
 /**
@@ -1811,12 +1814,12 @@
 	if (num_consumed == 0)
 		return 0;	/* Nothing to receive */
 
-	/* update statistics indepdent of errors */
+	/* update statistics independent of errors */
 	ifnet->if_ipackets++;
 
 	/*
 	 * if we got here, then 1 or more requests were consumed, but the packet
-	 * is not necesarily valid.
+	 * is not necessarily valid.
 	 */
 	if (xnb_pkt_is_valid(&pkt) == 0) {
 		/* got a garbage packet, respond and drop it */
@@ -2123,6 +2126,7 @@
 	return n_responses;
 }
 
+#if defined(INET) || defined(INET6)
 /**
  * Add IP, TCP, and/or UDP checksums to every mbuf in a chain.  The first mbuf
  * in the chain must start with a struct ether_header.
@@ -2177,6 +2181,7 @@
 		break;
 	}
 }
+#endif /* INET || INET6 */
 
 static void
 xnb_stop(struct xnb_softc *xnb)
@@ -2193,8 +2198,8 @@
 xnb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct xnb_softc *xnb = ifp->if_softc;
+	struct ifreq *ifr = (struct ifreq*) data;
 #ifdef INET
-	struct ifreq *ifr = (struct ifreq*) data;
 	struct ifaddr *ifa = (struct ifaddr*)data;
 #endif
 	int error = 0;
@@ -2361,7 +2366,7 @@
 
 		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(rxb, notify);
 		if ((notify != 0) || (out_of_space != 0))
-			notify_remote_via_irq(xnb->irq);
+			xen_intr_signal(xnb->xen_intr_handle);
 		rxb->sring->req_event = req_prod_local + 1;
 		xen_mb();
 	} while (rxb->sring->req_prod != req_prod_local) ;
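
Both netback ring consumers above push responses and then signal the
peer only when RING_PUSH_RESPONSES_AND_CHECK_NOTIFY() reports that the
frontend is actually waiting; xen_intr_signal() replaces the old
notify_remote_via_irq() for the delivery itself.  A standalone model of
the hold-off arithmetic (the real macro operates on a shared ring and
issues memory barriers; this sketch shows only the index test):

#include <stdint.h>
#include <stdio.h>

struct ring {
	uint32_t rsp_prod;	/* responses published so far */
	uint32_t rsp_event;	/* peer wants an event after this index */
};

/* Returns nonzero when the peer must be signalled. */
static int
push_and_check_notify(struct ring *r, uint32_t new_prod)
{
	uint32_t old_prod = r->rsp_prod;

	r->rsp_prod = new_prod;		/* a wmb() would precede this */
	/* Notify only if rsp_event falls in (old_prod, new_prod]. */
	return ((uint32_t)(new_prod - r->rsp_event) <
	    (uint32_t)(new_prod - old_prod));
}

int
main(void)
{
	struct ring r = { .rsp_prod = 10, .rsp_event = 11 };

	if (push_and_check_notify(&r, 12))
		printf("notify peer\n");	/* 11 is in (10, 12] */
	return (0);
}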

Modified: trunk/sys/dev/xen/netback/netback_unit_tests.c
===================================================================
--- trunk/sys/dev/xen/netback/netback_unit_tests.c	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/netback/netback_unit_tests.c	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009-2011 Spectra Logic Corporation
  * All rights reserved.
@@ -33,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/netback/netback_unit_tests.c 319222 2017-05-30 16:15:52Z asomers $");
 
 /**
  * \file netback_unit_tests.c
@@ -104,10 +105,6 @@
 
 typedef struct test_fixture test_fixture_t;
 
-static void	xnb_fill_eh_and_ip(struct mbuf *m, uint16_t ip_len,
-				   uint16_t ip_id, uint16_t ip_p,
-				   uint16_t ip_off, uint16_t ip_sum);
-static void	xnb_fill_tcp(struct mbuf *m);
 static int	xnb_get1pkt(struct xnb_pkt *pkt, size_t size, uint16_t flags);
 static int	xnb_unit_test_runner(test_fixture_t const tests[], int ntests,
 				     char *buffer, size_t buflen);
@@ -163,6 +160,13 @@
 static testcase_t xnb_rxpkt2rsp_2short;
 static testcase_t xnb_rxpkt2rsp_2slots;
 static testcase_t xnb_rxpkt2rsp_copyerror;
+static testcase_t xnb_sscanf_llu;
+static testcase_t xnb_sscanf_lld;
+static testcase_t xnb_sscanf_hhu;
+static testcase_t xnb_sscanf_hhd;
+static testcase_t xnb_sscanf_hhn;
+
+#if defined(INET) || defined(INET6)
 /* TODO: add test cases for xnb_add_mbuf_cksum for IPV6 tcp and udp */
 static testcase_t xnb_add_mbuf_cksum_arp;
 static testcase_t xnb_add_mbuf_cksum_tcp;
@@ -169,11 +173,11 @@
 static testcase_t xnb_add_mbuf_cksum_udp;
 static testcase_t xnb_add_mbuf_cksum_icmp;
 static testcase_t xnb_add_mbuf_cksum_tcp_swcksum;
-static testcase_t xnb_sscanf_llu;
-static testcase_t xnb_sscanf_lld;
-static testcase_t xnb_sscanf_hhu;
-static testcase_t xnb_sscanf_hhd;
-static testcase_t xnb_sscanf_hhn;
+static void	xnb_fill_eh_and_ip(struct mbuf *m, uint16_t ip_len,
+				   uint16_t ip_id, uint16_t ip_p,
+				   uint16_t ip_off, uint16_t ip_sum);
+static void	xnb_fill_tcp(struct mbuf *m);
+#endif /* INET || INET6 */
 
 /** Private data used by unit tests */
 static struct {
@@ -307,11 +311,13 @@
 		{setup_pvt_data, xnb_rxpkt2rsp_2short, teardown_pvt_data},
 		{setup_pvt_data, xnb_rxpkt2rsp_2slots, teardown_pvt_data},
 		{setup_pvt_data, xnb_rxpkt2rsp_copyerror, teardown_pvt_data},
+#if defined(INET) || defined(INET6)
 		{null_setup, xnb_add_mbuf_cksum_arp, null_teardown},
 		{null_setup, xnb_add_mbuf_cksum_icmp, null_teardown},
 		{null_setup, xnb_add_mbuf_cksum_tcp, null_teardown},
 		{null_setup, xnb_add_mbuf_cksum_tcp_swcksum, null_teardown},
 		{null_setup, xnb_add_mbuf_cksum_udp, null_teardown},
+#endif
 		{null_setup, xnb_sscanf_hhd, null_teardown},
 		{null_setup, xnb_sscanf_hhu, null_teardown},
 		{null_setup, xnb_sscanf_lld, null_teardown},
@@ -1222,6 +1228,10 @@
 	xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, xnb_unit_pvt.txb.req_cons);
 
 	pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp);
+	XNB_ASSERT(pMbuf != NULL);
+	if (pMbuf == NULL)
+		return;
+
 	n_entries = xnb_txpkt2gnttab(&pkt, pMbuf, xnb_unit_pvt.gnttab,
 	    &xnb_unit_pvt.txb, DOMID_FIRST_RESERVED);
 
@@ -1266,8 +1276,7 @@
 		/* should never get here */
 		XNB_ASSERT(0);
 	}
-	if (pMbuf != NULL)
-		m_freem(pMbuf);
+	m_freem(pMbuf);
 }
 
 
@@ -1478,7 +1487,7 @@
 	safe_m_freem(&mbuf);
 }
 
-/** xnb_mbufc2pkt on a a two-mbuf chain with short data regions */
+/** xnb_mbufc2pkt on a two-mbuf chain with short data regions */
 static void
 xnb_mbufc2pkt_2short(char *buffer, size_t buflen) {
 	struct xnb_pkt pkt;
@@ -1489,15 +1498,14 @@
 	struct mbuf *mbufc, *mbufc2;
 
 	mbufc = m_getm(NULL, size1, M_WAITOK, MT_DATA);
+	XNB_ASSERT(mbufc != NULL);
+	if (mbufc == NULL)
+		return;
 	mbufc->m_flags |= M_PKTHDR;
-	if (mbufc == NULL) {
-		XNB_ASSERT(mbufc != NULL);
-		return;
-	}
 
 	mbufc2 = m_getm(mbufc, size2, M_WAITOK, MT_DATA);
+	XNB_ASSERT(mbufc2 != NULL);
 	if (mbufc2 == NULL) {
-		XNB_ASSERT(mbufc2 != NULL);
 		safe_m_freem(&mbufc);
 		return;
 	}
@@ -1521,7 +1529,7 @@
 	safe_m_freem(&mbufc2);
 }
 
-/** xnb_mbufc2pkt on a a mbuf chain with >1 mbuf cluster */
+/** xnb_mbufc2pkt on a mbuf chain with >1 mbuf cluster */
 static void
 xnb_mbufc2pkt_long(char *buffer, size_t buflen) {
 	struct xnb_pkt pkt;
@@ -1532,11 +1540,10 @@
 	struct mbuf *mbufc, *m;
 
 	mbufc = m_getm(NULL, size, M_WAITOK, MT_DATA);
+	XNB_ASSERT(mbufc != NULL);
+	if (mbufc == NULL)
+		return;
 	mbufc->m_flags |= M_PKTHDR;
-	if (mbufc == NULL) {
-		XNB_ASSERT(mbufc != NULL);
-		return;
-	}
 
 	mbufc->m_pkthdr.len = size;
 	size_remaining = size;
@@ -1560,7 +1567,7 @@
 	safe_m_freem(&mbufc);
 }
 
-/** xnb_mbufc2pkt on a a mbuf chain with >1 mbuf cluster and extra info */
+/** xnb_mbufc2pkt on a mbuf chain with >1 mbuf cluster and extra info */
 static void
 xnb_mbufc2pkt_extra(char *buffer, size_t buflen) {
 	struct xnb_pkt pkt;
@@ -1571,10 +1578,9 @@
 	struct mbuf *mbufc, *m;
 
 	mbufc = m_getm(NULL, size, M_WAITOK, MT_DATA);
-	if (mbufc == NULL) {
-		XNB_ASSERT(mbufc != NULL);
+	XNB_ASSERT(mbufc != NULL);
+	if (mbufc == NULL)
 		return;
-	}
 
 	mbufc->m_flags |= M_PKTHDR;
 	mbufc->m_pkthdr.len = size;
@@ -1614,11 +1620,10 @@
 	int error;
 
 	mbufc = m_getm(NULL, size, M_WAITOK, MT_DATA);
+	XNB_ASSERT(mbufc != NULL);
+	if (mbufc == NULL)
+		return;
 	mbufc->m_flags |= M_PKTHDR;
-	if (mbufc == NULL) {
-		XNB_ASSERT(mbufc != NULL);
-		return;
-	}
 
 	mbufc->m_pkthdr.len = size;
 	size_remaining = size;
@@ -1835,10 +1840,9 @@
 	struct netif_extra_info *ext;
 
 	mbufc = m_getm(NULL, size, M_WAITOK, MT_DATA);
-	if (mbufc == NULL) {
-		XNB_ASSERT(mbufc != NULL);
+	XNB_ASSERT(mbufc != NULL);
+	if (mbufc == NULL)
 		return;
-	}
 
 	mbufc->m_flags |= M_PKTHDR;
 	mbufc->m_pkthdr.len = size;
@@ -1969,11 +1973,10 @@
 	struct mbuf *mbufc;
 
 	mbufc = m_getm(NULL, size1, M_WAITOK, MT_DATA);
+	XNB_ASSERT(mbufc != NULL);
+	if (mbufc == NULL)
+		return;
 	mbufc->m_flags |= M_PKTHDR;
-	if (mbufc == NULL) {
-		XNB_ASSERT(mbufc != NULL);
-		return;
-	}
 
 	m_getm(mbufc, size2, M_WAITOK, MT_DATA);
 	XNB_ASSERT(mbufc->m_next != NULL);
@@ -2066,6 +2069,7 @@
 	safe_m_freem(&mbuf);
 }
 
+#if defined(INET) || defined(INET6)
 /**
  * xnb_add_mbuf_cksum on an ARP request packet
  */
@@ -2430,6 +2434,7 @@
 
 	m_freem(mbufc);
 }
+#endif /* INET || INET6 */
 
 /**
  * sscanf on unsigned chars
@@ -2444,7 +2449,7 @@
 	for (i = 0; i < 12; i++)
 		dest[i] = 'X';
 
-	sscanf(mystr, "%hhu", &dest[4]);
+	XNB_ASSERT(sscanf(mystr, "%hhu", &dest[4]) == 1);
 	for (i = 0; i < 12; i++)
 		XNB_ASSERT(dest[i] == (i == 4 ? 137 : 'X'));
 }
@@ -2462,7 +2467,7 @@
 	for (i = 0; i < 12; i++)
 		dest[i] = 'X';
 
-	sscanf(mystr, "%hhd", &dest[4]);
+	XNB_ASSERT(sscanf(mystr, "%hhd", &dest[4]) == 1);
 	for (i = 0; i < 12; i++)
 		XNB_ASSERT(dest[i] == (i == 4 ? -27 : 'X'));
 }
@@ -2480,7 +2485,7 @@
 	for (i = 0; i < 3; i++)
 		dest[i] = (long long)0xdeadbeefdeadbeef;
 
-	sscanf(mystr, "%lld", &dest[1]);
+	XNB_ASSERT(sscanf(mystr, "%lld", &dest[1]) == 1);
 	for (i = 0; i < 3; i++)
 		XNB_ASSERT(dest[i] == (i != 1 ? (long long)0xdeadbeefdeadbeef :
 		    -123456789012345));
@@ -2499,7 +2504,7 @@
 	for (i = 0; i < 3; i++)
 		dest[i] = (long long)0xdeadbeefdeadbeef;
 
-	sscanf(mystr, "%llu", &dest[1]);
+	XNB_ASSERT(sscanf(mystr, "%llu", &dest[1]) == 1);
 	for (i = 0; i < 3; i++)
 		XNB_ASSERT(dest[i] == (i != 1 ? (long long)0xdeadbeefdeadbeef :
 		    12802747070103273189ull));
@@ -2521,10 +2526,10 @@
 	for (i = 0; i < 12; i++)
 		dest[i] = (unsigned char)'X';
 
-	sscanf(mystr,
+	XNB_ASSERT(sscanf(mystr,
 	    "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"
 	    "202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f"
-	    "404142434445464748494a4b4c4d4e4f%hhn", &dest[4]);
+	    "404142434445464748494a4b4c4d4e4f%hhn", &dest[4]) == 0);
 	for (i = 0; i < 12; i++)
 		XNB_ASSERT(dest[i] == (i == 4 ? 160 : 'X'));
 }
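
The xnb_sscanf_hhn test above now asserts a return value of 0 because
%n (including its %hhn form) stores the number of bytes consumed
without counting as a conversion, so it never contributes to sscanf()'s
result.  A standalone illustration mixing a real conversion with %hhn:

#include <assert.h>
#include <stdio.h>

int
main(void)
{
	unsigned char consumed = 0;
	int val = 0, matched;

	matched = sscanf("abc 42", "abc %d%hhn", &val, &consumed);
	assert(matched == 1);			/* only %d is counted */
	assert(val == 42 && consumed == 6);	/* 6 bytes were read */
	printf("matched=%d consumed=%u\n", matched, consumed);
	return (0);
}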

Modified: trunk/sys/dev/xen/netfront/mbufq.h
===================================================================
--- trunk/sys/dev/xen/netfront/mbufq.h	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/netfront/mbufq.h	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /**************************************************************************
 
 Copyright (c) 2007, Chelsio Inc.
@@ -25,7 +26,7 @@
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
-$MidnightBSD$
+$FreeBSD: stable/10/sys/dev/xen/netfront/mbufq.h 181643 2008-08-12 20:01:57Z kmacy $
 
 ***************************************************************************/
 

Modified: trunk/sys/dev/xen/netfront/netfront.c
===================================================================
--- trunk/sys/dev/xen/netfront/netfront.c	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/netfront/netfront.c	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2004-2006 Kip Macy
  * All rights reserved.
@@ -25,9 +26,10 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/netfront/netfront.c 316170 2017-03-29 17:11:41Z ngie $");
 
 #include "opt_inet.h"
+#include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -41,6 +43,7 @@
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/sx.h>
+#include <sys/limits.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
@@ -76,17 +79,16 @@
 
 #include <machine/intr_machdep.h>
 
-#include <machine/xen/xen-os.h>
-#include <machine/xen/xenfunc.h>
-#include <machine/xen/xenvar.h>
+#include <xen/xen-os.h>
 #include <xen/hypervisor.h>
 #include <xen/xen_intr.h>
-#include <xen/evtchn.h>
 #include <xen/gnttab.h>
 #include <xen/interface/memory.h>
 #include <xen/interface/io/netif.h>
 #include <xen/xenbus/xenbusvar.h>
 
+#include <machine/xen/xenvar.h>
+
 #include <dev/xen/netfront/mbufq.h>
 
 #include "xenbus_if.h"
@@ -165,7 +167,6 @@
 static void xn_watchdog(struct ifnet *);
 #endif
 
-static void show_device(struct netfront_info *sc);
 #ifdef notyet
 static void netfront_closing(device_t dev);
 #endif
@@ -256,8 +257,7 @@
 	struct mtx   rx_lock;
 	struct mtx   sc_lock;
 
-	u_int handle;
-	u_int irq;
+	xen_intr_handle_t xen_intr_handle;
 	u_int copying_receiver;
 	u_int carrier;
 	u_int maxfrags;
@@ -288,6 +288,8 @@
 	multicall_entry_t	rx_mcl[NET_RX_RING_SIZE+1];
 	mmu_update_t		rx_mmu[NET_RX_RING_SIZE];
 	struct ifmedia		sc_media;
+
+	bool			xn_resume;
 };
 
 #define rx_mbufs xn_cdata.xn_rx_chain
@@ -450,6 +452,11 @@
 netfront_probe(device_t dev)
 {
 
+#ifdef XENHVM
+	if (xen_disable_pv_nics != 0)
+		return (ENXIO);
+#endif
+
 	if (!strcmp(xenbus_get_type(dev), "vif")) {
 		device_set_desc(dev, "Virtual Network Interface");
 		return (0);
@@ -472,7 +479,7 @@
 #if __FreeBSD_version >= 700000
 	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
-	    OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW,
+	    OID_AUTO, "enable_lro", CTLFLAG_RW,
 	    &xn_enable_lro, 0, "Large Receive Offload");
 #endif
 
@@ -503,6 +510,16 @@
 {
 	struct netfront_info *info = device_get_softc(dev);
 
+	if (xen_suspend_cancelled) {
+		XN_RX_LOCK(info);
+		XN_TX_LOCK(info);
+		netfront_carrier_on(info);
+		XN_TX_UNLOCK(info);
+		XN_RX_UNLOCK(info);
+		return (0);
+	}
+
+	info->xn_resume = true;
 	netif_disconnect_backend(info);
 	return (0);
 }
@@ -546,7 +563,8 @@
 		goto abort_transaction;
 	}
 	err = xs_printf(xst, node,
-			"event-channel", "%u", irq_to_evtchn_port(info->irq));
+			"event-channel", "%u",
+			xen_intr_port(info->xen_intr_handle));
 	if (err) {
 		message = "writing event-channel";
 		goto abort_transaction;
@@ -608,7 +626,6 @@
 	info->rx_ring_ref = GRANT_REF_INVALID;
 	info->rx.sring = NULL;
 	info->tx.sring = NULL;
-	info->irq = 0;
 
 	txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
 	if (!txs) {
@@ -635,17 +652,16 @@
 	if (error)
 		goto fail;
 
-	error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
-	    "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq);
+	error = xen_intr_alloc_and_bind_local_port(dev,
+	    xenbus_get_otherend_id(dev), /*filter*/NULL, xn_intr, info,
+	    INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, &info->xen_intr_handle);
 
 	if (error) {
 		xenbus_dev_fatal(dev, error,
-				 "bind_evtchn_to_irqhandler failed");
+				 "xen_intr_alloc_and_bind_local_port failed");
 		goto fail;
 	}
 
-	show_device(info);
-	
 	return (0);
 	
  fail:
@@ -686,7 +702,6 @@
 	switch (newstate) {
 	case XenbusStateInitialising:
 	case XenbusStateInitialised:
-	case XenbusStateConnected:
 	case XenbusStateUnknown:
 	case XenbusStateClosed:
 	case XenbusStateReconfigured:
@@ -698,13 +713,15 @@
 		if (network_connect(sc) != 0)
 			break;
 		xenbus_set_state(dev, XenbusStateConnected);
+		break;
+	case XenbusStateClosing:
+		xenbus_set_state(dev, XenbusStateClosed);
+		break;
+	case XenbusStateConnected:
 #ifdef INET
 		netfront_send_fake_arp(dev, sc);
 #endif
 		break;
-	case XenbusStateClosing:
-		xenbus_set_state(dev, XenbusStateClosed);
-		break;
 	}
 }
 
@@ -789,6 +806,45 @@
 }
 
 static void
+netif_release_rx_bufs_copy(struct netfront_info *np)
+{
+	struct mbuf *m;
+	grant_ref_t ref;
+	unsigned int i, busy, inuse;
+
+	XN_RX_LOCK(np);
+
+	for (busy = inuse = i = 0; i < NET_RX_RING_SIZE; i++) {
+		ref = np->grant_rx_ref[i];
+
+		if (ref == GRANT_REF_INVALID)
+			continue;
+
+		inuse++;
+
+		m = np->rx_mbufs[i];
+
+		if (!gnttab_end_foreign_access_ref(ref)) {
+			busy++;
+			continue;
+		}
+
+		gnttab_release_grant_reference(&np->gref_rx_head, ref);
+		np->grant_rx_ref[i] = GRANT_REF_INVALID;
+		add_id_to_freelist(np->rx_mbufs, i);
+
+		m_freem(m);
+	}
+
+	if (busy != 0)
+		device_printf(np->xbdev,
+		    "Unable to release %u of %u in use grant references out of %zu total.\n",
+		    busy, inuse, NET_RX_RING_SIZE);
+
+	XN_RX_UNLOCK(np);
+}
+
+static void
 network_alloc_rx_buffers(struct netfront_info *sc)
 {
 	int otherend_id = xenbus_get_otherend_id(sc->xbdev);
@@ -805,7 +861,7 @@
 	
 	req_prod = sc->rx.req_prod_pvt;
 
-	if (unlikely(sc->carrier == 0))
+	if (__predict_false(sc->carrier == 0))
 		return;
 	
 	/*
@@ -945,7 +1001,7 @@
 			/* Zap PTEs and give away pages in one big multicall. */
 			(void)HYPERVISOR_multicall(sc->rx_mcl, i+1);
 
-			if (unlikely(sc->rx_mcl[i].result != i ||
+			if (__predict_false(sc->rx_mcl[i].result != i ||
 			    HYPERVISOR_memory_op(XENMEM_decrease_reservation,
 			    &reservation) != i))
 				panic("%s: unable to reduce memory "
@@ -960,7 +1016,7 @@
 push:
 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify);
 	if (notify)
-		notify_remote_via_irq(sc->irq);
+		xen_intr_signal(sc->xen_intr_handle);
 }
 
 static void
@@ -967,7 +1023,7 @@
 xn_rxeof(struct netfront_info *np)
 {
 	struct ifnet *ifp;
-#if __FreeBSD_version >= 700000
+#if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6))
 	struct lro_ctrl *lro = &np->xn_lro;
 	struct lro_entry *queued;
 #endif
@@ -1002,7 +1058,7 @@
 			err = xennet_get_responses(np, &rinfo, rp, &i, &m,
 			    &pages_flipped);
 
-			if (unlikely(err)) {
+			if (__predict_false(err)) {
 				if (m)
 					mbufq_tail(&errq, m);
 				np->stats.rx_errors++;
@@ -1064,7 +1120,7 @@
 			 * Do we really need to drop the rx lock?
 			 */
 			XN_RX_UNLOCK(np);
-#if __FreeBSD_version >= 700000
+#if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6))
 			/* Use LRO if possible */
 			if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
 			    lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
@@ -1082,7 +1138,7 @@
 	
 		np->rx.rsp_cons = i;
 
-#if __FreeBSD_version >= 700000
+#if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6))
 		/*
 		 * Flush any outstanding LRO work
 		 */
@@ -1150,7 +1206,7 @@
 			 */
 			if (!m->m_next)
 				ifp->if_opackets++;
-			if (unlikely(gnttab_query_foreign_access(
+			if (__predict_false(gnttab_query_foreign_access(
 			    np->grant_tx_ref[id]) != 0)) {
 				panic("%s: grant id %u still in use by the "
 				    "backend", __func__, id);
@@ -1248,7 +1304,7 @@
 		struct mbuf *m;
 		grant_ref_t ref;
 
-		if (unlikely(*cons + 1 == rp)) {
+		if (__predict_false(*cons + 1 == rp)) {
 #if 0			
 			if (net_ratelimit())
 				WPRINTK("Missing extra info\n");
@@ -1260,7 +1316,7 @@
 		extra = (struct netif_extra_info *)
 		RING_GET_RESPONSE(&np->rx, ++(*cons));
 
-		if (unlikely(!extra->type ||
+		if (__predict_false(!extra->type ||
 			extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
 #if 0				
 			if (net_ratelimit())
@@ -1316,7 +1372,7 @@
 		DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n",
 			rx->status, rx->offset, frags);
 #endif
-		if (unlikely(rx->status < 0 ||
+		if (__predict_false(rx->status < 0 ||
 			rx->offset + rx->status > PAGE_SIZE)) {
 
 #if 0						
@@ -1678,7 +1734,7 @@
 
 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify);
 	if (notify)
-		notify_remote_via_irq(sc->irq);
+		xen_intr_signal(sc->xen_intr_handle);
 
 	if (RING_FULL(&sc->tx)) {
 		sc->tx_full = 1;
@@ -1960,7 +2016,7 @@
 	 * packets.
 	 */
 	netfront_carrier_on(np);
-	notify_remote_via_irq(np->irq);
+	xen_intr_signal(np->xen_intr_handle);
 	XN_TX_LOCK(np);
 	xn_txeof(np);
 	XN_TX_UNLOCK(np);
@@ -1969,25 +2025,6 @@
 	return (0);
 }
 
-static void 
-show_device(struct netfront_info *sc)
-{
-#ifdef DEBUG
-	if (sc) {
-		IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
-			sc->xn_ifno,
-			be_state_name[sc->xn_backend_state],
-			sc->xn_user_state ? "open" : "closed",
-			sc->xn_evtchn,
-			sc->xn_irq,
-			sc->xn_tx_if,
-			sc->xn_rx_if);
-	} else {
-		IPRINTK("<vif NULL>\n");
-	}
-#endif
-}
-
 static void
 xn_query_features(struct netfront_info *np)
 {
@@ -2021,18 +2058,33 @@
 static int
 xn_configure_features(struct netfront_info *np)
 {
-	int err;
+	int err, cap_enabled;
 
 	err = 0;
-#if __FreeBSD_version >= 700000
-	if ((np->xn_ifp->if_capenable & IFCAP_LRO) != 0)
+
+	if (np->xn_resume &&
+	    ((np->xn_ifp->if_capenable & np->xn_ifp->if_capabilities)
+	    == np->xn_ifp->if_capenable)) {
+		/* Current options are available, no need to do anything. */
+		return (0);
+	}
+
+	/* Try to preserve as many options as possible. */
+	if (np->xn_resume)
+		cap_enabled = np->xn_ifp->if_capenable;
+	else
+		cap_enabled = UINT_MAX;
+
+#if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6))
+	if ((np->xn_ifp->if_capenable & IFCAP_LRO) == (cap_enabled & IFCAP_LRO))
 		tcp_lro_free(&np->xn_lro);
 #endif
     	np->xn_ifp->if_capenable =
-	    np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4);
+	    np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4) & cap_enabled;
 	np->xn_ifp->if_hwassist &= ~CSUM_TSO;
-#if __FreeBSD_version >= 700000
-	if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) != 0) {
+#if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6))
+	if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) ==
+	    (cap_enabled & IFCAP_LRO)) {
 		err = tcp_lro_init(&np->xn_lro);
 		if (err) {
 			device_printf(np->xbdev, "LRO initialization failed\n");
@@ -2041,7 +2093,8 @@
 			np->xn_ifp->if_capenable |= IFCAP_LRO;
 		}
 	}
-	if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) != 0) {
+	if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) ==
+	    (cap_enabled & IFCAP_TSO4)) {
 		np->xn_ifp->if_capenable |= IFCAP_TSO4;
 		np->xn_ifp->if_hwassist |= CSUM_TSO;
 	}
@@ -2049,8 +2102,9 @@
 	return (err);
 }
 
-/** Create a network device.
- * @param handle device handle
+/**
+ * Create a network device.
+ * @param dev  Newbus device representing this virtual NIC.
  */
 int 
 create_netdev(device_t dev)
@@ -2118,14 +2172,16 @@
     	ifp->if_watchdog = xn_watchdog;
 #endif
     	ifp->if_init = xn_ifinit;
-    	ifp->if_mtu = ETHERMTU;
     	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
 	
     	ifp->if_hwassist = XN_CSUM_FEATURES;
     	ifp->if_capabilities = IFCAP_HWCSUM;
+	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
+	ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS;
+	ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
 	
     	ether_ifattach(ifp, np->mac);
-    	callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);
+    	callout_init(&np->xn_stat_ch, 1);
 	netfront_carrier_off(np);
 
 	return (0);
@@ -2172,10 +2228,23 @@
 static void
 netif_free(struct netfront_info *info)
 {
+	XN_LOCK(info);
+	xn_stop(info);
+	XN_UNLOCK(info);
+	callout_drain(&info->xn_stat_ch);
 	netif_disconnect_backend(info);
-#if 0
-	close_netdev(info);
-#endif
+	if (info->xn_ifp != NULL) {
+		ether_ifdetach(info->xn_ifp);
+		if_free(info->xn_ifp);
+		info->xn_ifp = NULL;
+	}
+	ifmedia_removeall(&info->sc_media);
+	netif_release_tx_bufs(info);
+	if (info->copying_receiver)
+		netif_release_rx_bufs_copy(info);
+
+	gnttab_free_grant_references(info->gref_tx_head);
+	gnttab_free_grant_references(info->gref_rx_head);
 }
 
 static void
@@ -2190,10 +2259,7 @@
 	free_ring(&info->tx_ring_ref, &info->tx.sring);
 	free_ring(&info->rx_ring_ref, &info->rx.sring);
 
-	if (info->irq)
-		unbind_from_irqhandler(info->irq);
-
-	info->irq = 0;
+	xen_intr_unbind(&info->xen_intr_handle);
 }
 
 static void
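
The xn_configure_features() change above preserves interface options
across a migration/resume: on first attach every supported capability
may be enabled (cap_enabled = UINT_MAX), while on resume a capability is
re-enabled only if it was enabled before the suspend.  A standalone
model of the mask logic (the CAP_* flag values are illustrative, not
the kernel's IFCAP_* definitions):

#include <limits.h>
#include <stdio.h>

#define CAP_HWCSUM	0x1
#define CAP_TSO4	0x2

static unsigned
configure(unsigned capabilities, unsigned prev_enabled, int resume)
{
	unsigned cap_enabled = resume ? prev_enabled : UINT_MAX;
	unsigned capenable;

	/* Start with everything but TSO4, filtered by cap_enabled. */
	capenable = capabilities & ~CAP_TSO4 & cap_enabled;
	/* Re-add TSO4 only if supported and previously enabled. */
	if ((capabilities & CAP_TSO4) == (cap_enabled & CAP_TSO4))
		capenable |= CAP_TSO4;
	return (capenable);
}

int
main(void)
{
	unsigned caps = CAP_HWCSUM | CAP_TSO4;

	printf("fresh attach:   %#x\n", configure(caps, 0, 0));
	printf("resume, no TSO: %#x\n", configure(caps, CAP_HWCSUM, 1));
	return (0);
}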

Modified: trunk/sys/dev/xen/pcifront/pcifront.c
===================================================================
--- trunk/sys/dev/xen/pcifront/pcifront.c	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/pcifront/pcifront.c	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * Copyright (c) 2006, Cisco Systems, Inc.
  * All rights reserved.
@@ -29,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/pcifront/pcifront.c 265999 2014-05-14 01:35:43Z ian $");
 
 #include <sys/param.h>
 #include <sys/module.h>
@@ -441,7 +442,7 @@
 	struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(dev);
 	DPRINTF("xpcife probe (unit=%d)\n", pdev->unit);
 #endif
-	return 0;
+	return (BUS_PROBE_NOWILDCARD);
 }
 
 /* Newbus xpcife device driver attach */

Added: trunk/sys/dev/xen/timer/timer.c
===================================================================
--- trunk/sys/dev/xen/timer/timer.c	                        (rev 0)
+++ trunk/sys/dev/xen/timer/timer.c	2018-05-27 22:21:25 UTC (rev 10029)
@@ -0,0 +1,597 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009 Adrian Chadd
+ * Copyright (c) 2012 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/**
+ * \file dev/xen/timer/timer.c
+ * \brief A timer driver for the Xen hypervisor's PV clock.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/timer/timer.c 265999 2014-05-14 01:35:43Z ian $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/time.h>
+#include <sys/timetc.h>
+#include <sys/timeet.h>
+#include <sys/smp.h>
+#include <sys/limits.h>
+#include <sys/clock.h>
+
+#include <xen/xen-os.h>
+#include <xen/features.h>
+#include <xen/xen_intr.h>
+#include <xen/hypervisor.h>
+#include <xen/interface/io/xenbus.h>
+#include <xen/interface/vcpu.h>
+
+#include <machine/cpu.h>
+#include <machine/cpufunc.h>
+#include <machine/clock.h>
+#include <machine/_inttypes.h>
+#include <machine/smp.h>
+
+#include "clock_if.h"
+
+static devclass_t xentimer_devclass;
+
+#define	NSEC_IN_SEC	1000000000ULL
+#define	NSEC_IN_USEC	1000ULL
+/* 18446744073 = int(2^64 / NSEC_IN_SEC) = 1 ns in 64-bit fractions */
+#define	FRAC_IN_NSEC	18446744073LL
+
+/* Xen timers may fire up to 100us off */
+#define	XENTIMER_MIN_PERIOD_IN_NSEC	100*NSEC_IN_USEC
+#define	XENCLOCK_RESOLUTION		10000000
+
+#define	ETIME	62	/* Xen "bad time" error */
+
+#define	XENTIMER_QUALITY	950
+
+struct xentimer_pcpu_data {
+	uint64_t timer;
+	uint64_t last_processed;
+	void *irq_handle;
+};
+
+DPCPU_DEFINE(struct xentimer_pcpu_data, xentimer_pcpu);
+
+DPCPU_DECLARE(struct vcpu_info *, vcpu_info);
+
+struct xentimer_softc {
+	device_t dev;
+	struct timecounter tc;
+	struct eventtimer et;
+};
+
+/* Last time; this guarantees a monotonically increasing clock. */
+volatile uint64_t xen_timer_last_time = 0;
+
+static void
+xentimer_identify(driver_t *driver, device_t parent)
+{
+	if (!xen_domain())
+		return;
+
+	/* Handle all Xen PV timers in one device instance. */
+	if (devclass_get_device(xentimer_devclass, 0))
+		return;
+
+	BUS_ADD_CHILD(parent, 0, "xen_et", 0);
+}
+
+static int
+xentimer_probe(device_t dev)
+{
+	KASSERT((xen_domain()), ("Trying to use Xen timer on bare metal"));
+	/*
+	 * In order to attach, this driver requires the following:
+	 * - Vector callback support by the hypervisor, in order to deliver
+	 *   timer interrupts to the correct CPU for CPUs other than 0.
+	 * - Access to the hypervisor shared info page, in order to look up
+	 *   each VCPU's timer information and the Xen wallclock time.
+	 * - The hypervisor must say its PV clock is "safe" to use.
+	 * - The hypervisor must support VCPUOP hypercalls.
+	 * - The maximum number of CPUs supported by FreeBSD must not exceed
+	 *   the number of VCPUs supported by the hypervisor.
+	 */
+#define	XTREQUIRES(condition, reason...)	\
+	if (!(condition)) {			\
+		device_printf(dev, ## reason);	\
+		device_detach(dev);		\
+		return (ENXIO);			\
+	}
+
+	if (xen_hvm_domain()) {
+		XTREQUIRES(xen_vector_callback_enabled,
+		           "vector callbacks unavailable\n");
+		XTREQUIRES(xen_feature(XENFEAT_hvm_safe_pvclock),
+		           "HVM safe pvclock unavailable\n");
+	}
+	XTREQUIRES(HYPERVISOR_shared_info != NULL,
+	           "shared info page unavailable\n");
+	XTREQUIRES(HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, 0, NULL) == 0,
+	           "VCPUOPs interface unavailable\n");
+#undef XTREQUIRES
+	device_set_desc(dev, "Xen PV Clock");
+	return (BUS_PROBE_NOWILDCARD);
+}
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline uint64_t
+scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
+{
+	uint64_t product;
+
+	if (shift < 0)
+		delta >>= -shift;
+	else
+		delta <<= shift;
+
+#if defined(__i386__)
+	{
+		uint32_t tmp1, tmp2;
+
+		/**
+		 * For i386, the formula looks like:
+		 *
+		 *   lower = (mul_frac * (delta & UINT_MAX)) >> 32
+		 *   upper = mul_frac * (delta >> 32)
+		 *   product = lower + upper
+		 */
+		__asm__ (
+			"mul  %5       ; "
+			"mov  %4,%%eax ; "
+			"mov  %%edx,%4 ; "
+			"mul  %5       ; "
+			"xor  %5,%5    ; "
+			"add  %4,%%eax ; "
+			"adc  %5,%%edx ; "
+			: "=A" (product), "=r" (tmp1), "=r" (tmp2)
+			: "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)),
+			  "2" (mul_frac) );
+	}
+#elif defined(__amd64__)
+	{
+		unsigned long tmp;
+
+		__asm__ (
+			"mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
+			: [lo]"=a" (product), [hi]"=d" (tmp)
+			: "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac));
+	}
+#else
+#error "xentimer: unsupported architecture"
+#endif
+
+	return (product);
+}
+
+static uint64_t
+get_nsec_offset(struct vcpu_time_info *tinfo)
+{
+
+	return (scale_delta(rdtsc() - tinfo->tsc_timestamp,
+	    tinfo->tsc_to_system_mul, tinfo->tsc_shift));
+}
+
+/*
+ * Read the current hypervisor system uptime value from Xen.
+ * See <xen/interface/xen.h> for a description of how this works.
+ */
+static uint32_t
+xen_fetch_vcpu_tinfo(struct vcpu_time_info *dst, struct vcpu_time_info *src)
+{
+
+	do {
+		dst->version = src->version;
+		rmb();
+		dst->tsc_timestamp = src->tsc_timestamp;
+		dst->system_time = src->system_time;
+		dst->tsc_to_system_mul = src->tsc_to_system_mul;
+		dst->tsc_shift = src->tsc_shift;
+		rmb();
+	} while ((src->version & 1) | (dst->version ^ src->version));
+
+	return (dst->version);
+}
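+
+#if 0
+/*
+ * Editor's sketch, not part of this commit: the loop above is the
+ * reader half of a seqlock-style protocol.  The hypervisor's writer
+ * side keeps 'version' odd while an update is in flight, roughly:
+ */
+static void
+vcpu_time_write(struct vcpu_time_info *dst, const struct vcpu_time_info *src)
+{
+
+	dst->version++;			/* now odd: update in flight */
+	wmb();
+	dst->tsc_timestamp = src->tsc_timestamp;
+	dst->system_time = src->system_time;
+	dst->tsc_to_system_mul = src->tsc_to_system_mul;
+	dst->tsc_shift = src->tsc_shift;
+	wmb();
+	dst->version++;			/* even again: record stable */
+}
+#endif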
+
+/**
+ * \brief Get the current time, in nanoseconds, since the hypervisor booted.
+ *
+ * \note This function returns the current CPU's idea of this value, unless
+ *       it happens to be less than another CPU's previously determined value.
+ */
+static uint64_t
+xen_fetch_vcpu_time(void)
+{
+	struct vcpu_time_info dst;
+	struct vcpu_time_info *src;
+	uint32_t pre_version;
+	uint64_t now;
+	volatile uint64_t last;
+	struct vcpu_info *vcpu = DPCPU_GET(vcpu_info);
+
+	src = &vcpu->time;
+
+	critical_enter();
+	do {
+		pre_version = xen_fetch_vcpu_tinfo(&dst, src);
+		barrier();
+		now = dst.system_time + get_nsec_offset(&dst);
+		barrier();
+	} while (pre_version != src->version);
+
+	/*
+	 * Enforce a monotonically increasing clock time across all
+	 * VCPUs.  If our time is too old, use the last time and return.
+	 * Otherwise, try to update the last time.
+	 */
+	do {
+		last = xen_timer_last_time;
+		if (last > now) {
+			now = last;
+			break;
+		}
+	} while (!atomic_cmpset_64(&xen_timer_last_time, last, now));
+
+	critical_exit();
+
+	return (now);
+}
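+
+#if 0
+/*
+ * Editor's sketch, not part of this commit: the compare-and-set loop
+ * above, isolated.  Each CPU either adopts a later time that some
+ * other CPU already published, or races to publish its own reading,
+ * retrying only when the CAS loses.
+ */
+static uint64_t
+monotonic_clamp(volatile uint64_t *lastp, uint64_t now)
+{
+	uint64_t last;
+
+	do {
+		last = *lastp;
+		if (last > now)
+			return (last);	/* another CPU saw a later time */
+	} while (!atomic_cmpset_64(lastp, last, now));
+	return (now);
+}
+#endif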
+
+static uint32_t
+xentimer_get_timecount(struct timecounter *tc)
+{
+
+	return ((uint32_t)xen_fetch_vcpu_time() & UINT_MAX);
+}
+
+/**
+ * \brief Fetch the hypervisor boot time, known as the "Xen wallclock".
+ *
+ * \param ts		Timespec to store the current stable value.
+ *
+ * \note This value is updated when Domain-0 shifts its clock to follow
+ *       clock drift, e.g. as detected by NTP.
+ */
+static void
+xen_fetch_wallclock(struct timespec *ts)
+{
+	shared_info_t *src = HYPERVISOR_shared_info;
+	uint32_t version = 0;
+
+	do {
+		version = src->wc_version;
+		rmb();
+		ts->tv_sec = src->wc_sec;
+		ts->tv_nsec = src->wc_nsec;
+		rmb();
+	} while ((src->wc_version & 1) | (version ^ src->wc_version));
+}
+
+static void
+xen_fetch_uptime(struct timespec *ts)
+{
+	uint64_t uptime = xen_fetch_vcpu_time();
+	ts->tv_sec = uptime / NSEC_IN_SEC;
+	ts->tv_nsec = uptime % NSEC_IN_SEC;
+}
+
+static int
+xentimer_settime(device_t dev __unused, struct timespec *ts)
+{
+	/*
+	 * Don't return EINVAL here; just silently fail if the domain isn't
+	 * privileged enough to set the TOD.
+	 */
+	return (0);
+}
+
+/**
+ * \brief Return current time according to the Xen Hypervisor wallclock.
+ *
+ * \param dev	Xentimer device.
+ * \param ts	Pointer to store the wallclock time.
+ *
+ * \note  The Xen time structures document the hypervisor start time and the
+ *        uptime-since-hypervisor-start (in nsec).  They need to be combined
+ *        in order to calculate a TOD clock.
+ */
+static int
+xentimer_gettime(device_t dev, struct timespec *ts)
+{
+	struct timespec u_ts;
+
+	timespecclear(ts);
+	xen_fetch_wallclock(ts);
+	xen_fetch_uptime(&u_ts);
+	timespecadd(ts, &u_ts);
+
+	return (0);
+}
+
+/**
+ * \brief Handle a timer interrupt for the Xen PV timer driver.
+ *
+ * \param arg	Xen timer driver softc that is expecting the interrupt.
+ */
+static int
+xentimer_intr(void *arg)
+{
+	struct xentimer_softc *sc = (struct xentimer_softc *)arg;
+	struct xentimer_pcpu_data *pcpu = DPCPU_PTR(xentimer_pcpu);
+
+	pcpu->last_processed = xen_fetch_vcpu_time();
+	if (pcpu->timer != 0 && sc->et.et_active)
+		sc->et.et_event_cb(&sc->et, sc->et.et_arg);
+
+	return (FILTER_HANDLED);
+}
+
+static int
+xentimer_vcpu_start_timer(int vcpu, uint64_t next_time)
+{
+	struct vcpu_set_singleshot_timer single;
+
+	single.timeout_abs_ns = next_time;
+	single.flags          = VCPU_SSHOTTMR_future;
+	return (HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, vcpu, &single));
+}
+
+static int
+xentimer_vcpu_stop_timer(int vcpu)
+{
+
+	return (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, vcpu, NULL));
+}
+
+/**
+ * \brief Set the next oneshot time for the current CPU.
+ *
+ * \param et	Xen timer driver event timer to schedule on.
+ * \param first	Delta to the next time to schedule the interrupt for.
+ * \param period Not used.
+ *
+ * \note See eventtimers(9) for more information.
+ * \note The period argument is unused; this timer is oneshot-only.
+ *
+ * \returns 0
+ */
+static int
+xentimer_et_start(struct eventtimer *et,
+    sbintime_t first, sbintime_t period)
+{
+	int error = 0, i = 0;
+	struct xentimer_softc *sc = et->et_priv;
+	int cpu = PCPU_GET(vcpu_id);
+	struct xentimer_pcpu_data *pcpu = DPCPU_PTR(xentimer_pcpu);
+	uint64_t first_in_ns, next_time;
+
+	/* See sbttots() for this formula. */
+	first_in_ns = (((first >> 32) * NSEC_IN_SEC) +
+	               (((uint64_t)NSEC_IN_SEC * (uint32_t)first) >> 32));
+
+	/*
+	 * Retry any timer scheduling failures, where the hypervisor
+	 * returns -ETIME.  Sometimes even a 100us timer period isn't large
+	 * enough, but larger period instances are relatively uncommon.
+	 *
+	 * XXX Remove the panics once et_start() and its consumers are
+	 *     equipped to deal with start failures.
+	 */
+	do {
+		if (++i == 60)
+			panic("can't schedule timer");
+		next_time = xen_fetch_vcpu_time() + first_in_ns;
+		error = xentimer_vcpu_start_timer(cpu, next_time);
+	} while (error == -ETIME);
+
+	if (error)
+		panic("%s: Error %d setting singleshot timer to %"PRIu64"\n",
+		    device_get_nameunit(sc->dev), error, next_time);
+
+	pcpu->timer = next_time;
+	return (error);
+}
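
For reference, sbintime_t is a 32.32 fixed-point count of seconds, so the
conversion above multiplies the whole seconds (the high 32 bits) by 10^9
and scales the 32-bit fraction to nanoseconds. A hypothetical standalone
helper mirroring that in-line conversion (not part of the driver):

	#include <stdint.h>

	typedef int64_t sbintime_t;		/* 32.32 fixed-point seconds */
	#define NSEC_IN_SEC	1000000000ULL

	static inline uint64_t
	sbt_to_ns(sbintime_t sbt)
	{
		/* Whole seconds in ns, plus fractional seconds scaled to ns. */
		return (((uint64_t)(sbt >> 32)) * NSEC_IN_SEC +
		    ((NSEC_IN_SEC * (uint32_t)sbt) >> 32));
	}

For example, sbt_to_ns((sbintime_t)1 << 32) yields 1000000000, i.e. one
second.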
+
+/**
+ * \brief Cancel the event timer's currently running timer, if any.
+ */
+static int
+xentimer_et_stop(struct eventtimer *et)
+{
+	int cpu = PCPU_GET(vcpu_id);
+	struct xentimer_pcpu_data *pcpu = DPCPU_PTR(xentimer_pcpu);
+
+	pcpu->timer = 0;
+	return (xentimer_vcpu_stop_timer(cpu));
+}
+
+/**
+ * \brief Attach a Xen PV timer driver instance.
+ * 
+ * \param dev	Bus device object to attach.
+ *
+ * \returns 0 on success, otherwise an error from the hypervisor timer
+ *          calls or interrupt setup.
+ */
+static int
+xentimer_attach(device_t dev)
+{
+	struct xentimer_softc *sc = device_get_softc(dev);
+	int error, i;
+
+	sc->dev = dev;
+
+	/* Bind an event channel to a VIRQ on each VCPU. */
+	CPU_FOREACH(i) {
+		struct xentimer_pcpu_data *pcpu;
+
+		pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
+		error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL);
+		if (error) {
+			device_printf(dev, "Error disabling Xen periodic timer "
+			                   "on CPU %d\n", i);
+			return (error);
+		}
+
+		error = xen_intr_bind_virq(dev, VIRQ_TIMER, i, xentimer_intr,
+		    NULL, sc, INTR_TYPE_CLK, &pcpu->irq_handle);
+		if (error) {
+			device_printf(dev, "Error %d binding VIRQ_TIMER "
+			    "to VCPU %d\n", error, i);
+			return (error);
+		}
+		xen_intr_describe(pcpu->irq_handle, "c%d", i);
+	}
+
+	/* Register the event timer. */
+	sc->et.et_name = "XENTIMER";
+	sc->et.et_quality = XENTIMER_QUALITY;
+	sc->et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU;
+	sc->et.et_frequency = NSEC_IN_SEC;
+	/* See tstosbt() for this formula */
+	sc->et.et_min_period = (XENTIMER_MIN_PERIOD_IN_NSEC *
+	                        (((uint64_t)1 << 63) / 500000000) >> 32);
+	sc->et.et_max_period = ((sbintime_t)4 << 32);
+	sc->et.et_start = xentimer_et_start;
+	sc->et.et_stop = xentimer_et_stop;
+	sc->et.et_priv = sc;
+	et_register(&sc->et);
+
+	/* Register the timecounter. */
+	sc->tc.tc_name = "XENTIMER";
+	sc->tc.tc_quality = XENTIMER_QUALITY;
+	sc->tc.tc_flags = TC_FLAGS_SUSPEND_SAFE;
+	/*
+	 * The underlying resolution is in nanoseconds, since the timer info
+	 * scales TSC frequencies using a fraction that represents time in
+	 * terms of nanoseconds.
+	 */
+	sc->tc.tc_frequency = NSEC_IN_SEC;
+	sc->tc.tc_counter_mask = ~0u;
+	sc->tc.tc_get_timecount = xentimer_get_timecount;
+	sc->tc.tc_priv = sc;
+	tc_init(&sc->tc);
+
+	/* Register the Hypervisor wall clock */
+	clock_register(dev, XENCLOCK_RESOLUTION);
+
+	return (0);
+}
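
A note on the tstosbt() constant used for et_min_period above:
(1 << 63) / 500000000 equals 2^64 / 10^9, so multiplying a nanosecond
count by it and shifting right 32 bits gives ns * 2^32 / 10^9, i.e. the
value as 32.32 fixed-point seconds, without needing 128-bit arithmetic.
A quick illustrative sanity check (the 100us input is an assumption for
the example, not the definition of XENTIMER_MIN_PERIOD_IN_NSEC):

	#include <assert.h>
	#include <stdint.h>

	int
	main(void)
	{
		uint64_t ns = 100000;	/* 100us, purely for illustration */
		uint64_t factor = ((uint64_t)1 << 63) / 500000000;

		/* 100us in 32.32 fixed point is floor(2^32 / 10000). */
		assert(((ns * factor) >> 32) == 429496);
		return (0);
	}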
+
+static int
+xentimer_detach(device_t dev)
+{
+
+	/* Implement Xen PV clock teardown - XXX see hpet_detach ? */
+	/* If possible:
+	 * 1. need to deregister timecounter
+	 * 2. need to deregister event timer
+	 * 3. need to deregister virtual IRQ event channels
+	 */
+	return (EBUSY);
+}
+
+static void
+xentimer_percpu_resume(void *arg)
+{
+	device_t dev = (device_t) arg;
+	struct xentimer_softc *sc = device_get_softc(dev);
+
+	xentimer_et_start(&sc->et, sc->et.et_min_period, 0);
+}
+
+static int
+xentimer_resume(device_t dev)
+{
+	int error;
+	int i;
+
+	/* Disable the periodic timer */
+	CPU_FOREACH(i) {
+		error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL);
+		if (error != 0) {
+			device_printf(dev,
+			    "Error disabling Xen periodic timer on CPU %d\n",
+			    i);
+			return (error);
+		}
+	}
+
+	/* Reset the last uptime value */
+	xen_timer_last_time = 0;
+
+	/* Reset the RTC clock */
+	inittodr(time_second);
+
+	/* Kick the timers on all CPUs */
+	smp_rendezvous(NULL, xentimer_percpu_resume, NULL, dev);
+
+	if (bootverbose)
+		device_printf(dev, "resumed operation after suspension\n");
+
+	return (0);
+}
+
+static int
+xentimer_suspend(device_t dev)
+{
+	return (0);
+}
+
+static device_method_t xentimer_methods[] = {
+	DEVMETHOD(device_identify, xentimer_identify),
+	DEVMETHOD(device_probe, xentimer_probe),
+	DEVMETHOD(device_attach, xentimer_attach),
+	DEVMETHOD(device_detach, xentimer_detach),
+	DEVMETHOD(device_suspend, xentimer_suspend),
+	DEVMETHOD(device_resume, xentimer_resume),
+	/* clock interface */
+	DEVMETHOD(clock_gettime, xentimer_gettime),
+	DEVMETHOD(clock_settime, xentimer_settime),
+	DEVMETHOD_END
+};
+
+static driver_t xentimer_driver = {
+	"xen_et",
+	xentimer_methods,
+	sizeof(struct xentimer_softc),
+};
+
+DRIVER_MODULE(xentimer, nexus, xentimer_driver, xentimer_devclass, 0, 0);
+MODULE_DEPEND(xentimer, nexus, 1, 1, 1);


Property changes on: trunk/sys/dev/xen/timer/timer.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/dev/xen/xenpci/xenpci.c
===================================================================
--- trunk/sys/dev/xen/xenpci/xenpci.c	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/xenpci/xenpci.c	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * Copyright (c) 2008 Citrix Systems, Inc.
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/xenpci/xenpci.c 255726 2013-09-20 05:06:03Z gibbs $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -32,9 +33,6 @@
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
-#include <sys/proc.h>
-#include <sys/systm.h>
-#include <sys/time.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
@@ -41,31 +39,19 @@
 #include <sys/rman.h>
 
 #include <machine/stdarg.h>
-#include <machine/xen/xen-os.h>
+
+#include <xen/xen-os.h>
 #include <xen/features.h>
 #include <xen/hypervisor.h>
-#include <xen/gnttab.h>
-#include <xen/xen_intr.h>
-#include <xen/interface/memory.h>
-#include <xen/interface/hvm/params.h>
+#include <xen/hvm.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
-#include <vm/vm.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_kern.h>
-#include <vm/pmap.h>
-
 #include <dev/xen/xenpci/xenpcivar.h>
 
-/*
- * These variables are used by the rest of the kernel to access the
- * hypervisor.
- */
-char *hypercall_stubs;
-shared_info_t *HYPERVISOR_shared_info;
-static vm_paddr_t shared_info_pa;
+extern void xen_intr_handle_upcall(struct trapframe *trap_frame);
+
 static device_t nexus;
 
 /*
@@ -73,103 +59,44 @@
  */
 static devclass_t xenpci_devclass;
 
-/*
- * Return the CPUID base address for Xen functions.
- */
-static uint32_t
-xenpci_cpuid_base(void)
+static int
+xenpci_intr_filter(void *trap_frame)
 {
-	uint32_t base, regs[4];
-
-	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
-		do_cpuid(base, regs);
-		if (!memcmp("XenVMMXenVMM", &regs[1], 12)
-		    && (regs[0] - base) >= 2)
-			return (base);
-	}
-	return (0);
+	xen_intr_handle_upcall(trap_frame);
+	return (FILTER_HANDLED);
 }
 
-/*
- * Allocate and fill in the hypcall page.
- */
 static int
-xenpci_init_hypercall_stubs(device_t dev, struct xenpci_softc * scp)
+xenpci_irq_init(device_t device, struct xenpci_softc *scp)
 {
-	uint32_t base, regs[4];
-	int i;
+	int error;
 
-	base = xenpci_cpuid_base();
-	if (!base) {
-		device_printf(dev, "Xen platform device but not Xen VMM\n");
-		return (EINVAL);
-	}
+	error = BUS_SETUP_INTR(device_get_parent(device), device,
+			       scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC,
+			       xenpci_intr_filter, NULL, /*trap_frame*/NULL,
+			       &scp->intr_cookie);
+	if (error)
+		return (error);
 
-	if (bootverbose) {
-		do_cpuid(base + 1, regs);
-		device_printf(dev, "Xen version %d.%d.\n",
-		    regs[0] >> 16, regs[0] & 0xffff);
-	}
-
+#ifdef SMP
 	/*
-	 * Find the hypercall pages.
+	 * When using the PCI event delivery callback we cannot assign
+	 * events to specific vCPUs, so all events are delivered to vCPU#0 by
+	 * Xen. Since the PCI interrupt can fire on any CPU by default, we
+	 * need to bind it to vCPU#0 in order to ensure that
+	 * xen_intr_handle_upcall always gets called on vCPU#0.
 	 */
-	do_cpuid(base + 2, regs);
-	
-	hypercall_stubs = malloc(regs[0] * PAGE_SIZE, M_TEMP, M_WAITOK);
+	error = BUS_BIND_INTR(device_get_parent(device), device,
+	                      scp->res_irq, 0);
+	if (error)
+		return (error);
+#endif
 
-	for (i = 0; i < regs[0]; i++) {
-		wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i);
-	}
-
+	xen_hvm_set_callback(device);
 	return (0);
 }
 
 /*
- * After a resume, re-initialise the hypercall page.
- */
-static void
-xenpci_resume_hypercall_stubs(device_t dev, struct xenpci_softc * scp)
-{
-	uint32_t base, regs[4];
-	int i;
-
-	base = xenpci_cpuid_base();
-
-	do_cpuid(base + 2, regs);
-	for (i = 0; i < regs[0]; i++) {
-		wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i);
-	}
-}
-
-/*
- * Tell the hypervisor how to contact us for event channel callbacks.
- */
-static void
-xenpci_set_callback(device_t dev)
-{
-	int irq;
-	uint64_t callback;
-	struct xen_hvm_param xhp;
-
-	irq = pci_get_irq(dev);
-	if (irq < 16) {
-		callback = irq;
-	} else {
-		callback = (pci_get_intpin(dev) - 1) & 3;
-		callback |= pci_get_slot(dev) << 11;
-		callback |= 1ull << 56;
-	}
-
-	xhp.domid = DOMID_SELF;
-	xhp.index = HVM_PARAM_CALLBACK_IRQ;
-	xhp.value = callback;
-	if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp))
-		panic("Can't set evtchn callback");
-}
-
-
-/*
  * Deallocate anything allocated by xenpci_allocate_resources.
  */
 static int
@@ -293,35 +220,6 @@
 }
 
 /*
- * Called very early in the resume sequence - reinitialise the various
- * bits of Xen machinery including the hypercall page and the shared
- * info page.
- */
-void
-xenpci_resume()
-{
-	device_t dev = devclass_get_device(xenpci_devclass, 0);
-	struct xenpci_softc *scp = device_get_softc(dev);
-	struct xen_add_to_physmap xatp;
-
-	xenpci_resume_hypercall_stubs(dev, scp);
-
-	xatp.domid = DOMID_SELF;
-	xatp.idx = 0;
-	xatp.space = XENMAPSPACE_shared_info;
-	xatp.gpfn = shared_info_pa >> PAGE_SHIFT;
-	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
-		panic("HYPERVISOR_memory_op failed");
-
-	pmap_kenter((vm_offset_t) HYPERVISOR_shared_info, shared_info_pa);
-
-	xenpci_set_callback(dev);
-
-	gnttab_resume();
-	irq_resume();
-}
-
-/*
  * Probe - just check device ID.
  */
 static int
@@ -341,11 +239,9 @@
 static int
 xenpci_attach(device_t dev)
 {
-	int error;
 	struct xenpci_softc *scp = device_get_softc(dev);
-	struct xen_add_to_physmap xatp;
-	vm_offset_t shared_va;
 	devclass_t dc;
+	int error;
 
 	/*
 	 * Find and record nexus0.  Since we are not really on the
@@ -365,34 +261,16 @@
 		goto errexit;
 	}
 
-	error = xenpci_init_hypercall_stubs(dev, scp);
+	/*
+	 * Hook the irq up to evtchn
+	 */
+	error = xenpci_irq_init(dev, scp);
 	if (error) {
-		device_printf(dev, "xenpci_init_hypercall_stubs failed(%d).\n",
-		    error);
+		device_printf(dev, "xenpci_irq_init failed(%d).\n",
+		    error);
 		goto errexit;
 	}
 
-	setup_xen_features();
-
-	xenpci_alloc_space_int(scp, PAGE_SIZE, &shared_info_pa); 
-
-	xatp.domid = DOMID_SELF;
-	xatp.idx = 0;
-	xatp.space = XENMAPSPACE_shared_info;
-	xatp.gpfn = shared_info_pa >> PAGE_SHIFT;
-	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
-		panic("HYPERVISOR_memory_op failed");
-
-	shared_va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
-	pmap_kenter(shared_va, shared_info_pa);
-	HYPERVISOR_shared_info = (void *) shared_va;
-
-	/*
-	 * Hook the irq up to evtchn
-	 */
-	xenpci_irq_init(dev, scp);
-	xenpci_set_callback(dev);
-
 	return (bus_generic_attach(dev));
 
 errexit:
@@ -431,13 +309,26 @@
 	return (xenpci_deallocate_resources(dev));
 }
 
+static int
+xenpci_suspend(device_t dev)
+{
+	return (bus_generic_suspend(dev));
+}
+
+static int
+xenpci_resume(device_t dev)
+{
+	xen_hvm_set_callback(dev);
+	return (bus_generic_resume(dev));
+}
+
 static device_method_t xenpci_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		xenpci_probe),
 	DEVMETHOD(device_attach,	xenpci_attach),
 	DEVMETHOD(device_detach,	xenpci_detach),
-	DEVMETHOD(device_suspend,	bus_generic_suspend),
-	DEVMETHOD(device_resume,	bus_generic_resume),
+	DEVMETHOD(device_suspend,	xenpci_suspend),
+	DEVMETHOD(device_resume,	xenpci_resume),
 
 	/* Bus interface */
 	DEVMETHOD(bus_add_child,	bus_generic_add_child),

Modified: trunk/sys/dev/xen/xenpci/xenpcivar.h
===================================================================
--- trunk/sys/dev/xen/xenpci/xenpcivar.h	2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/xenpci/xenpcivar.h	2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * Copyright (c) 2008 Citrix Systems, Inc.
  * All rights reserved.
@@ -22,6 +23,8 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/xen/xenpci/xenpcivar.h 255040 2013-08-29 19:52:18Z gibbs $
  */
 
 /*
@@ -38,7 +41,4 @@
 	vm_paddr_t phys_next;		/* next page from mem range */
 };
 
-extern int xenpci_irq_init(device_t device, struct xenpci_softc *scp);
 extern int xenpci_alloc_space(size_t sz, vm_paddr_t *pa);
-extern void xenpci_resume(void);
-extern void xen_suspend(void);


