[Midnightbsd-cvs] src [10029] trunk/sys/dev/xen: sync
laffer1 at midnightbsd.org
Sun May 27 18:21:26 EDT 2018
Revision: 10029
http://svnweb.midnightbsd.org/src/?rev=10029
Author: laffer1
Date: 2018-05-27 18:21:25 -0400 (Sun, 27 May 2018)
Log Message:
-----------
sync
Modified Paths:
--------------
trunk/sys/dev/xen/balloon/balloon.c
trunk/sys/dev/xen/blkback/blkback.c
trunk/sys/dev/xen/blkfront/blkfront.c
trunk/sys/dev/xen/blkfront/block.h
trunk/sys/dev/xen/console/console.c
trunk/sys/dev/xen/console/xencons_ring.c
trunk/sys/dev/xen/console/xencons_ring.h
trunk/sys/dev/xen/control/control.c
trunk/sys/dev/xen/netback/netback.c
trunk/sys/dev/xen/netback/netback_unit_tests.c
trunk/sys/dev/xen/netfront/mbufq.h
trunk/sys/dev/xen/netfront/netfront.c
trunk/sys/dev/xen/pcifront/pcifront.c
trunk/sys/dev/xen/xenpci/xenpci.c
trunk/sys/dev/xen/xenpci/xenpcivar.h
Added Paths:
-----------
trunk/sys/dev/xen/timer/
trunk/sys/dev/xen/timer/timer.c
Modified: trunk/sys/dev/xen/balloon/balloon.c
===================================================================
--- trunk/sys/dev/xen/balloon/balloon.c 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/balloon/balloon.c 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/******************************************************************************
* balloon.c
*
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/balloon/balloon.c 292906 2015-12-30 08:15:43Z royger $");
#include <sys/param.h>
#include <sys/lock.h>
@@ -40,29 +41,25 @@
#include <sys/mutex.h>
#include <sys/sysctl.h>
-#include <machine/xen/xen-os.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+#include <xen/xen-os.h>
#include <xen/hypervisor.h>
+#include <xen/features.h>
#include <xen/xenstore/xenstorevar.h>
-#include <vm/vm.h>
-#include <vm/vm_page.h>
+#include <machine/xen/xenvar.h>
static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver");
+/* Convert from KB (as fetched from xenstore) to number of PAGES */
+#define KB_TO_PAGE_SHIFT (PAGE_SHIFT - 10)
+
struct mtx balloon_mutex;
-/*
- * Protects atomic reservation decrease/increase against concurrent increases.
- * Also protects non-atomic updates of current_pages and driver_pages, and
- * balloon lists.
- */
-struct mtx balloon_lock;
-
/* We increase/decrease in batches which fit in a page */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
-#define ARRAY_SIZE(A) (sizeof(A) / sizeof(A[0]))
struct balloon_stats {
/* We aim for 'current allocation' == 'target allocation'. */
@@ -98,13 +95,8 @@
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD,
&bs.balloon_high, 0, "High-mem balloon");
-struct balloon_entry {
- vm_page_t page;
- STAILQ_ENTRY(balloon_entry) list;
-};
-
/* List of ballooned pages, threaded through the mem_map array. */
-static STAILQ_HEAD(,balloon_entry) ballooned_pages;
+static TAILQ_HEAD(,vm_page) ballooned_pages;
/* Main work function, always executed in process context. */
static void balloon_process(void *unused);
@@ -114,39 +106,6 @@
#define WPRINTK(fmt, args...) \
printk(KERN_WARNING "xen_mem: " fmt, ##args)
-/* balloon_append: add the given page to the balloon. */
-static void
-balloon_append(vm_page_t page)
-{
- struct balloon_entry *entry;
-
- entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK);
- entry->page = page;
- STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
- bs.balloon_low++;
-}
-
-/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
-static vm_page_t
-balloon_retrieve(void)
-{
- vm_page_t page;
- struct balloon_entry *entry;
-
- if (STAILQ_EMPTY(&ballooned_pages))
- return NULL;
-
- entry = STAILQ_FIRST(&ballooned_pages);
- STAILQ_REMOVE_HEAD(&ballooned_pages, list);
-
- page = entry->page;
- free(entry, M_BALLOON);
-
- bs.balloon_low--;
-
- return page;
-}
-
static unsigned long
current_target(void)
{
@@ -153,7 +112,7 @@
unsigned long target = min(bs.target_pages, bs.hard_limit);
if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
target = bs.current_pages + bs.balloon_low + bs.balloon_high;
- return target;
+ return (target);
}
static unsigned long
@@ -160,7 +119,7 @@
minimum_target(void)
{
#ifdef XENHVM
-#define max_pfn physmem
+#define max_pfn realmem
#else
#define max_pfn HYPERVISOR_shared_info->arch.max_pfn
#endif
@@ -167,7 +126,8 @@
unsigned long min_pages, curr_pages = current_target();
#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
- /* Simple continuous piecewiese linear function:
+ /*
+ * Simple continuous piecewiese linear function:
* max MiB -> min MiB gradient
* 0 0
* 16 16
@@ -188,12 +148,10 @@
else
min_pages = MB2PAGES(296) + (max_pfn >> 5);
#undef MB2PAGES
+#undef max_pfn
/* Don't enforce growth */
- return min(min_pages, curr_pages);
-#ifndef CONFIG_XEN
-#undef max_pfn
-#endif
+ return (min(min_pages, curr_pages));
}
static int
@@ -200,7 +158,6 @@
increase_reservation(unsigned long nr_pages)
{
unsigned long pfn, i;
- struct balloon_entry *entry;
vm_page_t page;
long rc;
struct xen_memory_reservation reservation = {
@@ -209,15 +166,14 @@
.domid = DOMID_SELF
};
- if (nr_pages > ARRAY_SIZE(frame_list))
- nr_pages = ARRAY_SIZE(frame_list);
+ mtx_assert(&balloon_mutex, MA_OWNED);
- mtx_lock(&balloon_lock);
+ if (nr_pages > nitems(frame_list))
+ nr_pages = nitems(frame_list);
- for (entry = STAILQ_FIRST(&ballooned_pages), i = 0;
- i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) {
- KASSERT(entry, ("ballooned_pages list corrupt"));
- page = entry->page;
+ for (page = TAILQ_FIRST(&ballooned_pages), i = 0;
+ i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) {
+ KASSERT(page != NULL, ("ballooned_pages list corrupt"));
frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
}
@@ -242,8 +198,10 @@
}
for (i = 0; i < nr_pages; i++) {
- page = balloon_retrieve();
- KASSERT(page, ("balloon_retrieve failed"));
+ page = TAILQ_FIRST(&ballooned_pages);
+ KASSERT(page != NULL, ("Unable to get ballooned page"));
+ TAILQ_REMOVE(&ballooned_pages, page, plinks.q);
+ bs.balloon_low--;
pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
KASSERT((xen_feature(XENFEAT_auto_translated_physmap) ||
@@ -252,33 +210,13 @@
set_phys_to_machine(pfn, frame_list[i]);
-#if 0
-#ifndef XENHVM
- /* Link back into the page tables if not highmem. */
- if (pfn < max_low_pfn) {
- int ret;
- ret = HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- pfn_pte_ma(frame_list[i], PAGE_KERNEL),
- 0);
- PASSING(ret == 0,
- ("HYPERVISOR_update_va_mapping failed"));
- }
-#endif
-#endif
-
- /* Relinquish the page back to the allocator. */
- vm_page_unwire(page, 0);
vm_page_free(page);
}
bs.current_pages += nr_pages;
- //totalram_pages = bs.current_pages;
out:
- mtx_unlock(&balloon_lock);
-
- return 0;
+ return (0);
}
static int
@@ -294,54 +232,37 @@
.domid = DOMID_SELF
};
- if (nr_pages > ARRAY_SIZE(frame_list))
- nr_pages = ARRAY_SIZE(frame_list);
+ mtx_assert(&balloon_mutex, MA_OWNED);
+ if (nr_pages > nitems(frame_list))
+ nr_pages = nitems(frame_list);
+
for (i = 0; i < nr_pages; i++) {
if ((page = vm_page_alloc(NULL, 0,
VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
- VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
+ VM_ALLOC_ZERO)) == NULL) {
nr_pages = i;
need_sleep = 1;
break;
}
+ if ((page->flags & PG_ZERO) == 0) {
+ /*
+ * Zero the page, or else we might be leaking
+ * important data to other domains on the same
+ * host. Xen doesn't scrub ballooned out memory
+ * pages, the guest is in charge of making
+ * sure that no information is leaked.
+ */
+ pmap_zero_page(page);
+ }
+
pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
frame_list[i] = PFNTOMFN(pfn);
-#if 0
- if (!PageHighMem(page)) {
- v = phys_to_virt(pfn << PAGE_SHIFT);
- scrub_pages(v, 1);
-#ifdef CONFIG_XEN
- ret = HYPERVISOR_update_va_mapping(
- (unsigned long)v, __pte_ma(0), 0);
- BUG_ON(ret);
-#endif
- }
-#endif
-#ifdef CONFIG_XEN_SCRUB_PAGES
- else {
- v = kmap(page);
- scrub_pages(v, 1);
- kunmap(page);
- }
-#endif
- }
-
-#ifdef CONFIG_XEN
- /* Ensure that ballooned highmem pages don't have kmaps. */
- kmap_flush_unused();
- flush_tlb_all();
-#endif
-
- mtx_lock(&balloon_lock);
-
- /* No more mappings: invalidate P2M and add to balloon. */
- for (i = 0; i < nr_pages; i++) {
- pfn = MFNTOPFN(frame_list[i]);
set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
- balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
+ TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
+ bs.balloon_low++;
}
set_xen_guest_handle(reservation.extent_start, frame_list);
@@ -350,10 +271,7 @@
KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed"));
bs.current_pages -= nr_pages;
- //totalram_pages = bs.current_pages;
- mtx_unlock(&balloon_lock);
-
return (need_sleep);
}
@@ -424,11 +342,11 @@
return;
}
- /* The given memory/target value is in KiB, so it needs converting to
- pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
- */
- set_new_target(new_target >> (PAGE_SHIFT - 10));
-
+ /*
+ * The given memory/target value is in KiB, so it needs converting to
+ * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
+ */
+ set_new_target(new_target >> KB_TO_PAGE_SHIFT);
}
static void
@@ -460,13 +378,12 @@
if (!is_running_on_xen())
return;
- mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF);
mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);
#ifndef XENHVM
bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
#else
- bs.current_pages = physmem;
+ bs.current_pages = realmem;
#endif
bs.target_pages = bs.current_pages;
bs.balloon_low = 0;
@@ -480,7 +397,8 @@
/* Initialise the balloon with excess memory space. */
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
- balloon_append(page);
+ TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
+ bs.balloon_low++;
}
#undef max_pfn
#endif
@@ -496,76 +414,7 @@
void
balloon_update_driver_allowance(long delta)
{
- mtx_lock(&balloon_lock);
+ mtx_lock(&balloon_mutex);
bs.driver_pages += delta;
- mtx_unlock(&balloon_lock);
+ mtx_unlock(&balloon_mutex);
}
-
-#if 0
-static int dealloc_pte_fn(
- pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
-{
- unsigned long mfn = pte_mfn(*pte);
- int ret;
- struct xen_memory_reservation reservation = {
- .extent_start = &mfn,
- .nr_extents = 1,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- set_pte_at(&init_mm, addr, pte, __pte_ma(0));
- set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
- KASSERT(ret == 1, ("HYPERVISOR_memory_op failed"));
- return 0;
-}
-
-#endif
-
-#if 0
-vm_page_t
-balloon_alloc_empty_page_range(unsigned long nr_pages)
-{
- vm_page_t pages;
- int i, rc;
- unsigned long *mfn_list;
- struct xen_memory_reservation reservation = {
- .address_bits = 0,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
-
- pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4)
- if (pages == NULL)
- return NULL;
-
- mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK);
-
- for (i = 0; i < nr_pages; i++) {
- mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT);
- PFNTOMFN(i) = INVALID_P2M_ENTRY;
- reservation.extent_start = mfn_list;
- reservation.nr_extents = nr_pages;
- rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &reservation);
- KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed"));
- }
-
- current_pages -= nr_pages;
-
- wakeup(balloon_process);
-
- return pages;
-}
-
-void
-balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
-{
- unsigned long i;
-
- for (i = 0; i < nr_pages; i++)
- balloon_append(page + i);
-
- wakeup(balloon_process);
-}
-#endif
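Note on the balloon.c change above: the separately malloc'd struct balloon_entry wrappers are dropped, and ballooned pages are now threaded directly onto a TAILQ through the page structure itself (plinks.q), with bs.balloon_low adjusted as pages are inserted or removed under balloon_mutex. The following is only a minimal userspace sketch of that list pattern; struct page_stub and the helper names stand in for the kernel's vm_page_t and balloon statistics and are not the real API.

/*
 * Illustrative sketch: pages kept in the balloon on an intrusive TAILQ,
 * mirroring the plinks.q usage in the revised balloon.c.  Types and
 * names here are stand-ins, not the kernel interfaces.
 */
#include <stdio.h>
#include <sys/queue.h>

struct page_stub {
	unsigned long pfn;
	TAILQ_ENTRY(page_stub) q;	/* plays the role of plinks.q */
};

static TAILQ_HEAD(, page_stub) ballooned_pages =
    TAILQ_HEAD_INITIALIZER(ballooned_pages);
static unsigned long balloon_low;	/* pages currently in the balloon */

/* decrease_reservation() path: hand a page to the balloon. */
static void
balloon_append(struct page_stub *p)
{
	TAILQ_INSERT_HEAD(&ballooned_pages, p, q);
	balloon_low++;
}

/* increase_reservation() path: reclaim a page from the balloon, if any. */
static struct page_stub *
balloon_retrieve(void)
{
	struct page_stub *p;

	p = TAILQ_FIRST(&ballooned_pages);
	if (p == NULL)
		return (NULL);
	TAILQ_REMOVE(&ballooned_pages, p, q);
	balloon_low--;
	return (p);
}

int
main(void)
{
	struct page_stub a = { .pfn = 100 }, b = { .pfn = 101 };

	balloon_append(&a);
	balloon_append(&b);
	printf("ballooned: %lu\n", balloon_low);		/* 2 */
	printf("retrieved pfn %lu\n", balloon_retrieve()->pfn);	/* 101 */
	printf("ballooned: %lu\n", balloon_low);		/* 1 */
	return (0);
}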
Modified: trunk/sys/dev/xen/blkback/blkback.c
===================================================================
--- trunk/sys/dev/xen/blkback/blkback.c 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/blkback/blkback.c 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2009-2011 Spectra Logic Corporation
* All rights reserved.
@@ -31,7 +32,7 @@
* Ken Merry (Spectra Logic Corporation)
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/blkback/blkback.c 285738 2015-07-21 07:22:18Z royger $");
/**
* \file blkback.c
@@ -70,14 +71,13 @@
#include <geom/geom.h>
#include <machine/_inttypes.h>
-#include <machine/xen/xen-os.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
+#include <xen/xen-os.h>
#include <xen/blkif.h>
-#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/xen_intr.h>
@@ -88,11 +88,19 @@
/*--------------------------- Compile-time Tunables --------------------------*/
/**
+ * The maximum number of shared memory ring pages we will allow in a
+ * negotiated block-front/back communication channel. Allow enough
+ * ring space for all requests to be XBB_MAX_REQUEST_SIZE'd.
+ */
+#define XBB_MAX_RING_PAGES 32
+
+/**
* The maximum number of outstanding request blocks (request headers plus
* additional segment blocks) we will allow in a negotiated block-front/back
* communication channel.
*/
-#define XBB_MAX_REQUESTS 256
+#define XBB_MAX_REQUESTS \
+ __CONST_RING_SIZE(blkif, PAGE_SIZE * XBB_MAX_RING_PAGES)
/**
* \brief Define to force all I/O to be performed on memory owned by the
@@ -151,14 +159,6 @@
(XBB_MAX_REQUEST_SIZE / PAGE_SIZE) + 1)))
/**
- * The maximum number of shared memory ring pages we will allow in a
- * negotiated block-front/back communication channel. Allow enough
- * ring space for all requests to be XBB_MAX_REQUEST_SIZE'd.
- */
-#define XBB_MAX_RING_PAGES \
- BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBB_MAX_SEGMENTS_PER_REQUEST) \
- * XBB_MAX_REQUESTS)
-/**
* The maximum number of ring pages that we can allow per request list.
* We limit this to the maximum number of segments per request, because
* that is already a reasonable number of segments to aggregate. This
@@ -231,7 +231,7 @@
int num_children;
/**
- * Number of I/O requests dispatched to the backend.
+ * Number of I/O requests still pending on the backend.
*/
int pendcnt;
@@ -328,13 +328,6 @@
int nr_512b_sectors;
/**
- * The number of struct bio requests still outstanding for this
- * request on the backend device. This field is only used for
- * device (rather than file) backed I/O.
- */
- int pendcnt;
-
- /**
* BLKIF_OP code for this request.
*/
int operation;
@@ -682,7 +675,7 @@
blkif_back_rings_t rings;
/** IRQ mapping for the communication ring event channel. */
- int irq;
+ xen_intr_handle_t xen_intr_handle;
/**
* \brief Backend access mode flags (e.g. write, or read-only).
@@ -1240,6 +1233,8 @@
nreq->reqlist = *reqlist;
nreq->req_ring_idx = ring_idx;
+ nreq->id = ring_req->id;
+ nreq->operation = ring_req->operation;
if (xbb->abi != BLKIF_PROTOCOL_NATIVE) {
bcopy(ring_req, &nreq->ring_req_storage, sizeof(*ring_req));
@@ -1323,7 +1318,7 @@
resp->operation = req->operation;
resp->status = status;
- xbb->rings.common.rsp_prod_pvt += BLKIF_SEGS_TO_BLOCKS(req->nr_pages);
+ xbb->rings.common.rsp_prod_pvt++;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbb->rings.common, notify);
if (xbb->rings.common.rsp_prod_pvt == xbb->rings.common.req_cons) {
@@ -1347,7 +1342,7 @@
taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task);
if (notify)
- notify_remote_via_irq(xbb->irq);
+ xen_intr_signal(xbb->xen_intr_handle);
}
/**
@@ -1609,7 +1604,6 @@
req_ring_idx = nreq->req_ring_idx;
nr_sects = 0;
nseg = ring_req->nr_segments;
- nreq->id = ring_req->id;
nreq->nr_pages = nseg;
nreq->nr_512b_sectors = 0;
req_seg_idx = 0;
@@ -1616,8 +1610,8 @@
sg = NULL;
/* Check that number of segments is sane. */
- if (unlikely(nseg == 0)
- || unlikely(nseg > xbb->max_request_segments)) {
+ if (__predict_false(nseg == 0)
+ || __predict_false(nseg > xbb->max_request_segments)) {
DPRINTF("Bad number of segments in request (%d)\n",
nseg);
reqlist->status = BLKIF_RSP_ERROR;
@@ -1624,87 +1618,49 @@
goto send_response;
}
- block_segs = MIN(nreq->nr_pages,
- BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK);
+ block_segs = nseg;
sg = ring_req->seg;
last_block_sg = sg + block_segs;
- while (1) {
- while (sg < last_block_sg) {
- KASSERT(seg_idx <
- XBB_MAX_SEGMENTS_PER_REQLIST,
- ("seg_idx %d is too large, max "
- "segs %d\n", seg_idx,
- XBB_MAX_SEGMENTS_PER_REQLIST));
-
- xbb_sg->first_sect = sg->first_sect;
- xbb_sg->last_sect = sg->last_sect;
- xbb_sg->nsect =
- (int8_t)(sg->last_sect -
- sg->first_sect + 1);
+ while (sg < last_block_sg) {
+ KASSERT(seg_idx <
+ XBB_MAX_SEGMENTS_PER_REQLIST,
+ ("seg_idx %d is too large, max "
+ "segs %d\n", seg_idx,
+ XBB_MAX_SEGMENTS_PER_REQLIST));
- if ((sg->last_sect >= (PAGE_SIZE >> 9))
- || (xbb_sg->nsect <= 0)) {
- reqlist->status = BLKIF_RSP_ERROR;
- goto send_response;
- }
+ xbb_sg->first_sect = sg->first_sect;
+ xbb_sg->last_sect = sg->last_sect;
+ xbb_sg->nsect =
+ (int8_t)(sg->last_sect -
+ sg->first_sect + 1);
- nr_sects += xbb_sg->nsect;
- map->host_addr = xbb_get_gntaddr(reqlist,
- seg_idx, /*sector*/0);
- KASSERT(map->host_addr + PAGE_SIZE <=
- xbb->ring_config.gnt_addr,
- ("Host address %#jx len %d overlaps "
- "ring address %#jx\n",
- (uintmax_t)map->host_addr, PAGE_SIZE,
- (uintmax_t)xbb->ring_config.gnt_addr));
-
- map->flags = GNTMAP_host_map;
- map->ref = sg->gref;
- map->dom = xbb->otherend_id;
- if (operation == BIO_WRITE)
- map->flags |= GNTMAP_readonly;
- sg++;
- map++;
- xbb_sg++;
- seg_idx++;
- req_seg_idx++;
+ if ((sg->last_sect >= (PAGE_SIZE >> 9))
+ || (xbb_sg->nsect <= 0)) {
+ reqlist->status = BLKIF_RSP_ERROR;
+ goto send_response;
}
- block_segs = MIN(nseg - req_seg_idx,
- BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
- if (block_segs == 0)
- break;
+ nr_sects += xbb_sg->nsect;
+ map->host_addr = xbb_get_gntaddr(reqlist,
+ seg_idx, /*sector*/0);
+ KASSERT(map->host_addr + PAGE_SIZE <=
+ xbb->ring_config.gnt_addr,
+ ("Host address %#jx len %d overlaps "
+ "ring address %#jx\n",
+ (uintmax_t)map->host_addr, PAGE_SIZE,
+ (uintmax_t)xbb->ring_config.gnt_addr));
- /*
- * Fetch the next request block full of SG elements.
- * For now, only the spacing between entries is
- * different in the different ABIs, not the sg entry
- * layout.
- */
- req_ring_idx++;
- switch (xbb->abi) {
- case BLKIF_PROTOCOL_NATIVE:
- sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.native,
- req_ring_idx);
- break;
- case BLKIF_PROTOCOL_X86_32:
- {
- sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_32,
- req_ring_idx);
- break;
- }
- case BLKIF_PROTOCOL_X86_64:
- {
- sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_64,
- req_ring_idx);
- break;
- }
- default:
- panic("Unexpected blkif protocol ABI.");
- /* NOTREACHED */
- }
- last_block_sg = sg + block_segs;
+ map->flags = GNTMAP_host_map;
+ map->ref = sg->gref;
+ map->dom = xbb->otherend_id;
+ if (operation == BIO_WRITE)
+ map->flags |= GNTMAP_readonly;
+ sg++;
+ map++;
+ xbb_sg++;
+ seg_idx++;
+ req_seg_idx++;
}
/* Convert to the disk's sector size */
@@ -1734,7 +1690,7 @@
for (seg_idx = 0, map = xbb->maps; seg_idx < reqlist->nr_segments;
seg_idx++, map++){
- if (unlikely(map->status != 0)) {
+ if (__predict_false(map->status != 0)) {
DPRINTF("invalid buffer -- could not remap "
"it (%d)\n", map->status);
DPRINTF("Mapping(%d): Host Addr 0x%lx, flags "
@@ -1958,8 +1914,7 @@
* response be generated before we make room in
* the queue for that response.
*/
- xbb->rings.common.req_cons +=
- BLKIF_SEGS_TO_BLOCKS(ring_req->nr_segments);
+ xbb->rings.common.req_cons++;
xbb->reqs_received++;
cur_size = xbb_count_sects(ring_req);
@@ -2026,21 +1981,23 @@
* \param arg Callback argument registerd during event channel
* binding - the xbb_softc for this instance.
*/
-static void
-xbb_intr(void *arg)
+static int
+xbb_filter(void *arg)
{
struct xbb_softc *xbb;
- /* Defer to kernel thread. */
+ /* Defer to taskqueue thread. */
xbb = (struct xbb_softc *)arg;
taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task);
+
+ return (FILTER_HANDLED);
}
SDT_PROVIDER_DEFINE(xbb);
-SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_dev, flush, flush, "int");
-SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, read, read, "int", "uint64_t",
+SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_dev, flush, "int");
+SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, read, "int", "uint64_t",
"uint64_t");
-SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, write, write, "int",
+SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, write, "int",
"uint64_t", "uint64_t");
/*----------------------------- Backend Handlers -----------------------------*/
@@ -2061,7 +2018,6 @@
{
struct xbb_dev_data *dev_data;
struct bio *bios[XBB_MAX_SEGMENTS_PER_REQLIST];
- struct xbb_xen_req *nreq;
off_t bio_offset;
struct bio *bio;
struct xbb_sg *xbb_sg;
@@ -2079,9 +2035,8 @@
bio_idx = 0;
if (operation == BIO_FLUSH) {
- nreq = STAILQ_FIRST(&reqlist->contig_req_list);
bio = g_new_bio();
- if (unlikely(bio == NULL)) {
+ if (__predict_false(bio == NULL)) {
DPRINTF("Unable to allocate bio for BIO_FLUSH\n");
error = ENOMEM;
return (error);
@@ -2093,10 +2048,10 @@
bio->bio_offset = 0;
bio->bio_data = 0;
bio->bio_done = xbb_bio_done;
- bio->bio_caller1 = nreq;
+ bio->bio_caller1 = reqlist;
bio->bio_pblkno = 0;
- nreq->pendcnt = 1;
+ reqlist->pendcnt = 1;
SDT_PROBE1(xbb, kernel, xbb_dispatch_dev, flush,
device_get_unit(xbb->dev));
@@ -2143,7 +2098,7 @@
}
bio = bios[nbio++] = g_new_bio();
- if (unlikely(bio == NULL)) {
+ if (__predict_false(bio == NULL)) {
error = ENOMEM;
goto fail_free_bios;
}
@@ -2218,10 +2173,10 @@
return (error);
}
-SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_file, flush, flush, "int");
-SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, read, read, "int", "uint64_t",
+SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_file, flush, "int");
+SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, read, "int", "uint64_t",
"uint64_t");
-SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, write, write, "int",
+SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, write, "int",
"uint64_t", "uint64_t");
/**
@@ -2250,7 +2205,6 @@
void **p_vaddr;
int saved_uio_iovcnt;
#endif /* XBB_USE_BOUNCE_BUFFERS */
- int vfs_is_locked;
int error;
file_data = &xbb->backend.file;
@@ -2271,8 +2225,6 @@
SDT_PROBE1(xbb, kernel, xbb_dispatch_file, flush,
device_get_unit(xbb->dev));
- vfs_is_locked = VFS_LOCK_GIANT(xbb->vn->v_mount);
-
(void) vn_start_write(xbb->vn, &mountpoint, V_WAIT);
vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY);
@@ -2281,8 +2233,6 @@
vn_finished_write(mountpoint);
- VFS_UNLOCK_GIANT(vfs_is_locked);
-
goto bailout_send_response;
/* NOTREACHED */
}
@@ -2366,7 +2316,6 @@
}
#endif /* XBB_USE_BOUNCE_BUFFERS */
- vfs_is_locked = VFS_LOCK_GIANT(xbb->vn->v_mount);
switch (operation) {
case BIO_READ:
@@ -2442,7 +2391,6 @@
panic("invalid operation %d", operation);
/* NOTREACHED */
}
- VFS_UNLOCK_GIANT(vfs_is_locked);
#ifdef XBB_USE_BOUNCE_BUFFERS
/* We only need to copy here for read operations */
@@ -2489,7 +2437,6 @@
DPRINTF("closing dev=%s\n", xbb->dev_name);
if (xbb->vn) {
int flags = FREAD;
- int vfs_is_locked = 0;
if ((xbb->flags & XBBF_READ_ONLY) == 0)
flags |= FWRITE;
@@ -2504,7 +2451,6 @@
}
break;
case XBB_TYPE_FILE:
- vfs_is_locked = VFS_LOCK_GIANT(xbb->vn->v_mount);
break;
case XBB_TYPE_NONE:
default:
@@ -2519,7 +2465,6 @@
case XBB_TYPE_DISK:
break;
case XBB_TYPE_FILE:
- VFS_UNLOCK_GIANT(vfs_is_locked);
if (xbb->backend.file.cred != NULL) {
crfree(xbb->backend.file.cred);
xbb->backend.file.cred = NULL;
@@ -2684,7 +2629,6 @@
struct nameidata nd;
int flags;
int error;
- int vfs_is_locked;
flags = FREAD;
error = 0;
@@ -2744,8 +2688,6 @@
return (error);
}
- vfs_is_locked = NDHASGIANT(&nd);
-
NDFREE(&nd, NDF_ONLY_PNBUF);
xbb->vn = nd.ni_vp;
@@ -2761,7 +2703,6 @@
"or file", xbb->dev_name);
}
VOP_UNLOCK(xbb->vn, 0);
- VFS_UNLOCK_GIANT(vfs_is_locked);
if (error != 0) {
xbb_close_backend(xbb);
@@ -2789,7 +2730,7 @@
{
if (xbb->kva != 0) {
#ifndef XENHVM
- kmem_free(kernel_map, xbb->kva, xbb->kva_size);
+ kva_free(xbb->kva, xbb->kva_size);
#else
if (xbb->pseudo_phys_res != NULL) {
bus_release_resource(xbb->dev, SYS_RES_MEMORY,
@@ -2825,10 +2766,7 @@
if ((xbb->flags & XBBF_RING_CONNECTED) == 0)
return (0);
- if (xbb->irq != 0) {
- unbind_from_irqhandler(xbb->irq);
- xbb->irq = 0;
- }
+ xen_intr_unbind(&xbb->xen_intr_handle);
mtx_unlock(&xbb->lock);
taskqueue_drain(xbb->io_taskqueue, &xbb->io_task);
@@ -2980,13 +2918,14 @@
xbb->flags |= XBBF_RING_CONNECTED;
- error =
- bind_interdomain_evtchn_to_irqhandler(xbb->otherend_id,
- xbb->ring_config.evtchn,
- device_get_nameunit(xbb->dev),
- xbb_intr, /*arg*/xbb,
- INTR_TYPE_BIO | INTR_MPSAFE,
- &xbb->irq);
+ error = xen_intr_bind_remote_port(xbb->dev,
+ xbb->otherend_id,
+ xbb->ring_config.evtchn,
+ xbb_filter,
+ /*ithread_handler*/NULL,
+ /*arg*/xbb,
+ INTR_TYPE_BIO | INTR_MPSAFE,
+ &xbb->xen_intr_handle);
if (error) {
(void)xbb_disconnect(xbb);
xenbus_dev_fatal(xbb->dev, error, "binding event channel");
@@ -3028,7 +2967,7 @@
device_get_nameunit(xbb->dev), xbb->kva_size,
xbb->reqlist_kva_size);
#ifndef XENHVM
- xbb->kva = kmem_alloc_nofault(kernel_map, xbb->kva_size);
+ xbb->kva = kva_alloc(xbb->kva_size);
if (xbb->kva == 0)
return (ENOMEM);
xbb->gnt_base_addr = xbb->kva;
@@ -3079,7 +3018,7 @@
* Protocol defaults valid even if all negotiation fails.
*/
xbb->ring_config.ring_pages = 1;
- xbb->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
+ xbb->max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
xbb->max_request_size = xbb->max_request_segments * PAGE_SIZE;
/*
@@ -3110,60 +3049,23 @@
* fields.
*/
ring_page_order = 0;
+ xbb->max_requests = 32;
+
(void)xs_scanf(XST_NIL, otherend_path,
"ring-page-order", NULL, "%u",
&ring_page_order);
xbb->ring_config.ring_pages = 1 << ring_page_order;
- (void)xs_scanf(XST_NIL, otherend_path,
- "num-ring-pages", NULL, "%u",
- &xbb->ring_config.ring_pages);
ring_size = PAGE_SIZE * xbb->ring_config.ring_pages;
xbb->max_requests = BLKIF_MAX_RING_REQUESTS(ring_size);
- (void)xs_scanf(XST_NIL, otherend_path,
- "max-requests", NULL, "%u",
- &xbb->max_requests);
-
- (void)xs_scanf(XST_NIL, otherend_path,
- "max-request-segments", NULL, "%u",
- &xbb->max_request_segments);
-
- (void)xs_scanf(XST_NIL, otherend_path,
- "max-request-size", NULL, "%u",
- &xbb->max_request_size);
-
if (xbb->ring_config.ring_pages > XBB_MAX_RING_PAGES) {
xenbus_dev_fatal(xbb->dev, EINVAL,
"Front-end specified ring-pages of %u "
- "exceeds backend limit of %zu. "
+ "exceeds backend limit of %u. "
"Unable to connect.",
xbb->ring_config.ring_pages,
XBB_MAX_RING_PAGES);
return (EINVAL);
- } else if (xbb->max_requests > XBB_MAX_REQUESTS) {
- xenbus_dev_fatal(xbb->dev, EINVAL,
- "Front-end specified max_requests of %u "
- "exceeds backend limit of %u. "
- "Unable to connect.",
- xbb->max_requests,
- XBB_MAX_REQUESTS);
- return (EINVAL);
- } else if (xbb->max_request_segments > XBB_MAX_SEGMENTS_PER_REQUEST) {
- xenbus_dev_fatal(xbb->dev, EINVAL,
- "Front-end specified max_requests_segments "
- "of %u exceeds backend limit of %u. "
- "Unable to connect.",
- xbb->max_request_segments,
- XBB_MAX_SEGMENTS_PER_REQUEST);
- return (EINVAL);
- } else if (xbb->max_request_size > XBB_MAX_REQUEST_SIZE) {
- xenbus_dev_fatal(xbb->dev, EINVAL,
- "Front-end specified max_request_size "
- "of %u exceeds backend limit of %u. "
- "Unable to connect.",
- xbb->max_request_size,
- XBB_MAX_REQUEST_SIZE);
- return (EINVAL);
}
if (xbb->ring_config.ring_pages == 1) {
@@ -3701,18 +3603,6 @@
return (error);
}
- /*
- * Amazon EC2 client compatility. They refer to max-ring-pages
- * instead of to max-ring-page-order.
- */
- error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
- "max-ring-pages", "%zu", XBB_MAX_RING_PAGES);
- if (error) {
- xbb_attach_failed(xbb, error, "writing %s/max-ring-pages",
- xenbus_get_node(xbb->dev));
- return (error);
- }
-
max_ring_page_order = flsl(XBB_MAX_RING_PAGES) - 1;
error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
"max-ring-page-order", "%u", max_ring_page_order);
@@ -3722,32 +3612,6 @@
return (error);
}
- error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
- "max-requests", "%u", XBB_MAX_REQUESTS);
- if (error) {
- xbb_attach_failed(xbb, error, "writing %s/max-requests",
- xenbus_get_node(xbb->dev));
- return (error);
- }
-
- error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
- "max-request-segments", "%u",
- XBB_MAX_SEGMENTS_PER_REQUEST);
- if (error) {
- xbb_attach_failed(xbb, error, "writing %s/max-request-segments",
- xenbus_get_node(xbb->dev));
- return (error);
- }
-
- error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
- "max-request-size", "%u",
- XBB_MAX_REQUEST_SIZE);
- if (error) {
- xbb_attach_failed(xbb, error, "writing %s/max-request-size",
- xenbus_get_node(xbb->dev));
- return (error);
- }
-
/* Collect physical device information. */
error = xs_gather(XST_NIL, xenbus_get_otherend_path(xbb->dev),
"device-type", NULL, &xbb->dev_type,
@@ -3805,9 +3669,10 @@
* Create a taskqueue for doing work that must occur from a
* thread context.
*/
- xbb->io_taskqueue = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
- taskqueue_thread_enqueue,
- /*context*/&xbb->io_taskqueue);
+ xbb->io_taskqueue = taskqueue_create_fast(device_get_nameunit(dev),
+ M_NOWAIT,
+ taskqueue_thread_enqueue,
+ /*contxt*/&xbb->io_taskqueue);
if (xbb->io_taskqueue == NULL) {
xbb_attach_failed(xbb, error, "Unable to create taskqueue");
return (ENOMEM);
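Note on the blkback.c change above: the request limit is no longer a hard-coded 256 but is derived from the ring geometry, with XBB_MAX_RING_PAGES fixed at 32 and XBB_MAX_REQUESTS defined as __CONST_RING_SIZE(blkif, PAGE_SIZE * XBB_MAX_RING_PAGES), i.e. the number of request slots that fit in the shared ring rounded down to a power of two. The standalone sketch below only mimics that arithmetic; the 64-byte ring header and 112-byte slot size are illustrative assumptions, not the actual blkif shared-ring layout.

/*
 * Sketch of a __CONST_RING_SIZE()-style calculation: how many request
 * slots fit in a shared ring of a given size.  Structure sizes are
 * assumed values for illustration only.
 */
#include <stdio.h>

#define PAGE_SIZE		4096u
#define XBB_MAX_RING_PAGES	32u
#define RING_HEADER_BYTES	64u	/* assumed sring bookkeeping size */
#define RING_SLOT_BYTES		112u	/* assumed per-request slot size */

/* Round down to the nearest power of two, as the ring macros do. */
static unsigned int
round_down_pow2(unsigned int x)
{
	unsigned int p = 1;

	while (p * 2 <= x)
		p *= 2;
	return (p);
}

int
main(void)
{
	unsigned int ring_bytes = PAGE_SIZE * XBB_MAX_RING_PAGES;
	unsigned int slots = (ring_bytes - RING_HEADER_BYTES) / RING_SLOT_BYTES;
	unsigned int max_requests = round_down_pow2(slots);

	printf("%u ring bytes -> %u usable slots -> %u max requests\n",
	    ring_bytes, slots, max_requests);
	return (0);
}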
Modified: trunk/sys/dev/xen/blkfront/blkfront.c
===================================================================
--- trunk/sys/dev/xen/blkfront/blkfront.c 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/blkfront/blkfront.c 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,6 +1,8 @@
+/* $MidnightBSD$ */
/*
* XenBSD block device driver
*
+ * Copyright (c) 2010-2013 Spectra Logic Corporation
* Copyright (c) 2009 Scott Long, Yahoo!
* Copyright (c) 2009 Frank Suchomel, Citrix
* Copyright (c) 2009 Doug F. Rabson, Citrix
@@ -28,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/blkfront/blkfront.c 315676 2017-03-21 09:38:59Z royger $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -50,19 +52,17 @@
#include <machine/vmparam.h>
#include <sys/bus_dma.h>
-#include <machine/_inttypes.h>
-#include <machine/xen/xen-os.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-
+#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
-#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus/xenbusvar.h>
+#include <machine/_inttypes.h>
+#include <machine/xen/xenvar.h>
+
#include <geom/geom_disk.h>
#include <dev/xen/blkfront/block.h>
@@ -69,214 +69,398 @@
#include "xenbus_if.h"
-/* prototypes */
-static void xb_free_command(struct xb_command *cm);
-static void xb_startio(struct xb_softc *sc);
-static void blkfront_connect(struct xb_softc *);
-static void blkfront_closing(device_t);
-static int blkfront_detach(device_t);
-static int setup_blkring(struct xb_softc *);
-static void blkif_int(void *);
-static void blkfront_initialize(struct xb_softc *);
-static int blkif_completion(struct xb_command *);
-static void blkif_free(struct xb_softc *);
-static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int);
+/*--------------------------- Forward Declarations ---------------------------*/
+static void xbd_closing(device_t);
+static void xbd_startio(struct xbd_softc *sc);
+/*---------------------------------- Macros ----------------------------------*/
+#if 0
+#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
+#else
+#define DPRINTK(fmt, args...)
+#endif
+
+#define XBD_SECTOR_SHFT 9
+
+/*---------------------------- Global Static Data ----------------------------*/
static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");
-#define GRANT_INVALID_REF 0
+static int xbd_enable_indirect = 1;
+SYSCTL_NODE(_hw, OID_AUTO, xbd, CTLFLAG_RD, 0, "xbd driver parameters");
+SYSCTL_INT(_hw_xbd, OID_AUTO, xbd_enable_indirect, CTLFLAG_RDTUN,
+ &xbd_enable_indirect, 0, "Enable xbd indirect segments");
-/* Control whether runtime update of vbds is enabled. */
-#define ENABLE_VBD_UPDATE 0
+/*---------------------------- Command Processing ----------------------------*/
+static void
+xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag)
+{
+ if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) != 0)
+ return;
-#if ENABLE_VBD_UPDATE
-static void vbd_update(void);
-#endif
+ sc->xbd_flags |= xbd_flag;
+ sc->xbd_qfrozen_cnt++;
+}
-#define BLKIF_STATE_DISCONNECTED 0
-#define BLKIF_STATE_CONNECTED 1
-#define BLKIF_STATE_SUSPENDED 2
+static void
+xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag)
+{
+ if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) == 0)
+ return;
-#ifdef notyet
-static char *blkif_state_name[] = {
- [BLKIF_STATE_DISCONNECTED] = "disconnected",
- [BLKIF_STATE_CONNECTED] = "connected",
- [BLKIF_STATE_SUSPENDED] = "closed",
-};
+ if (sc->xbd_qfrozen_cnt == 0)
+ panic("%s: Thaw with flag 0x%x while not frozen.",
+ __func__, xbd_flag);
-static char * blkif_status_name[] = {
- [BLKIF_INTERFACE_STATUS_CLOSED] = "closed",
- [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
- [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected",
- [BLKIF_INTERFACE_STATUS_CHANGED] = "changed",
-};
-#endif
+ sc->xbd_flags &= ~xbd_flag;
+ sc->xbd_qfrozen_cnt--;
+}
-#if 0
-#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
-#else
-#define DPRINTK(fmt, args...)
-#endif
+static void
+xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag)
+{
+ if ((cm->cm_flags & XBDCF_FROZEN) != 0)
+ return;
-static int blkif_open(struct disk *dp);
-static int blkif_close(struct disk *dp);
-static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
-static int blkif_queue_request(struct xb_softc *sc, struct xb_command *cm);
-static void xb_strategy(struct bio *bp);
+ cm->cm_flags |= XBDCF_FROZEN|cm_flag;
+ xbd_freeze(sc, XBDF_NONE);
+}
-// In order to quiesce the device during kernel dumps, outstanding requests to
-// DOM0 for disk reads/writes need to be accounted for.
-static int xb_dump(void *, void *, vm_offset_t, off_t, size_t);
+static void
+xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm)
+{
+ if ((cm->cm_flags & XBDCF_FROZEN) == 0)
+ return;
-/* XXX move to xb_vbd.c when VBD update support is added */
-#define MAX_VBDS 64
+ cm->cm_flags &= ~XBDCF_FROZEN;
+ xbd_thaw(sc, XBDF_NONE);
+}
-#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */
-#define XBD_SECTOR_SHFT 9
+static inline void
+xbd_flush_requests(struct xbd_softc *sc)
+{
+ int notify;
-/*
- * Translate Linux major/minor to an appropriate name and unit
- * number. For HVM guests, this allows us to use the same drive names
- * with blkfront as the emulated drives, easing transition slightly.
- */
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->xbd_ring, notify);
+
+ if (notify)
+ xen_intr_signal(sc->xen_intr_handle);
+}
+
static void
-blkfront_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name)
+xbd_free_command(struct xbd_command *cm)
{
- static struct vdev_info {
- int major;
- int shift;
- int base;
- const char *name;
- } info[] = {
- {3, 6, 0, "ad"}, /* ide0 */
- {22, 6, 2, "ad"}, /* ide1 */
- {33, 6, 4, "ad"}, /* ide2 */
- {34, 6, 6, "ad"}, /* ide3 */
- {56, 6, 8, "ad"}, /* ide4 */
- {57, 6, 10, "ad"}, /* ide5 */
- {88, 6, 12, "ad"}, /* ide6 */
- {89, 6, 14, "ad"}, /* ide7 */
- {90, 6, 16, "ad"}, /* ide8 */
- {91, 6, 18, "ad"}, /* ide9 */
- {8, 4, 0, "da"}, /* scsi disk0 */
- {65, 4, 16, "da"}, /* scsi disk1 */
- {66, 4, 32, "da"}, /* scsi disk2 */
- {67, 4, 48, "da"}, /* scsi disk3 */
- {68, 4, 64, "da"}, /* scsi disk4 */
- {69, 4, 80, "da"}, /* scsi disk5 */
- {70, 4, 96, "da"}, /* scsi disk6 */
- {71, 4, 112, "da"}, /* scsi disk7 */
- {128, 4, 128, "da"}, /* scsi disk8 */
- {129, 4, 144, "da"}, /* scsi disk9 */
- {130, 4, 160, "da"}, /* scsi disk10 */
- {131, 4, 176, "da"}, /* scsi disk11 */
- {132, 4, 192, "da"}, /* scsi disk12 */
- {133, 4, 208, "da"}, /* scsi disk13 */
- {134, 4, 224, "da"}, /* scsi disk14 */
- {135, 4, 240, "da"}, /* scsi disk15 */
+ KASSERT((cm->cm_flags & XBDCF_Q_MASK) == XBD_Q_NONE,
+ ("Freeing command that is still on queue %d.",
+ cm->cm_flags & XBDCF_Q_MASK));
- {202, 4, 0, "xbd"}, /* xbd */
+ cm->cm_flags = XBDCF_INITIALIZER;
+ cm->cm_bp = NULL;
+ cm->cm_complete = NULL;
+ xbd_enqueue_cm(cm, XBD_Q_FREE);
+ xbd_thaw(cm->cm_sc, XBDF_CM_SHORTAGE);
+}
- {0, 0, 0, NULL},
- };
- int major = vdevice >> 8;
- int minor = vdevice & 0xff;
- int i;
+static void
+xbd_mksegarray(bus_dma_segment_t *segs, int nsegs,
+ grant_ref_t * gref_head, int otherend_id, int readonly,
+ grant_ref_t * sg_ref, blkif_request_segment_t * sg)
+{
+ struct blkif_request_segment *last_block_sg = sg + nsegs;
+ vm_paddr_t buffer_ma;
+ uint64_t fsect, lsect;
+ int ref;
- if (vdevice & (1 << 28)) {
- *unit = (vdevice & ((1 << 28) - 1)) >> 8;
- *name = "xbd";
+ while (sg < last_block_sg) {
+ buffer_ma = segs->ds_addr;
+ fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
+ lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1;
+
+ KASSERT(lsect <= 7, ("XEN disk driver data cannot "
+ "cross a page boundary"));
+
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(gref_head);
+
+ /*
+ * GNTTAB_LIST_END == 0xffffffff, but it is private
+ * to gnttab.c.
+ */
+ KASSERT(ref != ~0, ("grant_reference failed"));
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ otherend_id,
+ buffer_ma >> PAGE_SHIFT,
+ readonly);
+
+ *sg_ref = ref;
+ *sg = (struct blkif_request_segment) {
+ .gref = ref,
+ .first_sect = fsect,
+ .last_sect = lsect
+ };
+ sg++;
+ sg_ref++;
+ segs++;
+ }
+}
+
+static void
+xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
+{
+ struct xbd_softc *sc;
+ struct xbd_command *cm;
+ int op;
+
+ cm = arg;
+ sc = cm->cm_sc;
+
+ if (error) {
+ cm->cm_bp->bio_error = EIO;
+ biodone(cm->cm_bp);
+ xbd_free_command(cm);
return;
}
- for (i = 0; info[i].major; i++) {
- if (info[i].major == major) {
- *unit = info[i].base + (minor >> info[i].shift);
- *name = info[i].name;
- return;
- }
+ KASSERT(nsegs <= sc->xbd_max_request_segments,
+ ("Too many segments in a blkfront I/O"));
+
+ if (nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+ blkif_request_t *ring_req;
+
+ /* Fill out a blkif_request_t structure. */
+ ring_req = (blkif_request_t *)
+ RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
+ sc->xbd_ring.req_prod_pvt++;
+ ring_req->id = cm->cm_id;
+ ring_req->operation = cm->cm_operation;
+ ring_req->sector_number = cm->cm_sector_number;
+ ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
+ ring_req->nr_segments = nsegs;
+ cm->cm_nseg = nsegs;
+ xbd_mksegarray(segs, nsegs, &cm->cm_gref_head,
+ xenbus_get_otherend_id(sc->xbd_dev),
+ cm->cm_operation == BLKIF_OP_WRITE,
+ cm->cm_sg_refs, ring_req->seg);
+ } else {
+ blkif_request_indirect_t *ring_req;
+
+ /* Fill out a blkif_request_indirect_t structure. */
+ ring_req = (blkif_request_indirect_t *)
+ RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
+ sc->xbd_ring.req_prod_pvt++;
+ ring_req->id = cm->cm_id;
+ ring_req->operation = BLKIF_OP_INDIRECT;
+ ring_req->indirect_op = cm->cm_operation;
+ ring_req->sector_number = cm->cm_sector_number;
+ ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
+ ring_req->nr_segments = nsegs;
+ cm->cm_nseg = nsegs;
+ xbd_mksegarray(segs, nsegs, &cm->cm_gref_head,
+ xenbus_get_otherend_id(sc->xbd_dev),
+ cm->cm_operation == BLKIF_OP_WRITE,
+ cm->cm_sg_refs, cm->cm_indirectionpages);
+ memcpy(ring_req->indirect_grefs, &cm->cm_indirectionrefs,
+ sizeof(grant_ref_t) * sc->xbd_max_request_indirectpages);
}
- *unit = minor >> 4;
- *name = "xbd";
+ if (cm->cm_operation == BLKIF_OP_READ)
+ op = BUS_DMASYNC_PREREAD;
+ else if (cm->cm_operation == BLKIF_OP_WRITE)
+ op = BUS_DMASYNC_PREWRITE;
+ else
+ op = 0;
+ bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op);
+
+ gnttab_free_grant_references(cm->cm_gref_head);
+
+ xbd_enqueue_cm(cm, XBD_Q_BUSY);
+
+ /*
+ * If bus dma had to asynchronously call us back to dispatch
+ * this command, we are no longer executing in the context of
+ * xbd_startio(). Thus we cannot rely on xbd_startio()'s call to
+ * xbd_flush_requests() to publish this command to the backend
+ * along with any other commands that it could batch.
+ */
+ if ((cm->cm_flags & XBDCF_ASYNC_MAPPING) != 0)
+ xbd_flush_requests(sc);
+
+ return;
}
-int
-xlvbd_add(struct xb_softc *sc, blkif_sector_t sectors,
- int vdevice, uint16_t vdisk_info, unsigned long sector_size)
+static int
+xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm)
{
- int unit, error = 0;
- const char *name;
+ int error;
- blkfront_vdevice_to_unit(vdevice, &unit, &name);
+ error = bus_dmamap_load(sc->xbd_io_dmat, cm->cm_map, cm->cm_data,
+ cm->cm_datalen, xbd_queue_cb, cm, 0);
+ if (error == EINPROGRESS) {
+ /*
+ * Maintain queuing order by freezing the queue. The next
+ * command may not require as many resources as the command
+ * we just attempted to map, so we can't rely on bus dma
+ * blocking for it too.
+ */
+ xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING);
+ return (0);
+ }
- sc->xb_unit = unit;
+ return (error);
+}
- if (strcmp(name, "xbd"))
- device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit);
+static void
+xbd_restart_queue_callback(void *arg)
+{
+ struct xbd_softc *sc = arg;
- sc->xb_disk = disk_alloc();
- sc->xb_disk->d_unit = sc->xb_unit;
- sc->xb_disk->d_open = blkif_open;
- sc->xb_disk->d_close = blkif_close;
- sc->xb_disk->d_ioctl = blkif_ioctl;
- sc->xb_disk->d_strategy = xb_strategy;
- sc->xb_disk->d_dump = xb_dump;
- sc->xb_disk->d_name = name;
- sc->xb_disk->d_drv1 = sc;
- sc->xb_disk->d_sectorsize = sector_size;
+ mtx_lock(&sc->xbd_io_lock);
- sc->xb_disk->d_mediasize = sectors * sector_size;
- sc->xb_disk->d_maxsize = sc->max_request_size;
- sc->xb_disk->d_flags = 0;
- disk_create(sc->xb_disk, DISK_VERSION);
+ xbd_thaw(sc, XBDF_GNT_SHORTAGE);
- return error;
+ xbd_startio(sc);
+
+ mtx_unlock(&sc->xbd_io_lock);
}
-/************************ end VBD support *****************/
+static struct xbd_command *
+xbd_bio_command(struct xbd_softc *sc)
+{
+ struct xbd_command *cm;
+ struct bio *bp;
+ if (__predict_false(sc->xbd_state != XBD_STATE_CONNECTED))
+ return (NULL);
+
+ bp = xbd_dequeue_bio(sc);
+ if (bp == NULL)
+ return (NULL);
+
+ if ((cm = xbd_dequeue_cm(sc, XBD_Q_FREE)) == NULL) {
+ xbd_freeze(sc, XBDF_CM_SHORTAGE);
+ xbd_requeue_bio(sc, bp);
+ return (NULL);
+ }
+
+ if (gnttab_alloc_grant_references(sc->xbd_max_request_segments,
+ &cm->cm_gref_head) != 0) {
+ gnttab_request_free_callback(&sc->xbd_callback,
+ xbd_restart_queue_callback, sc,
+ sc->xbd_max_request_segments);
+ xbd_freeze(sc, XBDF_GNT_SHORTAGE);
+ xbd_requeue_bio(sc, bp);
+ xbd_enqueue_cm(cm, XBD_Q_FREE);
+ return (NULL);
+ }
+
+ cm->cm_bp = bp;
+ cm->cm_data = bp->bio_data;
+ cm->cm_datalen = bp->bio_bcount;
+ cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno;
+
+ switch (bp->bio_cmd) {
+ case BIO_READ:
+ cm->cm_operation = BLKIF_OP_READ;
+ break;
+ case BIO_WRITE:
+ cm->cm_operation = BLKIF_OP_WRITE;
+ if ((bp->bio_flags & BIO_ORDERED) != 0) {
+ if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
+ cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
+ } else {
+ /*
+ * Single step this command.
+ */
+ cm->cm_flags |= XBDCF_Q_FREEZE;
+ if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
+ /*
+ * Wait for in-flight requests to
+ * finish.
+ */
+ xbd_freeze(sc, XBDF_WAIT_IDLE);
+ xbd_requeue_cm(cm, XBD_Q_READY);
+ return (NULL);
+ }
+ }
+ }
+ break;
+ case BIO_FLUSH:
+ if ((sc->xbd_flags & XBDF_FLUSH) != 0)
+ cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE;
+ else if ((sc->xbd_flags & XBDF_BARRIER) != 0)
+ cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
+ else
+ panic("flush request, but no flush support available");
+ break;
+ default:
+ panic("unknown bio command %d", bp->bio_cmd);
+ }
+
+ return (cm);
+}
+
/*
- * Read/write routine for a buffer. Finds the proper unit, place it on
- * the sortq and kick the controller.
+ * Dequeue buffers and place them in the shared communication ring.
+ * Return when no more requests can be accepted or all buffers have
+ * been queued.
+ *
+ * Signal XEN once the ring has been filled out.
*/
static void
-xb_strategy(struct bio *bp)
+xbd_startio(struct xbd_softc *sc)
{
- struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
+ struct xbd_command *cm;
+ int error, queued = 0;
- /* bogus disk? */
- if (sc == NULL) {
- bp->bio_error = EINVAL;
- bp->bio_flags |= BIO_ERROR;
- bp->bio_resid = bp->bio_bcount;
- biodone(bp);
+ mtx_assert(&sc->xbd_io_lock, MA_OWNED);
+
+ if (sc->xbd_state != XBD_STATE_CONNECTED)
return;
- }
- /*
- * Place it in the queue of disk activities for this disk
- */
- mtx_lock(&sc->xb_io_lock);
+ while (!RING_FULL(&sc->xbd_ring)) {
- xb_enqueue_bio(sc, bp);
- xb_startio(sc);
+ if (sc->xbd_qfrozen_cnt != 0)
+ break;
- mtx_unlock(&sc->xb_io_lock);
- return;
+ cm = xbd_dequeue_cm(sc, XBD_Q_READY);
+
+ if (cm == NULL)
+ cm = xbd_bio_command(sc);
+
+ if (cm == NULL)
+ break;
+
+ if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) {
+ /*
+ * Single step command. Future work is
+ * held off until this command completes.
+ */
+ xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE);
+ }
+
+ if ((error = xbd_queue_request(sc, cm)) != 0) {
+ printf("xbd_queue_request returned %d\n", error);
+ break;
+ }
+ queued++;
+ }
+
+ if (queued != 0)
+ xbd_flush_requests(sc);
}
static void
-xb_bio_complete(struct xb_softc *sc, struct xb_command *cm)
+xbd_bio_complete(struct xbd_softc *sc, struct xbd_command *cm)
{
struct bio *bp;
- bp = cm->bp;
+ bp = cm->cm_bp;
- if ( unlikely(cm->status != BLKIF_RSP_OKAY) ) {
+ if (__predict_false(cm->cm_status != BLKIF_RSP_OKAY)) {
disk_err(bp, "disk error" , -1, 0);
- printf(" status: %x\n", cm->status);
+ printf(" status: %x\n", cm->cm_status);
bp->bio_flags |= BIO_ERROR;
}
@@ -285,24 +469,107 @@
else
bp->bio_resid = 0;
- xb_free_command(cm);
+ xbd_free_command(cm);
biodone(bp);
}
-// Quiesce the disk writes for a dump file before allowing the next buffer.
static void
-xb_quiesce(struct xb_softc *sc)
+xbd_int(void *xsc)
{
- int mtd;
+ struct xbd_softc *sc = xsc;
+ struct xbd_command *cm;
+ blkif_response_t *bret;
+ RING_IDX i, rp;
+ int op;
+ mtx_lock(&sc->xbd_io_lock);
+
+ if (__predict_false(sc->xbd_state == XBD_STATE_DISCONNECTED)) {
+ mtx_unlock(&sc->xbd_io_lock);
+ return;
+ }
+
+ again:
+ rp = sc->xbd_ring.sring->rsp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ for (i = sc->xbd_ring.rsp_cons; i != rp;) {
+ bret = RING_GET_RESPONSE(&sc->xbd_ring, i);
+ cm = &sc->xbd_shadow[bret->id];
+
+ xbd_remove_cm(cm, XBD_Q_BUSY);
+ gnttab_end_foreign_access_references(cm->cm_nseg,
+ cm->cm_sg_refs);
+ i++;
+
+ if (cm->cm_operation == BLKIF_OP_READ)
+ op = BUS_DMASYNC_POSTREAD;
+ else if (cm->cm_operation == BLKIF_OP_WRITE ||
+ cm->cm_operation == BLKIF_OP_WRITE_BARRIER)
+ op = BUS_DMASYNC_POSTWRITE;
+ else
+ op = 0;
+ bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op);
+ bus_dmamap_unload(sc->xbd_io_dmat, cm->cm_map);
+
+ /*
+ * Release any hold this command has on future command
+ * dispatch.
+ */
+ xbd_cm_thaw(sc, cm);
+
+ /*
+ * Directly call the i/o complete routine to save an
+ * an indirection in the common case.
+ */
+ cm->cm_status = bret->status;
+ if (cm->cm_bp)
+ xbd_bio_complete(sc, cm);
+ else if (cm->cm_complete != NULL)
+ cm->cm_complete(cm);
+ else
+ xbd_free_command(cm);
+ }
+
+ sc->xbd_ring.rsp_cons = i;
+
+ if (i != sc->xbd_ring.req_prod_pvt) {
+ int more_to_do;
+ RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, more_to_do);
+ if (more_to_do)
+ goto again;
+ } else {
+ sc->xbd_ring.sring->rsp_event = i + 1;
+ }
+
+ if (xbd_queue_length(sc, XBD_Q_BUSY) == 0)
+ xbd_thaw(sc, XBDF_WAIT_IDLE);
+
+ xbd_startio(sc);
+
+ if (__predict_false(sc->xbd_state == XBD_STATE_SUSPENDED))
+ wakeup(&sc->xbd_cm_q[XBD_Q_BUSY]);
+
+ mtx_unlock(&sc->xbd_io_lock);
+}
+
+/*------------------------------- Dump Support -------------------------------*/
+/**
+ * Quiesce the disk writes for a dump file before allowing the next buffer.
+ */
+static void
+xbd_quiesce(struct xbd_softc *sc)
+{
+ int mtd;
+
// While there are outstanding requests
- while (!TAILQ_EMPTY(&sc->cm_busy)) {
- RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, mtd);
+ while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
+ RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd);
if (mtd) {
/* Recieved request completions, update queue. */
- blkif_int(sc);
+ xbd_int(sc);
}
- if (!TAILQ_EMPTY(&sc->cm_busy)) {
+ if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
/*
* Still pending requests, wait for the disk i/o
* to complete.
@@ -314,60 +581,60 @@
/* Kernel dump function for a paravirtualized disk device */
static void
-xb_dump_complete(struct xb_command *cm)
+xbd_dump_complete(struct xbd_command *cm)
{
- xb_enqueue_complete(cm);
+ xbd_enqueue_cm(cm, XBD_Q_COMPLETE);
}
static int
-xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
- size_t length)
+xbd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
+ size_t length)
{
- struct disk *dp = arg;
- struct xb_softc *sc = (struct xb_softc *) dp->d_drv1;
- struct xb_command *cm;
- size_t chunk;
- int sbp;
- int rc = 0;
+ struct disk *dp = arg;
+ struct xbd_softc *sc = dp->d_drv1;
+ struct xbd_command *cm;
+ size_t chunk;
+ int sbp;
+ int rc = 0;
if (length <= 0)
return (rc);
- xb_quiesce(sc); /* All quiet on the western front. */
+ xbd_quiesce(sc); /* All quiet on the western front. */
/*
* If this lock is held, then this module is failing, and a
* successful kernel dump is highly unlikely anyway.
*/
- mtx_lock(&sc->xb_io_lock);
+ mtx_lock(&sc->xbd_io_lock);
/* Split the 64KB block as needed */
for (sbp=0; length > 0; sbp++) {
- cm = xb_dequeue_free(sc);
+ cm = xbd_dequeue_cm(sc, XBD_Q_FREE);
if (cm == NULL) {
- mtx_unlock(&sc->xb_io_lock);
- device_printf(sc->xb_dev, "dump: no more commands?\n");
+ mtx_unlock(&sc->xbd_io_lock);
+ device_printf(sc->xbd_dev, "dump: no more commands?\n");
return (EBUSY);
}
- if (gnttab_alloc_grant_references(sc->max_request_segments,
- &cm->gref_head) != 0) {
- xb_free_command(cm);
- mtx_unlock(&sc->xb_io_lock);
- device_printf(sc->xb_dev, "no more grant allocs?\n");
+ if (gnttab_alloc_grant_references(sc->xbd_max_request_segments,
+ &cm->cm_gref_head) != 0) {
+ xbd_free_command(cm);
+ mtx_unlock(&sc->xbd_io_lock);
+ device_printf(sc->xbd_dev, "no more grant allocs?\n");
return (EBUSY);
}
- chunk = length > sc->max_request_size
- ? sc->max_request_size : length;
- cm->data = virtual;
- cm->datalen = chunk;
- cm->operation = BLKIF_OP_WRITE;
- cm->sector_number = offset / dp->d_sectorsize;
- cm->cm_complete = xb_dump_complete;
+ chunk = length > sc->xbd_max_request_size ?
+ sc->xbd_max_request_size : length;
+ cm->cm_data = virtual;
+ cm->cm_datalen = chunk;
+ cm->cm_operation = BLKIF_OP_WRITE;
+ cm->cm_sector_number = offset / dp->d_sectorsize;
+ cm->cm_complete = xbd_dump_complete;
- xb_enqueue_ready(cm);
+ xbd_enqueue_cm(cm, XBD_Q_READY);
length -= chunk;
offset += chunk;
@@ -375,175 +642,462 @@
}
/* Tell DOM0 to do the I/O */
- xb_startio(sc);
- mtx_unlock(&sc->xb_io_lock);
+ xbd_startio(sc);
+ mtx_unlock(&sc->xbd_io_lock);
/* Poll for the completion. */
- xb_quiesce(sc); /* All quite on the eastern front */
+ xbd_quiesce(sc); /* All quite on the eastern front */
/* If there were any errors, bail out... */
- while ((cm = xb_dequeue_complete(sc)) != NULL) {
- if (cm->status != BLKIF_RSP_OKAY) {
- device_printf(sc->xb_dev,
+ while ((cm = xbd_dequeue_cm(sc, XBD_Q_COMPLETE)) != NULL) {
+ if (cm->cm_status != BLKIF_RSP_OKAY) {
+ device_printf(sc->xbd_dev,
"Dump I/O failed at sector %jd\n",
- cm->sector_number);
+ cm->cm_sector_number);
rc = EIO;
}
- xb_free_command(cm);
+ xbd_free_command(cm);
}
return (rc);
}
+/*----------------------------- Disk Entrypoints -----------------------------*/
+static int
+xbd_open(struct disk *dp)
+{
+ struct xbd_softc *sc = dp->d_drv1;
+ if (sc == NULL) {
+ printf("xb%d: not found", sc->xbd_unit);
+ return (ENXIO);
+ }
+
+ sc->xbd_flags |= XBDF_OPEN;
+ sc->xbd_users++;
+ return (0);
+}
+
static int
-blkfront_probe(device_t dev)
+xbd_close(struct disk *dp)
{
+ struct xbd_softc *sc = dp->d_drv1;
- if (!strcmp(xenbus_get_type(dev), "vbd")) {
- device_set_desc(dev, "Virtual Block Device");
- device_quiet(dev);
- return (0);
+ if (sc == NULL)
+ return (ENXIO);
+ sc->xbd_flags &= ~XBDF_OPEN;
+ if (--(sc->xbd_users) == 0) {
+ /*
+ * Check whether we have been instructed to close. We will
+ * have ignored this request initially, as the device was
+ * still mounted.
+ */
+ if (xenbus_get_otherend_state(sc->xbd_dev) ==
+ XenbusStateClosing)
+ xbd_closing(sc->xbd_dev);
}
+ return (0);
+}
- return (ENXIO);
+static int
+xbd_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
+{
+ struct xbd_softc *sc = dp->d_drv1;
+
+ if (sc == NULL)
+ return (ENXIO);
+
+ return (ENOTTY);
}
+/*
+ * Read/write routine for a buffer. Finds the proper unit, place it on
+ * the sortq and kick the controller.
+ */
static void
-xb_setup_sysctl(struct xb_softc *xb)
+xbd_strategy(struct bio *bp)
{
+ struct xbd_softc *sc = bp->bio_disk->d_drv1;
+
+ /* bogus disk? */
+ if (sc == NULL) {
+ bp->bio_error = EINVAL;
+ bp->bio_flags |= BIO_ERROR;
+ bp->bio_resid = bp->bio_bcount;
+ biodone(bp);
+ return;
+ }
+
+ /*
+ * Place it in the queue of disk activities for this disk
+ */
+ mtx_lock(&sc->xbd_io_lock);
+
+ xbd_enqueue_bio(sc, bp);
+ xbd_startio(sc);
+
+ mtx_unlock(&sc->xbd_io_lock);
+ return;
+}
+
+/*------------------------------ Ring Management -----------------------------*/
+static int
+xbd_alloc_ring(struct xbd_softc *sc)
+{
+ blkif_sring_t *sring;
+ uintptr_t sring_page_addr;
+ int error;
+ int i;
+
+ sring = malloc(sc->xbd_ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
+ M_NOWAIT|M_ZERO);
+ if (sring == NULL) {
+ xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "allocating shared ring");
+ return (ENOMEM);
+ }
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&sc->xbd_ring, sring, sc->xbd_ring_pages * PAGE_SIZE);
+
+ for (i = 0, sring_page_addr = (uintptr_t)sring;
+ i < sc->xbd_ring_pages;
+ i++, sring_page_addr += PAGE_SIZE) {
+
+ error = xenbus_grant_ring(sc->xbd_dev,
+ (vtomach(sring_page_addr) >> PAGE_SHIFT),
+ &sc->xbd_ring_ref[i]);
+ if (error) {
+ xenbus_dev_fatal(sc->xbd_dev, error,
+ "granting ring_ref(%d)", i);
+ return (error);
+ }
+ }
+ if (sc->xbd_ring_pages == 1) {
+ error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev),
+ "ring-ref", "%u", sc->xbd_ring_ref[0]);
+ if (error) {
+ xenbus_dev_fatal(sc->xbd_dev, error,
+ "writing %s/ring-ref",
+ xenbus_get_node(sc->xbd_dev));
+ return (error);
+ }
+ } else {
+ for (i = 0; i < sc->xbd_ring_pages; i++) {
+ char ring_ref_name[]= "ring_refXX";
+
+ snprintf(ring_ref_name, sizeof(ring_ref_name),
+ "ring-ref%u", i);
+ error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev),
+ ring_ref_name, "%u", sc->xbd_ring_ref[i]);
+ if (error) {
+ xenbus_dev_fatal(sc->xbd_dev, error,
+ "writing %s/%s",
+ xenbus_get_node(sc->xbd_dev),
+ ring_ref_name);
+ return (error);
+ }
+ }
+ }
+
+ error = xen_intr_alloc_and_bind_local_port(sc->xbd_dev,
+ xenbus_get_otherend_id(sc->xbd_dev), NULL, xbd_int, sc,
+ INTR_TYPE_BIO | INTR_MPSAFE, &sc->xen_intr_handle);
+ if (error) {
+ xenbus_dev_fatal(sc->xbd_dev, error,
+ "xen_intr_alloc_and_bind_local_port failed");
+ return (error);
+ }
+
+ return (0);
+}
+
+static void
+xbd_free_ring(struct xbd_softc *sc)
+{
+ int i;
+
+ if (sc->xbd_ring.sring == NULL)
+ return;
+
+ for (i = 0; i < sc->xbd_ring_pages; i++) {
+ if (sc->xbd_ring_ref[i] != GRANT_REF_INVALID) {
+ gnttab_end_foreign_access_ref(sc->xbd_ring_ref[i]);
+ sc->xbd_ring_ref[i] = GRANT_REF_INVALID;
+ }
+ }
+ free(sc->xbd_ring.sring, M_XENBLOCKFRONT);
+ sc->xbd_ring.sring = NULL;
+}
+
+/*-------------------------- Initialization/Teardown -------------------------*/
+static int
+xbd_feature_string(struct xbd_softc *sc, char *features, size_t len)
+{
+ struct sbuf sb;
+ int feature_cnt;
+
+ sbuf_new(&sb, features, len, SBUF_FIXEDLEN);
+
+ feature_cnt = 0;
+ if ((sc->xbd_flags & XBDF_FLUSH) != 0) {
+ sbuf_printf(&sb, "flush");
+ feature_cnt++;
+ }
+
+ if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
+ if (feature_cnt != 0)
+ sbuf_printf(&sb, ", ");
+ sbuf_printf(&sb, "write_barrier");
+ feature_cnt++;
+ }
+
+ if ((sc->xbd_flags & XBDF_DISCARD) != 0) {
+ if (feature_cnt != 0)
+ sbuf_printf(&sb, ", ");
+ sbuf_printf(&sb, "discard");
+ feature_cnt++;
+ }
+
+ if ((sc->xbd_flags & XBDF_PERSISTENT) != 0) {
+ if (feature_cnt != 0)
+ sbuf_printf(&sb, ", ");
+ sbuf_printf(&sb, "persistent_grants");
+ feature_cnt++;
+ }
+
+ (void) sbuf_finish(&sb);
+ return (sbuf_len(&sb));
+}
+
+static int
+xbd_sysctl_features(SYSCTL_HANDLER_ARGS)
+{
+ char features[80];
+ struct xbd_softc *sc = arg1;
+ int error;
+ int len;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+
+ len = xbd_feature_string(sc, features, sizeof(features));
+
+ /* len is -1 on error, which will make the SYSCTL_OUT a no-op. */
+ return (SYSCTL_OUT(req, features, len + 1/*NUL*/));
+}
+
+static void
+xbd_setup_sysctl(struct xbd_softc *xbd)
+{
struct sysctl_ctx_list *sysctl_ctx = NULL;
- struct sysctl_oid *sysctl_tree = NULL;
+ struct sysctl_oid *sysctl_tree = NULL;
+ struct sysctl_oid_list *children;
- sysctl_ctx = device_get_sysctl_ctx(xb->xb_dev);
+ sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev);
if (sysctl_ctx == NULL)
return;
- sysctl_tree = device_get_sysctl_tree(xb->xb_dev);
+ sysctl_tree = device_get_sysctl_tree(xbd->xbd_dev);
if (sysctl_tree == NULL)
return;
- SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
- "max_requests", CTLFLAG_RD, &xb->max_requests, -1,
- "maximum outstanding requests (negotiated)");
+ children = SYSCTL_CHILDREN(sysctl_tree);
+ SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
+ "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1,
+ "maximum outstanding requests (negotiated)");
- SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
- "max_request_segments", CTLFLAG_RD,
- &xb->max_request_segments, 0,
- "maximum number of pages per requests (negotiated)");
+ SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
+ "max_request_segments", CTLFLAG_RD,
+ &xbd->xbd_max_request_segments, 0,
+ "maximum number of pages per requests (negotiated)");
- SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
- "max_request_size", CTLFLAG_RD,
- &xb->max_request_size, 0,
- "maximum size in bytes of a request (negotiated)");
+ SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
+ "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0,
+ "maximum size in bytes of a request (negotiated)");
- SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
- "ring_pages", CTLFLAG_RD,
- &xb->ring_pages, 0,
- "communication channel pages (negotiated)");
+ SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
+ "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0,
+ "communication channel pages (negotiated)");
+
+ SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO,
+ "features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0,
+ xbd_sysctl_features, "A", "protocol features (negotiated)");
}
/*
- * Setup supplies the backend dir, virtual device. We place an event
- * channel and shared frame entries. We watch backend to wait if it's
- * ok.
+ * Translate Linux major/minor to an appropriate name and unit
+ * number. For HVM guests, this allows us to use the same drive names
+ * with blkfront as the emulated drives, easing transition slightly.
*/
-static int
-blkfront_attach(device_t dev)
+static void
+xbd_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name)
{
- struct xb_softc *sc;
- const char *name;
- uint32_t vdevice;
- int error;
+ static struct vdev_info {
+ int major;
+ int shift;
+ int base;
+ const char *name;
+ } info[] = {
+ {3, 6, 0, "ada"}, /* ide0 */
+ {22, 6, 2, "ada"}, /* ide1 */
+ {33, 6, 4, "ada"}, /* ide2 */
+ {34, 6, 6, "ada"}, /* ide3 */
+ {56, 6, 8, "ada"}, /* ide4 */
+ {57, 6, 10, "ada"}, /* ide5 */
+ {88, 6, 12, "ada"}, /* ide6 */
+ {89, 6, 14, "ada"}, /* ide7 */
+ {90, 6, 16, "ada"}, /* ide8 */
+ {91, 6, 18, "ada"}, /* ide9 */
+
+ {8, 4, 0, "da"}, /* scsi disk0 */
+ {65, 4, 16, "da"}, /* scsi disk1 */
+ {66, 4, 32, "da"}, /* scsi disk2 */
+ {67, 4, 48, "da"}, /* scsi disk3 */
+ {68, 4, 64, "da"}, /* scsi disk4 */
+ {69, 4, 80, "da"}, /* scsi disk5 */
+ {70, 4, 96, "da"}, /* scsi disk6 */
+ {71, 4, 112, "da"}, /* scsi disk7 */
+ {128, 4, 128, "da"}, /* scsi disk8 */
+ {129, 4, 144, "da"}, /* scsi disk9 */
+ {130, 4, 160, "da"}, /* scsi disk10 */
+ {131, 4, 176, "da"}, /* scsi disk11 */
+ {132, 4, 192, "da"}, /* scsi disk12 */
+ {133, 4, 208, "da"}, /* scsi disk13 */
+ {134, 4, 224, "da"}, /* scsi disk14 */
+ {135, 4, 240, "da"}, /* scsi disk15 */
+
+ {202, 4, 0, "xbd"}, /* xbd */
+
+ {0, 0, 0, NULL},
+ };
+ int major = vdevice >> 8;
+ int minor = vdevice & 0xff;
int i;
- int unit;
- /* FIXME: Use dynamic device id if this is not set. */
- error = xs_scanf(XST_NIL, xenbus_get_node(dev),
- "virtual-device", NULL, "%" PRIu32, &vdevice);
- if (error) {
- xenbus_dev_fatal(dev, error, "reading virtual-device");
- device_printf(dev, "Couldn't determine virtual device.\n");
- return (error);
+ if (vdevice & (1 << 28)) {
+ *unit = (vdevice & ((1 << 28) - 1)) >> 8;
+ *name = "xbd";
+ return;
}
- blkfront_vdevice_to_unit(vdevice, &unit, &name);
- if (!strcmp(name, "xbd"))
- device_set_unit(dev, unit);
+ for (i = 0; info[i].major; i++) {
+ if (info[i].major == major) {
+ *unit = info[i].base + (minor >> info[i].shift);
+ *name = info[i].name;
+ return;
+ }
+ }
- sc = device_get_softc(dev);
- mtx_init(&sc->xb_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
- xb_initq_free(sc);
- xb_initq_busy(sc);
- xb_initq_ready(sc);
- xb_initq_complete(sc);
- xb_initq_bio(sc);
- for (i = 0; i < XBF_MAX_RING_PAGES; i++)
- sc->ring_ref[i] = GRANT_INVALID_REF;
+ *unit = minor >> 4;
+ *name = "xbd";
+}
- sc->xb_dev = dev;
- sc->vdevice = vdevice;
- sc->connected = BLKIF_STATE_DISCONNECTED;
+int
+xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
+ int vdevice, uint16_t vdisk_info, unsigned long sector_size,
+ unsigned long phys_sector_size)
+{
+ char features[80];
+ int unit, error = 0;
+ const char *name;
- xb_setup_sysctl(sc);
+ xbd_vdevice_to_unit(vdevice, &unit, &name);
- /* Wait for backend device to publish its protocol capabilities. */
- xenbus_set_state(dev, XenbusStateInitialising);
+ sc->xbd_unit = unit;
- return (0);
-}
+ if (strcmp(name, "xbd") != 0)
+ device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit);
-static int
-blkfront_suspend(device_t dev)
-{
- struct xb_softc *sc = device_get_softc(dev);
- int retval;
- int saved_state;
+ if (xbd_feature_string(sc, features, sizeof(features)) > 0) {
+ device_printf(sc->xbd_dev, "features: %s\n",
+ features);
+ }
- /* Prevent new requests being issued until we fix things up. */
- mtx_lock(&sc->xb_io_lock);
- saved_state = sc->connected;
- sc->connected = BLKIF_STATE_SUSPENDED;
+ sc->xbd_disk = disk_alloc();
+ sc->xbd_disk->d_unit = sc->xbd_unit;
+ sc->xbd_disk->d_open = xbd_open;
+ sc->xbd_disk->d_close = xbd_close;
+ sc->xbd_disk->d_ioctl = xbd_ioctl;
+ sc->xbd_disk->d_strategy = xbd_strategy;
+ sc->xbd_disk->d_dump = xbd_dump;
+ sc->xbd_disk->d_name = name;
+ sc->xbd_disk->d_drv1 = sc;
+ sc->xbd_disk->d_sectorsize = sector_size;
+ sc->xbd_disk->d_stripesize = phys_sector_size;
+ sc->xbd_disk->d_stripeoffset = 0;
- /* Wait for outstanding I/O to drain. */
- retval = 0;
- while (TAILQ_EMPTY(&sc->cm_busy) == 0) {
- if (msleep(&sc->cm_busy, &sc->xb_io_lock,
- PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
- retval = EBUSY;
- break;
- }
+ sc->xbd_disk->d_mediasize = sectors * sector_size;
+ sc->xbd_disk->d_maxsize = sc->xbd_max_request_size;
+ sc->xbd_disk->d_flags = 0;
+ if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) {
+ sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
+ device_printf(sc->xbd_dev,
+ "synchronize cache commands enabled.\n");
}
- mtx_unlock(&sc->xb_io_lock);
+ disk_create(sc->xbd_disk, DISK_VERSION);
- if (retval != 0)
- sc->connected = saved_state;
-
- return (retval);
+ return error;
}
-static int
-blkfront_resume(device_t dev)
+static void
+xbd_free(struct xbd_softc *sc)
{
- struct xb_softc *sc = device_get_softc(dev);
+ int i;
+
+ /* Prevent new requests being issued until we fix things up. */
+ mtx_lock(&sc->xbd_io_lock);
+ sc->xbd_state = XBD_STATE_DISCONNECTED;
+ mtx_unlock(&sc->xbd_io_lock);
- DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));
+ /* Free resources associated with old device channel. */
+ xbd_free_ring(sc);
+ if (sc->xbd_shadow) {
- blkif_free(sc);
- blkfront_initialize(sc);
- return (0);
+ for (i = 0; i < sc->xbd_max_requests; i++) {
+ struct xbd_command *cm;
+
+ cm = &sc->xbd_shadow[i];
+ if (cm->cm_sg_refs != NULL) {
+ free(cm->cm_sg_refs, M_XENBLOCKFRONT);
+ cm->cm_sg_refs = NULL;
+ }
+
+ if (cm->cm_indirectionpages != NULL) {
+ gnttab_end_foreign_access_references(
+ sc->xbd_max_request_indirectpages,
+ &cm->cm_indirectionrefs[0]);
+ contigfree(cm->cm_indirectionpages, PAGE_SIZE *
+ sc->xbd_max_request_indirectpages,
+ M_XENBLOCKFRONT);
+ cm->cm_indirectionpages = NULL;
+ }
+
+ bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map);
+ }
+ free(sc->xbd_shadow, M_XENBLOCKFRONT);
+ sc->xbd_shadow = NULL;
+
+ bus_dma_tag_destroy(sc->xbd_io_dmat);
+
+ xbd_initq_cm(sc, XBD_Q_FREE);
+ xbd_initq_cm(sc, XBD_Q_READY);
+ xbd_initq_cm(sc, XBD_Q_COMPLETE);
+ }
+
+ xen_intr_unbind(&sc->xen_intr_handle);
+
}
+/*--------------------------- State Change Handlers --------------------------*/
static void
-blkfront_initialize(struct xb_softc *sc)
+xbd_initialize(struct xbd_softc *sc)
{
const char *otherend_path;
const char *node_path;
uint32_t max_ring_page_order;
int error;
- int i;
- if (xenbus_get_state(sc->xb_dev) != XenbusStateInitialising) {
+ if (xenbus_get_state(sc->xbd_dev) != XenbusStateInitialising) {
/* Initialization has already been performed. */
return;
}
@@ -553,10 +1107,7 @@
* setting fails.
*/
max_ring_page_order = 0;
- sc->ring_pages = 1;
- sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
- sc->max_request_size = XBF_SEGS_TO_SIZE(sc->max_request_segments);
- sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);
+ sc->xbd_ring_pages = 1;
/*
* Protocol negotiation.
@@ -569,334 +1120,120 @@
* \note xs_scanf() does not update variables for unmatched
* fields.
*/
- otherend_path = xenbus_get_otherend_path(sc->xb_dev);
- node_path = xenbus_get_node(sc->xb_dev);
+ otherend_path = xenbus_get_otherend_path(sc->xbd_dev);
+ node_path = xenbus_get_node(sc->xbd_dev);
/* Support both backend schemes for relaying ring page limits. */
(void)xs_scanf(XST_NIL, otherend_path,
- "max-ring-page-order", NULL, "%" PRIu32,
- &max_ring_page_order);
- sc->ring_pages = 1 << max_ring_page_order;
+ "max-ring-page-order", NULL, "%" PRIu32,
+ &max_ring_page_order);
+ sc->xbd_ring_pages = 1 << max_ring_page_order;
(void)xs_scanf(XST_NIL, otherend_path,
- "max-ring-pages", NULL, "%" PRIu32,
- &sc->ring_pages);
- if (sc->ring_pages < 1)
- sc->ring_pages = 1;
+ "max-ring-pages", NULL, "%" PRIu32,
+ &sc->xbd_ring_pages);
+ if (sc->xbd_ring_pages < 1)
+ sc->xbd_ring_pages = 1;
- sc->max_requests = BLKIF_MAX_RING_REQUESTS(sc->ring_pages * PAGE_SIZE);
- (void)xs_scanf(XST_NIL, otherend_path,
- "max-requests", NULL, "%" PRIu32,
- &sc->max_requests);
-
- (void)xs_scanf(XST_NIL, otherend_path,
- "max-request-segments", NULL, "%" PRIu32,
- &sc->max_request_segments);
-
- (void)xs_scanf(XST_NIL, otherend_path,
- "max-request-size", NULL, "%" PRIu32,
- &sc->max_request_size);
-
- if (sc->ring_pages > XBF_MAX_RING_PAGES) {
- device_printf(sc->xb_dev, "Back-end specified ring-pages of "
- "%u limited to front-end limit of %zu.\n",
- sc->ring_pages, XBF_MAX_RING_PAGES);
- sc->ring_pages = XBF_MAX_RING_PAGES;
+ if (sc->xbd_ring_pages > XBD_MAX_RING_PAGES) {
+ device_printf(sc->xbd_dev,
+ "Back-end specified ring-pages of %u "
+ "limited to front-end limit of %u.\n",
+ sc->xbd_ring_pages, XBD_MAX_RING_PAGES);
+ sc->xbd_ring_pages = XBD_MAX_RING_PAGES;
}
- if (powerof2(sc->ring_pages) == 0) {
+ if (powerof2(sc->xbd_ring_pages) == 0) {
uint32_t new_page_limit;
- new_page_limit = 0x01 << (fls(sc->ring_pages) - 1);
- device_printf(sc->xb_dev, "Back-end specified ring-pages of "
- "%u is not a power of 2. Limited to %u.\n",
- sc->ring_pages, new_page_limit);
- sc->ring_pages = new_page_limit;
+ new_page_limit = 0x01 << (fls(sc->xbd_ring_pages) - 1);
+ device_printf(sc->xbd_dev,
+ "Back-end specified ring-pages of %u "
+ "is not a power of 2. Limited to %u.\n",
+ sc->xbd_ring_pages, new_page_limit);
+ sc->xbd_ring_pages = new_page_limit;
}
- if (sc->max_requests > XBF_MAX_REQUESTS) {
- device_printf(sc->xb_dev, "Back-end specified max_requests of "
- "%u limited to front-end limit of %u.\n",
- sc->max_requests, XBF_MAX_REQUESTS);
- sc->max_requests = XBF_MAX_REQUESTS;
+ sc->xbd_max_requests =
+ BLKIF_MAX_RING_REQUESTS(sc->xbd_ring_pages * PAGE_SIZE);
+ if (sc->xbd_max_requests > XBD_MAX_REQUESTS) {
+ device_printf(sc->xbd_dev,
+ "Back-end specified max_requests of %u "
+ "limited to front-end limit of %zu.\n",
+ sc->xbd_max_requests, XBD_MAX_REQUESTS);
+ sc->xbd_max_requests = XBD_MAX_REQUESTS;
}
- if (sc->max_request_segments > XBF_MAX_SEGMENTS_PER_REQUEST) {
- device_printf(sc->xb_dev, "Back-end specified "
- "max_request_segments of %u limited to "
- "front-end limit of %u.\n",
- sc->max_request_segments,
- XBF_MAX_SEGMENTS_PER_REQUEST);
- sc->max_request_segments = XBF_MAX_SEGMENTS_PER_REQUEST;
- }
-
- if (sc->max_request_size > XBF_MAX_REQUEST_SIZE) {
- device_printf(sc->xb_dev, "Back-end specified "
- "max_request_size of %u limited to front-end "
- "limit of %u.\n", sc->max_request_size,
- XBF_MAX_REQUEST_SIZE);
- sc->max_request_size = XBF_MAX_REQUEST_SIZE;
- }
-
- if (sc->max_request_size > XBF_SEGS_TO_SIZE(sc->max_request_segments)) {
- device_printf(sc->xb_dev, "Back-end specified "
- "max_request_size of %u limited to front-end "
- "limit of %u. (Too few segments.)\n",
- sc->max_request_size,
- XBF_SEGS_TO_SIZE(sc->max_request_segments));
- sc->max_request_size =
- XBF_SEGS_TO_SIZE(sc->max_request_segments);
- }
-
- sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);
-
- /* Allocate datastructures based on negotiated values. */
- error = bus_dma_tag_create(bus_get_dma_tag(sc->xb_dev), /* parent */
- 512, PAGE_SIZE, /* algnmnt, boundary */
- BUS_SPACE_MAXADDR, /* lowaddr */
- BUS_SPACE_MAXADDR, /* highaddr */
- NULL, NULL, /* filter, filterarg */
- sc->max_request_size,
- sc->max_request_segments,
- PAGE_SIZE, /* maxsegsize */
- BUS_DMA_ALLOCNOW, /* flags */
- busdma_lock_mutex, /* lockfunc */
- &sc->xb_io_lock, /* lockarg */
- &sc->xb_io_dmat);
- if (error != 0) {
- xenbus_dev_fatal(sc->xb_dev, error,
- "Cannot allocate parent DMA tag\n");
+ if (xbd_alloc_ring(sc) != 0)
return;
- }
- /* Per-transaction data allocation. */
- sc->shadow = malloc(sizeof(*sc->shadow) * sc->max_requests,
- M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
- if (sc->shadow == NULL) {
- bus_dma_tag_destroy(sc->xb_io_dmat);
- xenbus_dev_fatal(sc->xb_dev, error,
- "Cannot allocate request structures\n");
- return;
- }
-
- for (i = 0; i < sc->max_requests; i++) {
- struct xb_command *cm;
-
- cm = &sc->shadow[i];
- cm->sg_refs = malloc(sizeof(grant_ref_t)
- * sc->max_request_segments,
- M_XENBLOCKFRONT, M_NOWAIT);
- if (cm->sg_refs == NULL)
- break;
- cm->id = i;
- cm->cm_sc = sc;
- if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0)
- break;
- xb_free_command(cm);
- }
-
- if (setup_blkring(sc) != 0)
- return;
-
/* Support both backend schemes for relaying ring page limits. */
- if (sc->ring_pages > 1) {
+ if (sc->xbd_ring_pages > 1) {
error = xs_printf(XST_NIL, node_path,
- "num-ring-pages","%u", sc->ring_pages);
+ "num-ring-pages","%u",
+ sc->xbd_ring_pages);
if (error) {
- xenbus_dev_fatal(sc->xb_dev, error,
- "writing %s/num-ring-pages",
- node_path);
+ xenbus_dev_fatal(sc->xbd_dev, error,
+ "writing %s/num-ring-pages",
+ node_path);
return;
}
error = xs_printf(XST_NIL, node_path,
- "ring-page-order", "%u",
- fls(sc->ring_pages) - 1);
+ "ring-page-order", "%u",
+ fls(sc->xbd_ring_pages) - 1);
if (error) {
- xenbus_dev_fatal(sc->xb_dev, error,
- "writing %s/ring-page-order",
- node_path);
+ xenbus_dev_fatal(sc->xbd_dev, error,
+ "writing %s/ring-page-order",
+ node_path);
return;
}
}
- error = xs_printf(XST_NIL, node_path,
- "max-requests","%u", sc->max_requests);
- if (error) {
- xenbus_dev_fatal(sc->xb_dev, error,
- "writing %s/max-requests",
- node_path);
- return;
- }
-
- error = xs_printf(XST_NIL, node_path,
- "max-request-segments","%u", sc->max_request_segments);
- if (error) {
- xenbus_dev_fatal(sc->xb_dev, error,
- "writing %s/max-request-segments",
- node_path);
- return;
- }
-
- error = xs_printf(XST_NIL, node_path,
- "max-request-size","%u", sc->max_request_size);
- if (error) {
- xenbus_dev_fatal(sc->xb_dev, error,
- "writing %s/max-request-size",
- node_path);
- return;
- }
-
error = xs_printf(XST_NIL, node_path, "event-channel",
- "%u", irq_to_evtchn_port(sc->irq));
+ "%u", xen_intr_port(sc->xen_intr_handle));
if (error) {
- xenbus_dev_fatal(sc->xb_dev, error,
- "writing %s/event-channel",
- node_path);
+ xenbus_dev_fatal(sc->xbd_dev, error,
+ "writing %s/event-channel",
+ node_path);
return;
}
- error = xs_printf(XST_NIL, node_path,
- "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
+ error = xs_printf(XST_NIL, node_path, "protocol",
+ "%s", XEN_IO_PROTO_ABI_NATIVE);
if (error) {
- xenbus_dev_fatal(sc->xb_dev, error,
- "writing %s/protocol",
- node_path);
+ xenbus_dev_fatal(sc->xbd_dev, error,
+ "writing %s/protocol",
+ node_path);
return;
}
- xenbus_set_state(sc->xb_dev, XenbusStateInitialised);
+ xenbus_set_state(sc->xbd_dev, XenbusStateInitialised);
}
-static int
-setup_blkring(struct xb_softc *sc)
-{
- blkif_sring_t *sring;
- uintptr_t sring_page_addr;
- int error;
- int i;
-
- sring = malloc(sc->ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
- M_NOWAIT|M_ZERO);
- if (sring == NULL) {
- xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring");
- return (ENOMEM);
- }
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&sc->ring, sring, sc->ring_pages * PAGE_SIZE);
-
- for (i = 0, sring_page_addr = (uintptr_t)sring;
- i < sc->ring_pages;
- i++, sring_page_addr += PAGE_SIZE) {
-
- error = xenbus_grant_ring(sc->xb_dev,
- (vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->ring_ref[i]);
- if (error) {
- xenbus_dev_fatal(sc->xb_dev, error,
- "granting ring_ref(%d)", i);
- return (error);
- }
- }
- if (sc->ring_pages == 1) {
- error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
- "ring-ref", "%u", sc->ring_ref[0]);
- if (error) {
- xenbus_dev_fatal(sc->xb_dev, error,
- "writing %s/ring-ref",
- xenbus_get_node(sc->xb_dev));
- return (error);
- }
- } else {
- for (i = 0; i < sc->ring_pages; i++) {
- char ring_ref_name[]= "ring_refXX";
-
- snprintf(ring_ref_name, sizeof(ring_ref_name),
- "ring-ref%u", i);
- error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
- ring_ref_name, "%u", sc->ring_ref[i]);
- if (error) {
- xenbus_dev_fatal(sc->xb_dev, error,
- "writing %s/%s",
- xenbus_get_node(sc->xb_dev),
- ring_ref_name);
- return (error);
- }
- }
- }
-
- error = bind_listening_port_to_irqhandler(
- xenbus_get_otherend_id(sc->xb_dev),
- "xbd", (driver_intr_t *)blkif_int, sc,
- INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq);
- if (error) {
- xenbus_dev_fatal(sc->xb_dev, error,
- "bind_evtchn_to_irqhandler failed");
- return (error);
- }
-
- return (0);
-}
-
-/**
- * Callback received when the backend's state changes.
+/*
+ * Invoked when the backend is finally 'ready' (and has published
+ * the details about the physical device - #sectors, size, etc).
*/
-static void
-blkfront_backend_changed(device_t dev, XenbusState backend_state)
-{
- struct xb_softc *sc = device_get_softc(dev);
-
- DPRINTK("backend_state=%d\n", backend_state);
-
- switch (backend_state) {
- case XenbusStateUnknown:
- case XenbusStateInitialising:
- case XenbusStateReconfigured:
- case XenbusStateReconfiguring:
- case XenbusStateClosed:
- break;
-
- case XenbusStateInitWait:
- case XenbusStateInitialised:
- blkfront_initialize(sc);
- break;
-
- case XenbusStateConnected:
- blkfront_initialize(sc);
- blkfront_connect(sc);
- break;
-
- case XenbusStateClosing:
- if (sc->users > 0)
- xenbus_dev_error(dev, -EBUSY,
- "Device in use; refusing to close");
- else
- blkfront_closing(dev);
- break;
- }
-}
-
-/*
-** Invoked when the backend is finally 'ready' (and has published
-** the details about the physical device - #sectors, size, etc).
-*/
static void
-blkfront_connect(struct xb_softc *sc)
+xbd_connect(struct xbd_softc *sc)
{
- device_t dev = sc->xb_dev;
- unsigned long sectors, sector_size;
+ device_t dev = sc->xbd_dev;
+ unsigned long sectors, sector_size, phys_sector_size;
unsigned int binfo;
- int err, feature_barrier;
+ int err, feature_barrier, feature_flush;
+ int i, j;
- if( (sc->connected == BLKIF_STATE_CONNECTED) ||
- (sc->connected == BLKIF_STATE_SUSPENDED) )
+ if (sc->xbd_state == XBD_STATE_CONNECTED ||
+ sc->xbd_state == XBD_STATE_SUSPENDED)
return;
DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));
err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
- "sectors", "%lu", §ors,
- "info", "%u", &binfo,
- "sector-size", "%lu", §or_size,
- NULL);
+ "sectors", "%lu", §ors,
+ "info", "%u", &binfo,
+ "sector-size", "%lu", §or_size,
+ NULL);
if (err) {
xenbus_dev_fatal(dev, err,
"reading backend fields at %s",
@@ -903,13 +1240,114 @@
xenbus_get_otherend_path(dev));
return;
}
+ if ((sectors == 0) || (sector_size == 0)) {
+ xenbus_dev_fatal(dev, 0,
+ "invalid parameters from %s:"
+ " sectors = %lu, sector_size = %lu",
+ xenbus_get_otherend_path(dev),
+ sectors, sector_size);
+ return;
+ }
err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
- "feature-barrier", "%lu", &feature_barrier,
- NULL);
- if (!err || feature_barrier)
- sc->xb_flags |= XB_BARRIER;
+ "physical-sector-size", "%lu", &phys_sector_size,
+ NULL);
+ if (err || phys_sector_size <= sector_size)
+ phys_sector_size = 0;
+ err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
+ "feature-barrier", "%d", &feature_barrier,
+ NULL);
+ if (err == 0 && feature_barrier != 0)
+ sc->xbd_flags |= XBDF_BARRIER;
- if (sc->xb_disk == NULL) {
+ err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
+ "feature-flush-cache", "%d", &feature_flush,
+ NULL);
+ if (err == 0 && feature_flush != 0)
+ sc->xbd_flags |= XBDF_FLUSH;
+
+ err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
+ "feature-max-indirect-segments", "%" PRIu32,
+ &sc->xbd_max_request_segments, NULL);
+ if ((err != 0) || (xbd_enable_indirect == 0))
+ sc->xbd_max_request_segments = 0;
+ if (sc->xbd_max_request_segments > XBD_MAX_INDIRECT_SEGMENTS)
+ sc->xbd_max_request_segments = XBD_MAX_INDIRECT_SEGMENTS;
+ if (sc->xbd_max_request_segments > XBD_SIZE_TO_SEGS(MAXPHYS))
+ sc->xbd_max_request_segments = XBD_SIZE_TO_SEGS(MAXPHYS);
+ sc->xbd_max_request_indirectpages =
+ XBD_INDIRECT_SEGS_TO_PAGES(sc->xbd_max_request_segments);
+ if (sc->xbd_max_request_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST)
+ sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ sc->xbd_max_request_size =
+ XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments);
+
+ /* Allocate datastructures based on negotiated values. */
+ err = bus_dma_tag_create(
+ bus_get_dma_tag(sc->xbd_dev), /* parent */
+ 512, PAGE_SIZE, /* algnmnt, boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ sc->xbd_max_request_size,
+ sc->xbd_max_request_segments,
+ PAGE_SIZE, /* maxsegsize */
+ BUS_DMA_ALLOCNOW, /* flags */
+ busdma_lock_mutex, /* lockfunc */
+ &sc->xbd_io_lock, /* lockarg */
+ &sc->xbd_io_dmat);
+ if (err != 0) {
+ xenbus_dev_fatal(sc->xbd_dev, err,
+ "Cannot allocate parent DMA tag\n");
+ return;
+ }
+
+ /* Per-transaction data allocation. */
+ sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests,
+ M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
+ if (sc->xbd_shadow == NULL) {
+ bus_dma_tag_destroy(sc->xbd_io_dmat);
+ xenbus_dev_fatal(sc->xbd_dev, ENOMEM,
+ "Cannot allocate request structures\n");
+ return;
+ }
+
+ for (i = 0; i < sc->xbd_max_requests; i++) {
+ struct xbd_command *cm;
+ void * indirectpages;
+
+ cm = &sc->xbd_shadow[i];
+ cm->cm_sg_refs = malloc(
+ sizeof(grant_ref_t) * sc->xbd_max_request_segments,
+ M_XENBLOCKFRONT, M_NOWAIT);
+ if (cm->cm_sg_refs == NULL)
+ break;
+ cm->cm_id = i;
+ cm->cm_flags = XBDCF_INITIALIZER;
+ cm->cm_sc = sc;
+ if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0)
+ break;
+ if (sc->xbd_max_request_indirectpages > 0) {
+ indirectpages = contigmalloc(
+ PAGE_SIZE * sc->xbd_max_request_indirectpages,
+ M_XENBLOCKFRONT, M_ZERO, 0, ~0, PAGE_SIZE, 0);
+ } else {
+ indirectpages = NULL;
+ }
+ for (j = 0; j < sc->xbd_max_request_indirectpages; j++) {
+ if (gnttab_grant_foreign_access(
+ xenbus_get_otherend_id(sc->xbd_dev),
+ (vtomach(indirectpages) >> PAGE_SHIFT) + j,
+ 1 /* grant read-only access */,
+ &cm->cm_indirectionrefs[j]))
+ break;
+ }
+ if (j < sc->xbd_max_request_indirectpages)
+ break;
+ cm->cm_indirectionpages = indirectpages;
+ xbd_free_command(cm);
+ }
+
+ if (sc->xbd_disk == NULL) {
device_printf(dev, "%juMB <%s> at %s",
(uintmax_t) sectors / (1048576 / sector_size),
device_get_desc(dev),
@@ -916,17 +1354,18 @@
xenbus_get_node(dev));
bus_print_child_footer(device_get_parent(dev), dev);
- xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size);
+ xbd_instance_create(sc, sectors, sc->xbd_vdevice, binfo,
+ sector_size, phys_sector_size);
}
(void)xenbus_set_state(dev, XenbusStateConnected);
/* Kick pending requests. */
- mtx_lock(&sc->xb_io_lock);
- sc->connected = BLKIF_STATE_CONNECTED;
- xb_startio(sc);
- sc->xb_flags |= XB_READY;
- mtx_unlock(&sc->xb_io_lock);
+ mtx_lock(&sc->xbd_io_lock);
+ sc->xbd_state = XBD_STATE_CONNECTED;
+ xbd_startio(sc);
+ sc->xbd_flags |= XBDF_READY;
+ mtx_unlock(&sc->xbd_io_lock);
}
/**
@@ -936,493 +1375,236 @@
* acknowledgement.
*/
static void
-blkfront_closing(device_t dev)
+xbd_closing(device_t dev)
{
- struct xb_softc *sc = device_get_softc(dev);
+ struct xbd_softc *sc = device_get_softc(dev);
xenbus_set_state(dev, XenbusStateClosing);
- DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));
+ DPRINTK("xbd_closing: %s removed\n", xenbus_get_node(dev));
- if (sc->xb_disk != NULL) {
- disk_destroy(sc->xb_disk);
- sc->xb_disk = NULL;
+ if (sc->xbd_disk != NULL) {
+ disk_destroy(sc->xbd_disk);
+ sc->xbd_disk = NULL;
}
xenbus_set_state(dev, XenbusStateClosed);
}
-
+/*---------------------------- NewBus Entrypoints ----------------------------*/
static int
-blkfront_detach(device_t dev)
+xbd_probe(device_t dev)
{
- struct xb_softc *sc = device_get_softc(dev);
+ if (strcmp(xenbus_get_type(dev), "vbd") != 0)
+ return (ENXIO);
- DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));
+#ifdef XENHVM
+ if (xen_disable_pv_disks != 0)
+ return (ENXIO);
+#endif
- blkif_free(sc);
- mtx_destroy(&sc->xb_io_lock);
+ if (xen_hvm_domain()) {
+ int error;
+ char *type;
- return 0;
-}
+ /*
+ * When running in an HVM domain, IDE disk emulation is
+ * disabled early in boot so that native drivers will
+ * not see emulated hardware. However, CDROM device
+ * emulation cannot be disabled.
+ *
+ * Through use of FreeBSD's vm_guest and xen_hvm_domain()
+ * APIs, we could modify the native CDROM driver to fail its
+ * probe when running under Xen. Unfortunately, the PV
+ * CDROM support in XenServer (up through at least version
+ * 6.2) isn't functional, so we instead rely on the emulated
+ * CDROM instance, and fail to attach the PV one here in
+ * the blkfront driver.
+ */
+ error = xs_read(XST_NIL, xenbus_get_node(dev),
+ "device-type", NULL, (void **) &type);
+ if (error)
+ return (ENXIO);
+ if (strncmp(type, "cdrom", 5) == 0) {
+ free(type, M_XENSTORE);
+ return (ENXIO);
+ }
+ free(type, M_XENSTORE);
+ }
-static inline void
-flush_requests(struct xb_softc *sc)
-{
- int notify;
-
- RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify);
-
- if (notify)
- notify_remote_via_irq(sc->irq);
+ device_set_desc(dev, "Virtual Block Device");
+ device_quiet(dev);
+ return (0);
}
-static void
-blkif_restart_queue_callback(void *arg)
+/*
+ * Setup supplies the backend dir, virtual device. We place an event
+ * channel and shared frame entries. We watch backend to wait if it's
+ * ok.
+ */
+static int
+xbd_attach(device_t dev)
{
- struct xb_softc *sc = arg;
+ struct xbd_softc *sc;
+ const char *name;
+ uint32_t vdevice;
+ int error;
+ int i;
+ int unit;
- mtx_lock(&sc->xb_io_lock);
+ /* FIXME: Use dynamic device id if this is not set. */
+ error = xs_scanf(XST_NIL, xenbus_get_node(dev),
+ "virtual-device", NULL, "%" PRIu32, &vdevice);
+ if (error)
+ error = xs_scanf(XST_NIL, xenbus_get_node(dev),
+ "virtual-device-ext", NULL, "%" PRIu32, &vdevice);
+ if (error) {
+ xenbus_dev_fatal(dev, error, "reading virtual-device");
+ device_printf(dev, "Couldn't determine virtual device.\n");
+ return (error);
+ }
- xb_startio(sc);
+ xbd_vdevice_to_unit(vdevice, &unit, &name);
+ if (!strcmp(name, "xbd"))
+ device_set_unit(dev, unit);
- mtx_unlock(&sc->xb_io_lock);
-}
+ sc = device_get_softc(dev);
+ mtx_init(&sc->xbd_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
+ xbd_initqs(sc);
+ for (i = 0; i < XBD_MAX_RING_PAGES; i++)
+ sc->xbd_ring_ref[i] = GRANT_REF_INVALID;
-static int
-blkif_open(struct disk *dp)
-{
- struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
+ sc->xbd_dev = dev;
+ sc->xbd_vdevice = vdevice;
+ sc->xbd_state = XBD_STATE_DISCONNECTED;
- if (sc == NULL) {
- printf("xb%d: not found", sc->xb_unit);
- return (ENXIO);
- }
+ xbd_setup_sysctl(sc);
- sc->xb_flags |= XB_OPEN;
- sc->users++;
- return (0);
-}
+ /* Wait for backend device to publish its protocol capabilities. */
+ xenbus_set_state(dev, XenbusStateInitialising);
-static int
-blkif_close(struct disk *dp)
-{
- struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
-
- if (sc == NULL)
- return (ENXIO);
- sc->xb_flags &= ~XB_OPEN;
- if (--(sc->users) == 0) {
- /*
- * Check whether we have been instructed to close. We will
- * have ignored this request initially, as the device was
- * still mounted.
- */
- if (xenbus_get_otherend_state(sc->xb_dev) == XenbusStateClosing)
- blkfront_closing(sc->xb_dev);
- }
return (0);
}
static int
-blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
+xbd_detach(device_t dev)
{
- struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
+ struct xbd_softc *sc = device_get_softc(dev);
- if (sc == NULL)
- return (ENXIO);
+ DPRINTK("%s: %s removed\n", __func__, xenbus_get_node(dev));
- return (ENOTTY);
-}
+ xbd_free(sc);
+ mtx_destroy(&sc->xbd_io_lock);
-static void
-xb_free_command(struct xb_command *cm)
-{
-
- KASSERT((cm->cm_flags & XB_ON_XBQ_MASK) == 0,
- ("Freeing command that is still on a queue\n"));
-
- cm->cm_flags = 0;
- cm->bp = NULL;
- cm->cm_complete = NULL;
- xb_enqueue_free(cm);
+ return 0;
}
-/*
- * blkif_queue_request
- *
- * request block io
- *
- * id: for guest use only.
- * operation: BLKIF_OP_{READ,WRITE,PROBE}
- * buffer: buffer to read/write into. this should be a
- * virtual address in the guest os.
- */
-static struct xb_command *
-xb_bio_command(struct xb_softc *sc)
+static int
+xbd_suspend(device_t dev)
{
- struct xb_command *cm;
- struct bio *bp;
+ struct xbd_softc *sc = device_get_softc(dev);
+ int retval;
+ int saved_state;
- if (unlikely(sc->connected != BLKIF_STATE_CONNECTED))
- return (NULL);
+ /* Prevent new requests being issued until we fix things up. */
+ mtx_lock(&sc->xbd_io_lock);
+ saved_state = sc->xbd_state;
+ sc->xbd_state = XBD_STATE_SUSPENDED;
- bp = xb_dequeue_bio(sc);
- if (bp == NULL)
- return (NULL);
-
- if ((cm = xb_dequeue_free(sc)) == NULL) {
- xb_requeue_bio(sc, bp);
- return (NULL);
+ /* Wait for outstanding I/O to drain. */
+ retval = 0;
+ while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
+ if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock,
+ PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
+ retval = EBUSY;
+ break;
+ }
}
+ mtx_unlock(&sc->xbd_io_lock);
- if (gnttab_alloc_grant_references(sc->max_request_segments,
- &cm->gref_head) != 0) {
- gnttab_request_free_callback(&sc->callback,
- blkif_restart_queue_callback, sc,
- sc->max_request_segments);
- xb_requeue_bio(sc, bp);
- xb_enqueue_free(cm);
- sc->xb_flags |= XB_FROZEN;
- return (NULL);
- }
+ if (retval != 0)
+ sc->xbd_state = saved_state;
- cm->bp = bp;
- cm->data = bp->bio_data;
- cm->datalen = bp->bio_bcount;
- cm->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
- BLKIF_OP_WRITE;
- cm->sector_number = (blkif_sector_t)bp->bio_pblkno;
-
- return (cm);
+ return (retval);
}
static int
-blkif_queue_request(struct xb_softc *sc, struct xb_command *cm)
+xbd_resume(device_t dev)
{
- int error;
+ struct xbd_softc *sc = device_get_softc(dev);
- error = bus_dmamap_load(sc->xb_io_dmat, cm->map, cm->data, cm->datalen,
- blkif_queue_cb, cm, 0);
- if (error == EINPROGRESS) {
- printf("EINPROGRESS\n");
- sc->xb_flags |= XB_FROZEN;
- cm->cm_flags |= XB_CMD_FROZEN;
+ if (xen_suspend_cancelled) {
+ sc->xbd_state = XBD_STATE_CONNECTED;
return (0);
}
- return (error);
-}
+ DPRINTK("xbd_resume: %s\n", xenbus_get_node(dev));
-static void
-blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
-{
- struct xb_softc *sc;
- struct xb_command *cm;
- blkif_request_t *ring_req;
- struct blkif_request_segment *sg;
- struct blkif_request_segment *last_block_sg;
- grant_ref_t *sg_ref;
- vm_paddr_t buffer_ma;
- uint64_t fsect, lsect;
- int ref;
- int op;
- int block_segs;
-
- cm = arg;
- sc = cm->cm_sc;
-
-//printf("%s: Start\n", __func__);
- if (error) {
- printf("error %d in blkif_queue_cb\n", error);
- cm->bp->bio_error = EIO;
- biodone(cm->bp);
- xb_free_command(cm);
- return;
- }
-
- /* Fill out a communications ring structure. */
- ring_req = RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
- sc->ring.req_prod_pvt++;
- ring_req->id = cm->id;
- ring_req->operation = cm->operation;
- ring_req->sector_number = cm->sector_number;
- ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
- ring_req->nr_segments = nsegs;
- cm->nseg = nsegs;
-
- block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK);
- sg = ring_req->seg;
- last_block_sg = sg + block_segs;
- sg_ref = cm->sg_refs;
-
- while (1) {
-
- while (sg < last_block_sg) {
- buffer_ma = segs->ds_addr;
- fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
- lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1;
-
- KASSERT(lsect <= 7, ("XEN disk driver data cannot "
- "cross a page boundary"));
-
- /* install a grant reference. */
- ref = gnttab_claim_grant_reference(&cm->gref_head);
-
- /*
- * GNTTAB_LIST_END == 0xffffffff, but it is private
- * to gnttab.c.
- */
- KASSERT(ref != ~0, ("grant_reference failed"));
-
- gnttab_grant_foreign_access_ref(
- ref,
- xenbus_get_otherend_id(sc->xb_dev),
- buffer_ma >> PAGE_SHIFT,
- ring_req->operation == BLKIF_OP_WRITE);
-
- *sg_ref = ref;
- *sg = (struct blkif_request_segment) {
- .gref = ref,
- .first_sect = fsect,
- .last_sect = lsect };
- sg++;
- sg_ref++;
- segs++;
- nsegs--;
- }
- block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
- if (block_segs == 0)
- break;
-
- sg = BLKRING_GET_SEG_BLOCK(&sc->ring, sc->ring.req_prod_pvt);
- sc->ring.req_prod_pvt++;
- last_block_sg = sg + block_segs;
- }
-
- if (cm->operation == BLKIF_OP_READ)
- op = BUS_DMASYNC_PREREAD;
- else if (cm->operation == BLKIF_OP_WRITE)
- op = BUS_DMASYNC_PREWRITE;
- else
- op = 0;
- bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
-
- gnttab_free_grant_references(cm->gref_head);
-
- xb_enqueue_busy(cm);
-
- /*
- * This flag means that we're probably executing in the busdma swi
- * instead of in the startio context, so an explicit flush is needed.
- */
- if (cm->cm_flags & XB_CMD_FROZEN)
- flush_requests(sc);
-
-//printf("%s: Done\n", __func__);
- return;
+ xbd_free(sc);
+ xbd_initialize(sc);
+ return (0);
}
-/*
- * Dequeue buffers and place them in the shared communication ring.
- * Return when no more requests can be accepted or all buffers have
- * been queued.
- *
- * Signal XEN once the ring has been filled out.
+/**
+ * Callback received when the backend's state changes.
*/
static void
-xb_startio(struct xb_softc *sc)
+xbd_backend_changed(device_t dev, XenbusState backend_state)
{
- struct xb_command *cm;
- int error, queued = 0;
+ struct xbd_softc *sc = device_get_softc(dev);
- mtx_assert(&sc->xb_io_lock, MA_OWNED);
+ DPRINTK("backend_state=%d\n", backend_state);
- if (sc->connected != BLKIF_STATE_CONNECTED)
- return;
+ switch (backend_state) {
+ case XenbusStateUnknown:
+ case XenbusStateInitialising:
+ case XenbusStateReconfigured:
+ case XenbusStateReconfiguring:
+ case XenbusStateClosed:
+ break;
- while (RING_FREE_REQUESTS(&sc->ring) >= sc->max_request_blocks) {
- if (sc->xb_flags & XB_FROZEN)
- break;
+ case XenbusStateInitWait:
+ case XenbusStateInitialised:
+ xbd_initialize(sc);
+ break;
- cm = xb_dequeue_ready(sc);
+ case XenbusStateConnected:
+ xbd_initialize(sc);
+ xbd_connect(sc);
+ break;
- if (cm == NULL)
- cm = xb_bio_command(sc);
-
- if (cm == NULL)
- break;
-
- if ((error = blkif_queue_request(sc, cm)) != 0) {
- printf("blkif_queue_request returned %d\n", error);
- break;
- }
- queued++;
- }
-
- if (queued != 0)
- flush_requests(sc);
-}
-
-static void
-blkif_int(void *xsc)
-{
- struct xb_softc *sc = xsc;
- struct xb_command *cm;
- blkif_response_t *bret;
- RING_IDX i, rp;
- int op;
-
- mtx_lock(&sc->xb_io_lock);
-
- if (unlikely(sc->connected == BLKIF_STATE_DISCONNECTED)) {
- mtx_unlock(&sc->xb_io_lock);
- return;
- }
-
- again:
- rp = sc->ring.sring->rsp_prod;
- rmb(); /* Ensure we see queued responses up to 'rp'. */
-
- for (i = sc->ring.rsp_cons; i != rp;) {
- bret = RING_GET_RESPONSE(&sc->ring, i);
- cm = &sc->shadow[bret->id];
-
- xb_remove_busy(cm);
- i += blkif_completion(cm);
-
- if (cm->operation == BLKIF_OP_READ)
- op = BUS_DMASYNC_POSTREAD;
- else if (cm->operation == BLKIF_OP_WRITE)
- op = BUS_DMASYNC_POSTWRITE;
+ case XenbusStateClosing:
+ if (sc->xbd_users > 0)
+ xenbus_dev_error(dev, -EBUSY,
+ "Device in use; refusing to close");
else
- op = 0;
- bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
- bus_dmamap_unload(sc->xb_io_dmat, cm->map);
-
- /*
- * If commands are completing then resources are probably
- * being freed as well. It's a cheap assumption even when
- * wrong.
- */
- sc->xb_flags &= ~XB_FROZEN;
-
- /*
- * Directly call the i/o complete routine to save an
- * an indirection in the common case.
- */
- cm->status = bret->status;
- if (cm->bp)
- xb_bio_complete(sc, cm);
- else if (cm->cm_complete)
- (cm->cm_complete)(cm);
- else
- xb_free_command(cm);
+ xbd_closing(dev);
+ break;
}
-
- sc->ring.rsp_cons = i;
-
- if (i != sc->ring.req_prod_pvt) {
- int more_to_do;
- RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, more_to_do);
- if (more_to_do)
- goto again;
- } else {
- sc->ring.sring->rsp_event = i + 1;
- }
-
- xb_startio(sc);
-
- if (unlikely(sc->connected == BLKIF_STATE_SUSPENDED))
- wakeup(&sc->cm_busy);
-
- mtx_unlock(&sc->xb_io_lock);
}
-static void
-blkif_free(struct xb_softc *sc)
-{
- uint8_t *sring_page_ptr;
- int i;
-
- /* Prevent new requests being issued until we fix things up. */
- mtx_lock(&sc->xb_io_lock);
- sc->connected = BLKIF_STATE_DISCONNECTED;
- mtx_unlock(&sc->xb_io_lock);
-
- /* Free resources associated with old device channel. */
- if (sc->ring.sring != NULL) {
- sring_page_ptr = (uint8_t *)sc->ring.sring;
- for (i = 0; i < sc->ring_pages; i++) {
- if (sc->ring_ref[i] != GRANT_INVALID_REF) {
- gnttab_end_foreign_access_ref(sc->ring_ref[i]);
- sc->ring_ref[i] = GRANT_INVALID_REF;
- }
- sring_page_ptr += PAGE_SIZE;
- }
- free(sc->ring.sring, M_XENBLOCKFRONT);
- sc->ring.sring = NULL;
- }
-
- if (sc->shadow) {
-
- for (i = 0; i < sc->max_requests; i++) {
- struct xb_command *cm;
-
- cm = &sc->shadow[i];
- if (cm->sg_refs != NULL) {
- free(cm->sg_refs, M_XENBLOCKFRONT);
- cm->sg_refs = NULL;
- }
-
- bus_dmamap_destroy(sc->xb_io_dmat, cm->map);
- }
- free(sc->shadow, M_XENBLOCKFRONT);
- sc->shadow = NULL;
-
- bus_dma_tag_destroy(sc->xb_io_dmat);
-
- xb_initq_free(sc);
- xb_initq_ready(sc);
- xb_initq_complete(sc);
- }
-
- if (sc->irq) {
- unbind_from_irqhandler(sc->irq);
- sc->irq = 0;
- }
-}
-
-static int
-blkif_completion(struct xb_command *s)
-{
-//printf("%s: Req %p(%d)\n", __func__, s, s->nseg);
- gnttab_end_foreign_access_references(s->nseg, s->sg_refs);
- return (BLKIF_SEGS_TO_BLOCKS(s->nseg));
-}
-
-/* ** Driver registration ** */
-static device_method_t blkfront_methods[] = {
+/*---------------------------- NewBus Registration ---------------------------*/
+static device_method_t xbd_methods[] = {
/* Device interface */
- DEVMETHOD(device_probe, blkfront_probe),
- DEVMETHOD(device_attach, blkfront_attach),
- DEVMETHOD(device_detach, blkfront_detach),
+ DEVMETHOD(device_probe, xbd_probe),
+ DEVMETHOD(device_attach, xbd_attach),
+ DEVMETHOD(device_detach, xbd_detach),
DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, blkfront_suspend),
- DEVMETHOD(device_resume, blkfront_resume),
+ DEVMETHOD(device_suspend, xbd_suspend),
+ DEVMETHOD(device_resume, xbd_resume),
/* Xenbus interface */
- DEVMETHOD(xenbus_otherend_changed, blkfront_backend_changed),
+ DEVMETHOD(xenbus_otherend_changed, xbd_backend_changed),
{ 0, 0 }
};
-static driver_t blkfront_driver = {
+static driver_t xbd_driver = {
"xbd",
- blkfront_methods,
- sizeof(struct xb_softc),
+ xbd_methods,
+ sizeof(struct xbd_softc),
};
-devclass_t blkfront_devclass;
+devclass_t xbd_devclass;
-DRIVER_MODULE(xbd, xenbusb_front, blkfront_driver, blkfront_devclass, 0, 0);
+DRIVER_MODULE(xbd, xenbusb_front, xbd_driver, xbd_devclass, 0, 0);
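The naming scheme introduced by xbd_vdevice_to_unit() above can be illustrated with a small stand-alone sketch. It is not part of the commit: the lookup table is abbreviated to three entries, and the 1<<28 extended-ID convention is assumed to match the Xen vbd numbering used by the real function.

/*
 * Stand-alone sketch of the vdevice decoding done by xbd_vdevice_to_unit().
 * Only a few table entries are reproduced; the rest follow the same pattern.
 */
#include <stdio.h>

struct vdev_info {
	int major;		/* Linux major number */
	int shift;		/* minor bits used for partitions */
	int base;		/* first unit number for this major */
	const char *name;	/* FreeBSD disk name */
};

static const struct vdev_info info[] = {
	{3,   6, 0, "ada"},	/* ide0 */
	{8,   4, 0, "da"},	/* scsi disk0 */
	{202, 4, 0, "xbd"},	/* xbd */
	{0,   0, 0, NULL},
};

static void
vdevice_to_unit(unsigned int vdevice, int *unit, const char **name)
{
	int major = vdevice >> 8;
	int minor = vdevice & 0xff;
	int i;

	if (vdevice & (1u << 28)) {
		/* Extended ID space: the unit number is encoded directly. */
		*unit = (vdevice & ((1u << 28) - 1)) >> 8;
		*name = "xbd";
		return;
	}
	for (i = 0; info[i].major != 0; i++) {
		if (info[i].major == major) {
			*unit = info[i].base + (minor >> info[i].shift);
			*name = info[i].name;
			return;
		}
	}
	/* Unknown major: fall back to a plain xbd unit. */
	*unit = minor >> 4;
	*name = "xbd";
}

int
main(void)
{
	const char *name;
	int unit;

	vdevice_to_unit(0x810, &unit, &name);	/* major 8, minor 16 */
	printf("0x810 -> %s%d\n", name, unit);	/* prints "da1" */
	return (0);
}

Only the well-known Linux IDE/SCSI majors are remapped onto ada/da unit numbers; both the extended-ID branch and the unknown-major fallthrough keep the "xbd" name.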
Modified: trunk/sys/dev/xen/blkfront/block.h
===================================================================
--- trunk/sys/dev/xen/blkfront/block.h 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/blkfront/block.h 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,6 +1,8 @@
+/* $MidnightBSD$ */
/*
* XenBSD block device driver
*
+ * Copyright (c) 2010-2013 Spectra Logic Corporation
* Copyright (c) 2009 Scott Long, Yahoo!
* Copyright (c) 2009 Frank Suchomel, Citrix
* Copyright (c) 2009 Doug F. Rabson, Citrix
@@ -26,12 +28,11 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/xen/blkfront/block.h 298960 2016-05-03 07:52:06Z mav $
*/
-
-#ifndef __XEN_DRIVERS_BLOCK_H__
-#define __XEN_DRIVERS_BLOCK_H__
+#ifndef __XEN_BLKFRONT_BLOCK_H__
+#define __XEN_BLKFRONT_BLOCK_H__
#include <xen/blkif.h>
/**
@@ -44,7 +45,7 @@
* guarantee we can handle an unaligned transfer without the need to
* use a bounce buffer.
*/
-#define XBF_SEGS_TO_SIZE(segs) \
+#define XBD_SEGS_TO_SIZE(segs) \
(((segs) - 1) * PAGE_SIZE)
/**
@@ -57,264 +58,301 @@
 * \note We reserve a segment to guarantee we can handle an unaligned
* transfer without the need to use a bounce buffer.
*/
-#define XBF_SIZE_TO_SEGS(size) \
+#define XBD_SIZE_TO_SEGS(size) \
((size / PAGE_SIZE) + 1)
/**
- * The maximum number of outstanding requests blocks (request headers plus
- * additional segment blocks) we will allow in a negotiated block-front/back
- * communication channel.
+ * The maximum number of shared memory ring pages we will allow in a
+ * negotiated block-front/back communication channel. Allow enough
+ * ring space for all requests to be XBD_MAX_REQUEST_SIZE'd.
*/
-#define XBF_MAX_REQUESTS 256
+#define XBD_MAX_RING_PAGES 32
/**
- * The maximum mapped region size per request we will allow in a negotiated
+ * The maximum number of outstanding requests we will allow in a negotiated
* block-front/back communication channel.
*/
-#define XBF_MAX_REQUEST_SIZE \
- MIN(MAXPHYS, XBF_SEGS_TO_SIZE(BLKIF_MAX_SEGMENTS_PER_REQUEST))
+#define XBD_MAX_REQUESTS \
+ __CONST_RING_SIZE(blkif, PAGE_SIZE * XBD_MAX_RING_PAGES)
/**
- * The maximum number of segments (within a request header and accompanying
- * segment blocks) per request we will allow in a negotiated block-front/back
- * communication channel.
+ * The maximum number of blkif segments which can be provided per indirect
+ * page in an indirect request.
*/
-#define XBF_MAX_SEGMENTS_PER_REQUEST \
- (MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \
- XBF_SIZE_TO_SEGS(XBF_MAX_REQUEST_SIZE)))
+#define XBD_MAX_SEGMENTS_PER_PAGE \
+ (PAGE_SIZE / sizeof(struct blkif_request_segment))
/**
- * The maximum number of shared memory ring pages we will allow in a
- * negotiated block-front/back communication channel. Allow enough
- * ring space for all requests to be XBF_MAX_REQUEST_SIZE'd.
+ * The maximum number of blkif segments which can be provided in an indirect
+ * request.
*/
-#define XBF_MAX_RING_PAGES \
- BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBF_MAX_SEGMENTS_PER_REQUEST) \
- * XBF_MAX_REQUESTS)
+#define XBD_MAX_INDIRECT_SEGMENTS \
+ (BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST * XBD_MAX_SEGMENTS_PER_PAGE)
-struct xlbd_type_info
-{
- int partn_shift;
- int disks_per_major;
- char *devname;
- char *diskname;
-};
+/**
+ * Compute the number of indirect segment pages required for an I/O with the
+ * specified number of indirect segments.
+ */
+#define XBD_INDIRECT_SEGS_TO_PAGES(segs) \
+ ((segs + XBD_MAX_SEGMENTS_PER_PAGE - 1) / XBD_MAX_SEGMENTS_PER_PAGE)
-struct xlbd_major_info
-{
- int major;
- int index;
- int usage;
- struct xlbd_type_info *type;
-};
+typedef enum {
+ XBDCF_Q_MASK = 0xFF,
+ /* This command has contributed to xbd_qfrozen_cnt. */
+ XBDCF_FROZEN = 1<<8,
+ /* Freeze the command queue on dispatch (i.e. single step command). */
+ XBDCF_Q_FREEZE = 1<<9,
+ /* Bus DMA returned EINPROGRESS for this command. */
+ XBDCF_ASYNC_MAPPING = 1<<10,
+ XBDCF_INITIALIZER = XBDCF_Q_MASK
+} xbdc_flag_t;
-struct xb_command {
- TAILQ_ENTRY(xb_command) cm_link;
- struct xb_softc *cm_sc;
- u_int cm_flags;
-#define XB_CMD_FROZEN (1<<0)
-#define XB_CMD_POLLED (1<<1)
-#define XB_ON_XBQ_FREE (1<<2)
-#define XB_ON_XBQ_READY (1<<3)
-#define XB_ON_XBQ_BUSY (1<<4)
-#define XB_ON_XBQ_COMPLETE (1<<5)
-#define XB_ON_XBQ_MASK ((1<<2)|(1<<3)|(1<<4)|(1<<5))
- bus_dmamap_t map;
- uint64_t id;
- grant_ref_t *sg_refs;
- struct bio *bp;
- grant_ref_t gref_head;
- void *data;
- size_t datalen;
- u_int nseg;
- int operation;
- blkif_sector_t sector_number;
- int status;
- void (* cm_complete)(struct xb_command *);
+struct xbd_command;
+typedef void xbd_cbcf_t(struct xbd_command *);
+
+struct xbd_command {
+ TAILQ_ENTRY(xbd_command) cm_link;
+ struct xbd_softc *cm_sc;
+ xbdc_flag_t cm_flags;
+ bus_dmamap_t cm_map;
+ uint64_t cm_id;
+ grant_ref_t *cm_sg_refs;
+ struct bio *cm_bp;
+ grant_ref_t cm_gref_head;
+ void *cm_data;
+ size_t cm_datalen;
+ u_int cm_nseg;
+ int cm_operation;
+ blkif_sector_t cm_sector_number;
+ int cm_status;
+ xbd_cbcf_t *cm_complete;
+ void *cm_indirectionpages;
+ grant_ref_t cm_indirectionrefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
};
-#define XBQ_FREE 0
-#define XBQ_BIO 1
-#define XBQ_READY 2
-#define XBQ_BUSY 3
-#define XBQ_COMPLETE 4
-#define XBQ_COUNT 5
+typedef enum {
+ XBD_Q_FREE,
+ XBD_Q_READY,
+ XBD_Q_BUSY,
+ XBD_Q_COMPLETE,
+ XBD_Q_BIO,
+ XBD_Q_COUNT,
+ XBD_Q_NONE = XBDCF_Q_MASK
+} xbd_q_index_t;
-struct xb_qstat {
- uint32_t q_length;
- uint32_t q_max;
-};
+typedef struct xbd_cm_q {
+ TAILQ_HEAD(, xbd_command) q_tailq;
+ uint32_t q_length;
+ uint32_t q_max;
+} xbd_cm_q_t;
-union xb_statrequest {
- uint32_t ms_item;
- struct xb_qstat ms_qstat;
-};
+typedef enum {
+ XBD_STATE_DISCONNECTED,
+ XBD_STATE_CONNECTED,
+ XBD_STATE_SUSPENDED
+} xbd_state_t;
+typedef enum {
+ XBDF_NONE = 0,
+ XBDF_OPEN = 1 << 0, /* drive is open (can't shut down) */
+ XBDF_BARRIER = 1 << 1, /* backend supports barriers */
+ XBDF_FLUSH = 1 << 2, /* backend supports flush */
+ XBDF_READY = 1 << 3, /* Is ready */
+ XBDF_CM_SHORTAGE = 1 << 4, /* Free cm resource shortage active. */
+ XBDF_GNT_SHORTAGE = 1 << 5, /* Grant ref resource shortage active */
+ XBDF_WAIT_IDLE = 1 << 6, /*
+ * No new work until outstanding work
+ * completes.
+ */
+ XBDF_DISCARD = 1 << 7, /* backend supports discard */
+ XBDF_PERSISTENT = 1 << 8 /* backend supports persistent grants */
+} xbd_flag_t;
+
/*
* We have one of these per vbd, whether ide, scsi or 'other'.
*/
-struct xb_softc {
- device_t xb_dev;
- struct disk *xb_disk; /* disk params */
- struct bio_queue_head xb_bioq; /* sort queue */
- int xb_unit;
- int xb_flags;
-#define XB_OPEN (1<<0) /* drive is open (can't shut down) */
-#define XB_BARRIER (1 << 1) /* backend supports barriers */
-#define XB_READY (1 << 2) /* Is ready */
-#define XB_FROZEN (1 << 3) /* Waiting for resources */
- int vdevice;
- int connected;
- u_int ring_pages;
- uint32_t max_requests;
- uint32_t max_request_segments;
- uint32_t max_request_blocks;
- uint32_t max_request_size;
- grant_ref_t ring_ref[XBF_MAX_RING_PAGES];
- blkif_front_ring_t ring;
- unsigned int irq;
- struct gnttab_free_callback callback;
- TAILQ_HEAD(,xb_command) cm_free;
- TAILQ_HEAD(,xb_command) cm_ready;
- TAILQ_HEAD(,xb_command) cm_busy;
- TAILQ_HEAD(,xb_command) cm_complete;
- struct xb_qstat xb_qstat[XBQ_COUNT];
- bus_dma_tag_t xb_io_dmat;
+struct xbd_softc {
+ device_t xbd_dev;
+ struct disk *xbd_disk; /* disk params */
+ struct bio_queue_head xbd_bioq; /* sort queue */
+ int xbd_unit;
+ xbd_flag_t xbd_flags;
+ int xbd_qfrozen_cnt;
+ int xbd_vdevice;
+ xbd_state_t xbd_state;
+ u_int xbd_ring_pages;
+ uint32_t xbd_max_requests;
+ uint32_t xbd_max_request_segments;
+ uint32_t xbd_max_request_size;
+ uint32_t xbd_max_request_indirectpages;
+ grant_ref_t xbd_ring_ref[XBD_MAX_RING_PAGES];
+ blkif_front_ring_t xbd_ring;
+ xen_intr_handle_t xen_intr_handle;
+ struct gnttab_free_callback xbd_callback;
+ xbd_cm_q_t xbd_cm_q[XBD_Q_COUNT];
+ bus_dma_tag_t xbd_io_dmat;
/**
* The number of people holding this device open. We won't allow a
* hot-unplug unless this is 0.
*/
- int users;
- struct mtx xb_io_lock;
+ int xbd_users;
+ struct mtx xbd_io_lock;
- struct xb_command *shadow;
+ struct xbd_command *xbd_shadow;
};
-int xlvbd_add(struct xb_softc *, blkif_sector_t sectors, int device,
- uint16_t vdisk_info, unsigned long sector_size);
-void xlvbd_del(struct xb_softc *);
+int xbd_instance_create(struct xbd_softc *, blkif_sector_t sectors, int device,
+ uint16_t vdisk_info, unsigned long sector_size,
+ unsigned long phys_sector_size);
-#define XBQ_ADD(sc, qname) \
- do { \
- struct xb_qstat *qs; \
- \
- qs = &(sc)->xb_qstat[qname]; \
- qs->q_length++; \
- if (qs->q_length > qs->q_max) \
- qs->q_max = qs->q_length; \
- } while (0)
+static inline void
+xbd_added_qentry(struct xbd_softc *sc, xbd_q_index_t index)
+{
+ struct xbd_cm_q *cmq;
-#define XBQ_REMOVE(sc, qname) (sc)->xb_qstat[qname].q_length--
+ cmq = &sc->xbd_cm_q[index];
+ cmq->q_length++;
+ if (cmq->q_length > cmq->q_max)
+ cmq->q_max = cmq->q_length;
+}
-#define XBQ_INIT(sc, qname) \
- do { \
- sc->xb_qstat[qname].q_length = 0; \
- sc->xb_qstat[qname].q_max = 0; \
- } while (0)
+static inline void
+xbd_removed_qentry(struct xbd_softc *sc, xbd_q_index_t index)
+{
+ sc->xbd_cm_q[index].q_length--;
+}
-#define XBQ_COMMAND_QUEUE(name, index) \
- static __inline void \
- xb_initq_ ## name (struct xb_softc *sc) \
- { \
- TAILQ_INIT(&sc->cm_ ## name); \
- XBQ_INIT(sc, index); \
- } \
- static __inline void \
- xb_enqueue_ ## name (struct xb_command *cm) \
- { \
- if ((cm->cm_flags & XB_ON_XBQ_MASK) != 0) { \
- printf("command %p is on another queue, " \
- "flags = %#x\n", cm, cm->cm_flags); \
- panic("command is on another queue"); \
- } \
- TAILQ_INSERT_TAIL(&cm->cm_sc->cm_ ## name, cm, cm_link); \
- cm->cm_flags |= XB_ON_ ## index; \
- XBQ_ADD(cm->cm_sc, index); \
- } \
- static __inline void \
- xb_requeue_ ## name (struct xb_command *cm) \
- { \
- if ((cm->cm_flags & XB_ON_XBQ_MASK) != 0) { \
- printf("command %p is on another queue, " \
- "flags = %#x\n", cm, cm->cm_flags); \
- panic("command is on another queue"); \
- } \
- TAILQ_INSERT_HEAD(&cm->cm_sc->cm_ ## name, cm, cm_link); \
- cm->cm_flags |= XB_ON_ ## index; \
- XBQ_ADD(cm->cm_sc, index); \
- } \
- static __inline struct xb_command * \
- xb_dequeue_ ## name (struct xb_softc *sc) \
- { \
- struct xb_command *cm; \
- \
- if ((cm = TAILQ_FIRST(&sc->cm_ ## name)) != NULL) { \
- if ((cm->cm_flags & XB_ON_XBQ_MASK) != \
- XB_ON_ ## index) { \
- printf("command %p not in queue, " \
- "flags = %#x, bit = %#x\n", cm, \
- cm->cm_flags, XB_ON_ ## index); \
- panic("command not in queue"); \
- } \
- TAILQ_REMOVE(&sc->cm_ ## name, cm, cm_link); \
- cm->cm_flags &= ~XB_ON_ ## index; \
- XBQ_REMOVE(sc, index); \
- } \
- return (cm); \
- } \
- static __inline void \
- xb_remove_ ## name (struct xb_command *cm) \
- { \
- if ((cm->cm_flags & XB_ON_XBQ_MASK) != XB_ON_ ## index){\
- printf("command %p not in queue, flags = %#x, " \
- "bit = %#x\n", cm, cm->cm_flags, \
- XB_ON_ ## index); \
- panic("command not in queue"); \
- } \
- TAILQ_REMOVE(&cm->cm_sc->cm_ ## name, cm, cm_link); \
- cm->cm_flags &= ~XB_ON_ ## index; \
- XBQ_REMOVE(cm->cm_sc, index); \
- } \
-struct hack
+static inline uint32_t
+xbd_queue_length(struct xbd_softc *sc, xbd_q_index_t index)
+{
+ return (sc->xbd_cm_q[index].q_length);
+}
-XBQ_COMMAND_QUEUE(free, XBQ_FREE);
-XBQ_COMMAND_QUEUE(ready, XBQ_READY);
-XBQ_COMMAND_QUEUE(busy, XBQ_BUSY);
-XBQ_COMMAND_QUEUE(complete, XBQ_COMPLETE);
+static inline void
+xbd_initq_cm(struct xbd_softc *sc, xbd_q_index_t index)
+{
+ struct xbd_cm_q *cmq;
-static __inline void
-xb_initq_bio(struct xb_softc *sc)
+ cmq = &sc->xbd_cm_q[index];
+ TAILQ_INIT(&cmq->q_tailq);
+ cmq->q_length = 0;
+ cmq->q_max = 0;
+}
+
+static inline void
+xbd_enqueue_cm(struct xbd_command *cm, xbd_q_index_t index)
{
- bioq_init(&sc->xb_bioq);
- XBQ_INIT(sc, XBQ_BIO);
+ KASSERT(index != XBD_Q_BIO,
+ ("%s: Commands cannot access the bio queue.", __func__));
+ if ((cm->cm_flags & XBDCF_Q_MASK) != XBD_Q_NONE)
+ panic("%s: command %p is already on queue %d.",
+ __func__, cm, cm->cm_flags & XBDCF_Q_MASK);
+ TAILQ_INSERT_TAIL(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link);
+ cm->cm_flags &= ~XBDCF_Q_MASK;
+ cm->cm_flags |= index;
+ xbd_added_qentry(cm->cm_sc, index);
}
-static __inline void
-xb_enqueue_bio(struct xb_softc *sc, struct bio *bp)
+static inline void
+xbd_requeue_cm(struct xbd_command *cm, xbd_q_index_t index)
{
- bioq_insert_tail(&sc->xb_bioq, bp);
- XBQ_ADD(sc, XBQ_BIO);
+ KASSERT(index != XBD_Q_BIO,
+ ("%s: Commands cannot access the bio queue.", __func__));
+ if ((cm->cm_flags & XBDCF_Q_MASK) != XBD_Q_NONE)
+ panic("%s: command %p is already on queue %d.",
+ __func__, cm, cm->cm_flags & XBDCF_Q_MASK);
+ TAILQ_INSERT_HEAD(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link);
+ cm->cm_flags &= ~XBDCF_Q_MASK;
+ cm->cm_flags |= index;
+ xbd_added_qentry(cm->cm_sc, index);
}
-static __inline void
-xb_requeue_bio(struct xb_softc *sc, struct bio *bp)
+static inline struct xbd_command *
+xbd_dequeue_cm(struct xbd_softc *sc, xbd_q_index_t index)
{
- bioq_insert_head(&sc->xb_bioq, bp);
- XBQ_ADD(sc, XBQ_BIO);
+ struct xbd_command *cm;
+
+ KASSERT(index != XBD_Q_BIO,
+ ("%s: Commands cannot access the bio queue.", __func__));
+
+ if ((cm = TAILQ_FIRST(&sc->xbd_cm_q[index].q_tailq)) != NULL) {
+ if ((cm->cm_flags & XBDCF_Q_MASK) != index) {
+ panic("%s: command %p is on queue %d, "
+ "not specified queue %d",
+ __func__, cm,
+ cm->cm_flags & XBDCF_Q_MASK,
+ index);
+ }
+ TAILQ_REMOVE(&sc->xbd_cm_q[index].q_tailq, cm, cm_link);
+ cm->cm_flags &= ~XBDCF_Q_MASK;
+ cm->cm_flags |= XBD_Q_NONE;
+ xbd_removed_qentry(cm->cm_sc, index);
+ }
+ return (cm);
}
-static __inline struct bio *
-xb_dequeue_bio(struct xb_softc *sc)
+static inline void
+xbd_remove_cm(struct xbd_command *cm, xbd_q_index_t expected_index)
{
+ xbd_q_index_t index;
+
+ index = cm->cm_flags & XBDCF_Q_MASK;
+
+ KASSERT(index != XBD_Q_BIO,
+ ("%s: Commands cannot access the bio queue.", __func__));
+
+ if (index != expected_index) {
+ panic("%s: command %p is on queue %d, not specified queue %d",
+ __func__, cm, index, expected_index);
+ }
+ TAILQ_REMOVE(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link);
+ cm->cm_flags &= ~XBDCF_Q_MASK;
+ cm->cm_flags |= XBD_Q_NONE;
+ xbd_removed_qentry(cm->cm_sc, index);
+}
+
+static inline void
+xbd_initq_bio(struct xbd_softc *sc)
+{
+ bioq_init(&sc->xbd_bioq);
+}
+
+static inline void
+xbd_enqueue_bio(struct xbd_softc *sc, struct bio *bp)
+{
+ bioq_insert_tail(&sc->xbd_bioq, bp);
+ xbd_added_qentry(sc, XBD_Q_BIO);
+}
+
+static inline void
+xbd_requeue_bio(struct xbd_softc *sc, struct bio *bp)
+{
+ bioq_insert_head(&sc->xbd_bioq, bp);
+ xbd_added_qentry(sc, XBD_Q_BIO);
+}
+
+static inline struct bio *
+xbd_dequeue_bio(struct xbd_softc *sc)
+{
struct bio *bp;
- if ((bp = bioq_first(&sc->xb_bioq)) != NULL) {
- bioq_remove(&sc->xb_bioq, bp);
- XBQ_REMOVE(sc, XBQ_BIO);
+ if ((bp = bioq_first(&sc->xbd_bioq)) != NULL) {
+ bioq_remove(&sc->xbd_bioq, bp);
+ xbd_removed_qentry(sc, XBD_Q_BIO);
}
return (bp);
}
-#endif /* __XEN_DRIVERS_BLOCK_H__ */
+static inline void
+xbd_initqs(struct xbd_softc *sc)
+{
+ u_int index;
+ for (index = 0; index < XBD_Q_COUNT; index++)
+ xbd_initq_cm(sc, index);
+
+ xbd_initq_bio(sc);
+}
+
+#endif /* __XEN_BLKFRONT_BLOCK_H__ */
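The XBD_* limits defined above drive the request-size negotiation in xbd_connect(). A rough stand-alone sketch of the arithmetic follows; the numeric constants in it (4 KiB pages, an 8-byte blkif_request_segment, 128 KiB MAXPHYS) are assumptions for a typical x86 build rather than values stated in this diff.

/*
 * Stand-alone sketch of the block.h limit arithmetic.  PAGE_SIZE, the
 * segment entry size, and the 128 KiB MAXPHYS used below are assumed.
 */
#include <stdio.h>

#define PAGE_SIZE		4096
#define SEGS_TO_SIZE(segs)	(((segs) - 1) * PAGE_SIZE)
#define SIZE_TO_SEGS(size)	(((size) / PAGE_SIZE) + 1)
#define SEGS_PER_PAGE		(PAGE_SIZE / 8)	/* 8-byte segment entries */
#define INDIRECT_SEGS_TO_PAGES(segs) \
	(((segs) + SEGS_PER_PAGE - 1) / SEGS_PER_PAGE)

int
main(void)
{
	/* One segment is held back so unaligned buffers never need bouncing. */
	printf("11 segments -> %d byte max request\n", SEGS_TO_SIZE(11));
	printf("128 KiB I/O -> %d segments\n", SIZE_TO_SEGS(128 * 1024));
	printf("33 segments -> %d indirect page(s)\n",
	    INDIRECT_SEGS_TO_PAGES(33));
	return (0);
}

With indirect segments available, a full MAXPHYS transfer fits in a single ring slot, which is why xbd_connect() above clamps xbd_max_request_segments to XBD_SIZE_TO_SEGS(MAXPHYS) before sizing the DMA tag and per-command grant arrays.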
Modified: trunk/sys/dev/xen/console/console.c
===================================================================
--- trunk/sys/dev/xen/console/console.c 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/console/console.c 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/console/console.c 265999 2014-05-14 01:35:43Z ian $");
#include <sys/param.h>
#include <sys/module.h>
@@ -15,7 +16,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <machine/stdarg.h>
-#include <machine/xen/xen-os.h>
+#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <sys/cons.h>
@@ -71,6 +72,8 @@
static int rc, rp;
static unsigned int cnsl_evt_reg;
static unsigned int wc, wp; /* write_cons, write_prod */
+xen_intr_handle_t xen_intr_handle;
+device_t xencons_dev;
#ifdef KDB
static int xc_altbrk;
@@ -224,7 +227,7 @@
xc_probe(device_t dev)
{
- return (0);
+ return (BUS_PROBE_NOWILDCARD);
}
static int
@@ -232,6 +235,7 @@
{
int error;
+ xencons_dev = dev;
xccons = tty_alloc(&xc_ttydevsw, NULL);
tty_makedev(xccons, NULL, "xc%r", 0);
@@ -243,15 +247,10 @@
callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons);
if (xen_start_info->flags & SIF_INITDOMAIN) {
- error = bind_virq_to_irqhandler(
- VIRQ_CONSOLE,
- 0,
- "console",
- NULL,
- xencons_priv_interrupt, NULL,
- INTR_TYPE_TTY, NULL);
-
- KASSERT(error >= 0, ("can't register console interrupt"));
+ error = xen_intr_bind_virq(dev, VIRQ_CONSOLE, 0, NULL,
+ xencons_priv_interrupt, NULL,
+ INTR_TYPE_TTY, &xen_intr_handle);
+ KASSERT(error >= 0, ("can't register console interrupt"));
}
/* register handler to flush console on shutdown */
@@ -358,6 +357,7 @@
xen_console_up = 0;
}
+#if 0
static inline int
__xencons_put_char(int ch)
{
@@ -367,6 +367,7 @@
wbuf[WBUF_MASK(wp++)] = _ch;
return 1;
}
+#endif
static void
@@ -410,7 +411,8 @@
DEVMETHOD(device_identify, xc_identify),
DEVMETHOD(device_probe, xc_probe),
DEVMETHOD(device_attach, xc_attach),
- {0, 0}
+
+ DEVMETHOD_END
};
static driver_t xc_driver = {
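The console now holds an opaque xen_intr_handle_t instead of a raw IRQ number. The bind/signal/unbind lifecycle, using only calls that appear in this commit, looks roughly like:

	xen_intr_handle_t handle;

	error = xen_intr_bind_virq(dev, VIRQ_CONSOLE, 0, NULL,
	    xencons_priv_interrupt, NULL, INTR_TYPE_TTY, &handle);
	...
	xen_intr_signal(handle);	/* notify the other end */
	xen_intr_unbind(&handle);	/* tear down on detach  */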
Modified: trunk/sys/dev/xen/console/xencons_ring.c
===================================================================
--- trunk/sys/dev/xen/console/xencons_ring.c 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/console/xencons_ring.c 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/console/xencons_ring.c 255040 2013-08-29 19:52:18Z gibbs $");
#include <sys/param.h>
#include <sys/module.h>
@@ -16,7 +17,8 @@
#include <sys/cons.h>
#include <machine/stdarg.h>
-#include <machine/xen/xen-os.h>
+
+#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <sys/cons.h>
@@ -30,9 +32,10 @@
#include <xen/interface/io/console.h>
#define console_evtchn console.domU.evtchn
-static unsigned int console_irq;
+xen_intr_handle_t console_handle;
extern char *console_page;
extern struct mtx cn_mtx;
+extern device_t xencons_dev;
static inline struct xencons_interface *
xencons_interface(void)
@@ -74,7 +77,7 @@
wmb();
intf->out_prod = prod;
- notify_remote_via_evtchn(xen_start_info->console_evtchn);
+ xen_intr_signal(console_handle);
return sent;
@@ -106,7 +109,7 @@
intf->in_cons = cons;
CN_LOCK(cn_mtx);
- notify_remote_via_evtchn(xen_start_info->console_evtchn);
+ xen_intr_signal(console_handle);
xencons_tx();
CN_UNLOCK(cn_mtx);
@@ -126,9 +129,9 @@
if (!xen_start_info->console_evtchn)
return 0;
- err = bind_caller_port_to_irqhandler(xen_start_info->console_evtchn,
- "xencons", xencons_handle_input, NULL,
- INTR_TYPE_MISC | INTR_MPSAFE, &console_irq);
+ err = xen_intr_bind_local_port(xencons_dev,
+ xen_start_info->console_evtchn, NULL, xencons_handle_input, NULL,
+ INTR_TYPE_MISC | INTR_MPSAFE, &console_handle);
if (err) {
return err;
}
@@ -146,7 +149,7 @@
if (!xen_start_info->console_evtchn)
return;
- unbind_from_irqhandler(console_irq);
+ xen_intr_unbind(&console_handle);
}
void
Modified: trunk/sys/dev/xen/console/xencons_ring.h
===================================================================
--- trunk/sys/dev/xen/console/xencons_ring.h 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/console/xencons_ring.h 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
/*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/dev/xen/console/xencons_ring.h 192003 2009-05-11 22:55:49Z kmacy $
*
*/
#ifndef _XENCONS_RING_H
Modified: trunk/sys/dev/xen/control/control.c
===================================================================
--- trunk/sys/dev/xen/control/control.c 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/control/control.c 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2010 Justin T. Gibbs, Spectra Logic Corporation
* All rights reserved.
@@ -89,7 +90,7 @@
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/control/control.c 315676 2017-03-21 09:38:59Z royger $");
/**
* \file control.c
@@ -119,31 +120,39 @@
#include <sys/taskqueue.h>
#include <sys/types.h>
#include <sys/vnode.h>
-
-#ifndef XENHVM
#include <sys/sched.h>
#include <sys/smp.h>
-#endif
+#include <sys/eventhandler.h>
#include <geom/geom.h>
#include <machine/_inttypes.h>
-#include <machine/xen/xen-os.h>
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
+#include <xen/xen-os.h>
#include <xen/blkif.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/xen_intr.h>
+#ifdef XENHVM
+#include <xen/hvm.h>
+#endif
+
#include <xen/interface/event_channel.h>
#include <xen/interface/grant_table.h>
#include <xen/xenbus/xenbusvar.h>
+#include <machine/xen/xenvar.h>
+#include <machine/xen/xenfunc.h>
+
+bool xen_suspend_cancelled;
/*--------------------------- Forward Declarations --------------------------*/
/** Function signature for shutdown event handlers. */
typedef void (xctrl_shutdown_handler_t)(void);
@@ -152,7 +161,6 @@
static xctrl_shutdown_handler_t xctrl_reboot;
static xctrl_shutdown_handler_t xctrl_suspend;
static xctrl_shutdown_handler_t xctrl_crash;
-static xctrl_shutdown_handler_t xctrl_halt;
/*-------------------------- Private Data Structures -------------------------*/
/** Element type for lookup table of event name to handler. */
@@ -167,7 +175,7 @@
{ "reboot", xctrl_reboot },
{ "suspend", xctrl_suspend },
{ "crash", xctrl_crash },
- { "halt", xctrl_halt },
+ { "halt", xctrl_poweroff },
};
struct xctrl_softc {
@@ -195,7 +203,7 @@
static void
xctrl_suspend()
{
- int i, j, k, fpp;
+ int i, j, k, fpp, suspend_cancelled;
unsigned long max_pfn, start_info_mfn;
EVENTHANDLER_INVOKE(power_suspend);
@@ -242,6 +250,7 @@
xencons_suspend();
gnttab_suspend();
+ intr_suspend();
max_pfn = HYPERVISOR_shared_info->arch.max_pfn;
@@ -259,7 +268,7 @@
*/
start_info_mfn = VTOMFN(xen_start_info);
pmap_suspend();
- HYPERVISOR_suspend(start_info_mfn);
+ suspend_cancelled = HYPERVISOR_suspend(start_info_mfn);
pmap_resume();
pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
@@ -282,7 +291,7 @@
HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
gnttab_resume();
- irq_resume();
+ intr_resume(suspend_cancelled != 0);
local_irq_enable();
xencons_resume();
@@ -326,17 +335,35 @@
}
#else
-extern void xenpci_resume(void);
/* HVM mode suspension. */
static void
xctrl_suspend()
{
- int suspend_cancelled;
+#ifdef SMP
+ cpuset_t cpu_suspend_map;
+#endif
+ EVENTHANDLER_INVOKE(power_suspend_early);
+ xs_lock();
+ stop_all_proc();
+ xs_unlock();
EVENTHANDLER_INVOKE(power_suspend);
+ if (smp_started) {
+ thread_lock(curthread);
+ sched_bind(curthread, 0);
+ thread_unlock(curthread);
+ }
+ KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0"));
+
/*
+ * Clear our XenStore node so the toolstack knows we are
+ * responding to the suspend request.
+ */
+ xs_write(XST_NIL, "control", "shutdown", "");
+
+ /*
* Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
* drivers need this.
*/
@@ -348,33 +375,76 @@
}
mtx_unlock(&Giant);
+#ifdef SMP
+ CPU_ZERO(&cpu_suspend_map); /* silence gcc */
+ if (smp_started) {
+ /*
+ * Suspend other CPUs. This prevents IPIs while we
+ * are resuming, and will allow us to reset per-cpu
+ * vcpu_info on resume.
+ */
+ cpu_suspend_map = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map);
+ if (!CPU_EMPTY(&cpu_suspend_map))
+ suspend_cpus(cpu_suspend_map);
+ }
+#endif
+
/*
* Prevent any races with evtchn_interrupt() handler.
*/
disable_intr();
- irq_suspend();
+ intr_suspend();
+ xen_hvm_suspend();
- suspend_cancelled = HYPERVISOR_suspend(0);
- if (suspend_cancelled)
- irq_resume();
- else
- xenpci_resume();
+ xen_suspend_cancelled = !!HYPERVISOR_suspend(0);
+ if (!xen_suspend_cancelled) {
+ xen_hvm_resume(false);
+ }
+ intr_resume(xen_suspend_cancelled != 0);
+ enable_intr();
+
/*
- * Re-enable interrupts and put the scheduler back to normal.
+ * Reset grant table info.
*/
- enable_intr();
+ if (!xen_suspend_cancelled) {
+ gnttab_resume();
+ }
+#ifdef SMP
+ /* Send an IPI_BITMAP in case there are pending bitmap IPIs. */
+ lapic_ipi_vectored(IPI_BITMAP_VECTOR, APIC_IPI_DEST_ALL);
+ if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) {
+ /*
+ * Now that event channels have been initialized,
+ * resume CPUs.
+ */
+ resume_cpus(cpu_suspend_map);
+ }
+#endif
+
/*
* FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
* similar.
*/
mtx_lock(&Giant);
- if (!suspend_cancelled)
- DEVICE_RESUME(root_bus);
+ DEVICE_RESUME(root_bus);
mtx_unlock(&Giant);
+ if (smp_started) {
+ thread_lock(curthread);
+ sched_unbind(curthread);
+ thread_unlock(curthread);
+ }
+
+ resume_all_proc();
+
EVENTHANDLER_INVOKE(power_resume);
+
+ if (bootverbose)
+ printf("System resumed after suspension\n");
+
}
#endif
@@ -384,12 +454,6 @@
panic("Xen directed crash");
}
-static void
-xctrl_halt()
-{
- shutdown_nice(RB_HALT);
-}
-
/*------------------------------ Event Reception -----------------------------*/
static void
xctrl_on_watch_event(struct xs_watch *watch, const char **vec, unsigned int len)
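For HVM guests the rewritten xctrl_suspend() enforces a strict ordering: acknowledge the request in XenStore, suspend devices under Giant, park the other vCPUs, quiesce interrupts, issue the hypercall, then unwind in reverse, resuming grant tables and CPUs only when the suspend was not cancelled. Condensed to its core calls (a sketch; every call here appears in the diff above):

	xs_write(XST_NIL, "control", "shutdown", "");	/* ack the toolstack */
	DEVICE_SUSPEND(root_bus);			/* under Giant */
	suspend_cpus(cpu_suspend_map);			/* SMP only */
	disable_intr();
	intr_suspend();
	xen_hvm_suspend();
	xen_suspend_cancelled = !!HYPERVISOR_suspend(0);
	if (!xen_suspend_cancelled)
		xen_hvm_resume(false);
	intr_resume(xen_suspend_cancelled != 0);
	enable_intr();
	/* ...then gnttab_resume(), resume_cpus(), DEVICE_RESUME(root_bus)... */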
Modified: trunk/sys/dev/xen/netback/netback.c
===================================================================
--- trunk/sys/dev/xen/netback/netback.c 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/netback/netback.c 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2009-2011 Spectra Logic Corporation
* All rights reserved.
@@ -33,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/netback/netback.c 319222 2017-05-30 16:15:52Z asomers $");
/**
* \file netback.c
@@ -42,6 +43,7 @@
* from this FreeBSD domain to other domains.
*/
#include "opt_inet.h"
+#include "opt_inet6.h"
#include "opt_global.h"
#include "opt_sctp.h"
@@ -79,14 +81,15 @@
#include <vm/vm_kern.h>
#include <machine/_inttypes.h>
-#include <machine/xen/xen-os.h>
-#include <machine/xen/xenvar.h>
-#include <xen/evtchn.h>
+#include <xen/xen-os.h>
+#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/interface/io/netif.h>
#include <xen/xenbus/xenbusvar.h>
+#include <machine/xen/xenvar.h>
+
/*--------------------------- Compile-time Tunables --------------------------*/
/*---------------------------------- Macros ----------------------------------*/
@@ -182,7 +185,6 @@
static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt,
const gnttab_copy_table gnttab, int n_entries,
netif_rx_back_ring_t *ring);
-static void xnb_add_mbuf_cksum(struct mbuf *mbufc);
static void xnb_stop(struct xnb_softc*);
static int xnb_ioctl(struct ifnet*, u_long, caddr_t);
static void xnb_start_locked(struct ifnet*);
@@ -193,6 +195,9 @@
static int xnb_unit_test_main(SYSCTL_HANDLER_ARGS);
static int xnb_dump_rings(SYSCTL_HANDLER_ARGS);
#endif
+#if defined(INET) || defined(INET6)
+static void xnb_add_mbuf_cksum(struct mbuf *mbufc);
+#endif
/*------------------------------ Data Structures -----------------------------*/
@@ -433,8 +438,8 @@
/** Xen device handle.*/
long handle;
- /** IRQ mapping for the communication ring event channel. */
- int irq;
+ /** Handle to the communication ring event channel. */
+ xen_intr_handle_t xen_intr_handle;
/**
* \brief Cached value of the front-end's domain id.
@@ -587,14 +592,14 @@
if (m->m_flags & M_PKTHDR) {
printf(" flowid=%10d, csum_flags=%#8x, csum_data=%#8x, "
"tso_segsz=%5hd\n",
- m->m_pkthdr.flowid, m->m_pkthdr.csum_flags,
+ m->m_pkthdr.flowid, (int)m->m_pkthdr.csum_flags,
m->m_pkthdr.csum_data, m->m_pkthdr.tso_segsz);
- printf(" rcvif=%16p, header=%18p, len=%19d\n",
- m->m_pkthdr.rcvif, m->m_pkthdr.header, m->m_pkthdr.len);
+ printf(" rcvif=%16p, len=%19d\n",
+ m->m_pkthdr.rcvif, m->m_pkthdr.len);
}
printf(" m_next=%16p, m_nextpk=%16p, m_data=%16p\n",
m->m_next, m->m_nextpkt, m->m_data);
- printf(" m_len=%17d, m_flags=%#15x, m_type=%18hd\n",
+ printf(" m_len=%17d, m_flags=%#15x, m_type=%18u\n",
m->m_len, m->m_flags, m->m_type);
len = m->m_len;
@@ -621,7 +626,7 @@
{
if (xnb->kva != 0) {
#ifndef XENHVM
- kmem_free(kernel_map, xnb->kva, xnb->kva_size);
+ kva_free(xnb->kva, xnb->kva_size);
#else
if (xnb->pseudo_phys_res != NULL) {
bus_release_resource(xnb->dev, SYS_RES_MEMORY,
@@ -647,10 +652,7 @@
int error;
int i;
- if (xnb->irq != 0) {
- unbind_from_irqhandler(xnb->irq);
- xnb->irq = 0;
- }
+ xen_intr_unbind(xnb->xen_intr_handle);
/*
* We may still have another thread currently processing requests. We
@@ -773,13 +775,13 @@
xnb->flags |= XNBF_RING_CONNECTED;
- error =
- bind_interdomain_evtchn_to_irqhandler(xnb->otherend_id,
- xnb->evtchn,
- device_get_nameunit(xnb->dev),
- xnb_intr, /*arg*/xnb,
- INTR_TYPE_BIO | INTR_MPSAFE,
- &xnb->irq);
+ error = xen_intr_bind_remote_port(xnb->dev,
+ xnb->otherend_id,
+ xnb->evtchn,
+ /*filter*/NULL,
+ xnb_intr, /*arg*/xnb,
+ INTR_TYPE_BIO | INTR_MPSAFE,
+ &xnb->xen_intr_handle);
if (error != 0) {
(void)xnb_disconnect(xnb);
xenbus_dev_fatal(xnb->dev, error, "binding event channel");
@@ -811,7 +813,7 @@
xnb->kva_size += xnb->ring_configs[i].ring_pages * PAGE_SIZE;
}
#ifndef XENHVM
- xnb->kva = kmem_alloc_nofault(kernel_map, xnb->kva_size);
+ xnb->kva = kva_alloc(xnb->kva_size);
if (xnb->kva == 0)
return (ENOMEM);
xnb->gnt_base_addr = xnb->kva;
@@ -1110,14 +1112,13 @@
xs_vprintf(XST_NIL, xenbus_get_node(xnb->dev),
"hotplug-error", fmt, ap_hotplug);
va_end(ap_hotplug);
- xs_printf(XST_NIL, xenbus_get_node(xnb->dev),
+ (void)xs_printf(XST_NIL, xenbus_get_node(xnb->dev),
"hotplug-status", "error");
xenbus_dev_vfatal(xnb->dev, err, fmt, ap);
va_end(ap);
- xs_printf(XST_NIL, xenbus_get_node(xnb->dev),
- "online", "0");
+ (void)xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "online", "0");
xnb_detach(xnb->dev);
}
@@ -1448,7 +1449,7 @@
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(txb, notify);
if (notify != 0)
- notify_remote_via_irq(xnb->irq);
+ xen_intr_signal(xnb->xen_intr_handle);
txb->sring->req_event = txb->req_cons + 1;
xen_mb();
@@ -1780,7 +1781,9 @@
}
mbufc->m_pkthdr.len = total_size;
+#if defined(INET) || defined(INET6)
xnb_add_mbuf_cksum(mbufc);
+#endif
}
/**
@@ -1811,12 +1814,12 @@
if (num_consumed == 0)
return 0; /* Nothing to receive */
- /* update statistics indepdent of errors */
+ /* update statistics independent of errors */
ifnet->if_ipackets++;
/*
* if we got here, then 1 or more requests was consumed, but the packet
- * is not necesarily valid.
+ * is not necessarily valid.
*/
if (xnb_pkt_is_valid(&pkt) == 0) {
/* got a garbage packet, respond and drop it */
@@ -2123,6 +2126,7 @@
return n_responses;
}
+#if defined(INET) || defined(INET6)
/**
* Add IP, TCP, and/or UDP checksums to every mbuf in a chain. The first mbuf
* in the chain must start with a struct ether_header.
@@ -2177,6 +2181,7 @@
break;
}
}
+#endif /* INET || INET6 */
static void
xnb_stop(struct xnb_softc *xnb)
@@ -2193,8 +2198,8 @@
xnb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct xnb_softc *xnb = ifp->if_softc;
+ struct ifreq *ifr = (struct ifreq*) data;
#ifdef INET
- struct ifreq *ifr = (struct ifreq*) data;
struct ifaddr *ifa = (struct ifaddr*)data;
#endif
int error = 0;
@@ -2361,7 +2366,7 @@
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(rxb, notify);
if ((notify != 0) || (out_of_space != 0))
- notify_remote_via_irq(xnb->irq);
+ xen_intr_signal(xnb->xen_intr_handle);
rxb->sring->req_event = req_prod_local + 1;
xen_mb();
} while (rxb->sring->req_prod != req_prod_local) ;
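netback moves to the same xen_intr API: the interdomain event channel is bound with xen_intr_bind_remote_port() and the frontend is notified with xen_intr_signal() rather than notify_remote_via_irq(). The connect path, reduced to a sketch using the arguments shown above:

	error = xen_intr_bind_remote_port(xnb->dev, xnb->otherend_id,
	    xnb->evtchn, /*filter*/NULL, xnb_intr, /*arg*/xnb,
	    INTR_TYPE_BIO | INTR_MPSAFE, &xnb->xen_intr_handle);
	if (error == 0)
		xen_intr_signal(xnb->xen_intr_handle);	/* kick the frontend */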
Modified: trunk/sys/dev/xen/netback/netback_unit_tests.c
===================================================================
--- trunk/sys/dev/xen/netback/netback_unit_tests.c 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/netback/netback_unit_tests.c 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2009-2011 Spectra Logic Corporation
* All rights reserved.
@@ -33,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/netback/netback_unit_tests.c 319222 2017-05-30 16:15:52Z asomers $");
/**
* \file netback_unit_tests.c
@@ -104,10 +105,6 @@
typedef struct test_fixture test_fixture_t;
-static void xnb_fill_eh_and_ip(struct mbuf *m, uint16_t ip_len,
- uint16_t ip_id, uint16_t ip_p,
- uint16_t ip_off, uint16_t ip_sum);
-static void xnb_fill_tcp(struct mbuf *m);
static int xnb_get1pkt(struct xnb_pkt *pkt, size_t size, uint16_t flags);
static int xnb_unit_test_runner(test_fixture_t const tests[], int ntests,
char *buffer, size_t buflen);
@@ -163,6 +160,13 @@
static testcase_t xnb_rxpkt2rsp_2short;
static testcase_t xnb_rxpkt2rsp_2slots;
static testcase_t xnb_rxpkt2rsp_copyerror;
+static testcase_t xnb_sscanf_llu;
+static testcase_t xnb_sscanf_lld;
+static testcase_t xnb_sscanf_hhu;
+static testcase_t xnb_sscanf_hhd;
+static testcase_t xnb_sscanf_hhn;
+
+#if defined(INET) || defined(INET6)
/* TODO: add test cases for xnb_add_mbuf_cksum for IPV6 tcp and udp */
static testcase_t xnb_add_mbuf_cksum_arp;
static testcase_t xnb_add_mbuf_cksum_tcp;
@@ -169,11 +173,11 @@
static testcase_t xnb_add_mbuf_cksum_udp;
static testcase_t xnb_add_mbuf_cksum_icmp;
static testcase_t xnb_add_mbuf_cksum_tcp_swcksum;
-static testcase_t xnb_sscanf_llu;
-static testcase_t xnb_sscanf_lld;
-static testcase_t xnb_sscanf_hhu;
-static testcase_t xnb_sscanf_hhd;
-static testcase_t xnb_sscanf_hhn;
+static void xnb_fill_eh_and_ip(struct mbuf *m, uint16_t ip_len,
+ uint16_t ip_id, uint16_t ip_p,
+ uint16_t ip_off, uint16_t ip_sum);
+static void xnb_fill_tcp(struct mbuf *m);
+#endif /* INET || INET6 */
/** Private data used by unit tests */
static struct {
@@ -307,11 +311,13 @@
{setup_pvt_data, xnb_rxpkt2rsp_2short, teardown_pvt_data},
{setup_pvt_data, xnb_rxpkt2rsp_2slots, teardown_pvt_data},
{setup_pvt_data, xnb_rxpkt2rsp_copyerror, teardown_pvt_data},
+#if defined(INET) || defined(INET6)
{null_setup, xnb_add_mbuf_cksum_arp, null_teardown},
{null_setup, xnb_add_mbuf_cksum_icmp, null_teardown},
{null_setup, xnb_add_mbuf_cksum_tcp, null_teardown},
{null_setup, xnb_add_mbuf_cksum_tcp_swcksum, null_teardown},
{null_setup, xnb_add_mbuf_cksum_udp, null_teardown},
+#endif
{null_setup, xnb_sscanf_hhd, null_teardown},
{null_setup, xnb_sscanf_hhu, null_teardown},
{null_setup, xnb_sscanf_lld, null_teardown},
@@ -1222,6 +1228,10 @@
xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, xnb_unit_pvt.txb.req_cons);
pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp);
+ XNB_ASSERT(pMbuf != NULL);
+ if (pMbuf == NULL)
+ return;
+
n_entries = xnb_txpkt2gnttab(&pkt, pMbuf, xnb_unit_pvt.gnttab,
&xnb_unit_pvt.txb, DOMID_FIRST_RESERVED);
@@ -1266,8 +1276,7 @@
/* should never get here */
XNB_ASSERT(0);
}
- if (pMbuf != NULL)
- m_freem(pMbuf);
+ m_freem(pMbuf);
}
@@ -1478,7 +1487,7 @@
safe_m_freem(&mbuf);
}
-/** xnb_mbufc2pkt on a a two-mbuf chain with short data regions */
+/** xnb_mbufc2pkt on a two-mbuf chain with short data regions */
static void
xnb_mbufc2pkt_2short(char *buffer, size_t buflen) {
struct xnb_pkt pkt;
@@ -1489,15 +1498,14 @@
struct mbuf *mbufc, *mbufc2;
mbufc = m_getm(NULL, size1, M_WAITOK, MT_DATA);
+ XNB_ASSERT(mbufc != NULL);
+ if (mbufc == NULL)
+ return;
mbufc->m_flags |= M_PKTHDR;
- if (mbufc == NULL) {
- XNB_ASSERT(mbufc != NULL);
- return;
- }
mbufc2 = m_getm(mbufc, size2, M_WAITOK, MT_DATA);
+ XNB_ASSERT(mbufc2 != NULL);
if (mbufc2 == NULL) {
- XNB_ASSERT(mbufc2 != NULL);
safe_m_freem(&mbufc);
return;
}
@@ -1521,7 +1529,7 @@
safe_m_freem(&mbufc2);
}
-/** xnb_mbufc2pkt on a a mbuf chain with >1 mbuf cluster */
+/** xnb_mbufc2pkt on a mbuf chain with >1 mbuf cluster */
static void
xnb_mbufc2pkt_long(char *buffer, size_t buflen) {
struct xnb_pkt pkt;
@@ -1532,11 +1540,10 @@
struct mbuf *mbufc, *m;
mbufc = m_getm(NULL, size, M_WAITOK, MT_DATA);
+ XNB_ASSERT(mbufc != NULL);
+ if (mbufc == NULL)
+ return;
mbufc->m_flags |= M_PKTHDR;
- if (mbufc == NULL) {
- XNB_ASSERT(mbufc != NULL);
- return;
- }
mbufc->m_pkthdr.len = size;
size_remaining = size;
@@ -1560,7 +1567,7 @@
safe_m_freem(&mbufc);
}
-/** xnb_mbufc2pkt on a a mbuf chain with >1 mbuf cluster and extra info */
+/** xnb_mbufc2pkt on a mbuf chain with >1 mbuf cluster and extra info */
static void
xnb_mbufc2pkt_extra(char *buffer, size_t buflen) {
struct xnb_pkt pkt;
@@ -1571,10 +1578,9 @@
struct mbuf *mbufc, *m;
mbufc = m_getm(NULL, size, M_WAITOK, MT_DATA);
- if (mbufc == NULL) {
- XNB_ASSERT(mbufc != NULL);
+ XNB_ASSERT(mbufc != NULL);
+ if (mbufc == NULL)
return;
- }
mbufc->m_flags |= M_PKTHDR;
mbufc->m_pkthdr.len = size;
@@ -1614,11 +1620,10 @@
int error;
mbufc = m_getm(NULL, size, M_WAITOK, MT_DATA);
+ XNB_ASSERT(mbufc != NULL);
+ if (mbufc == NULL)
+ return;
mbufc->m_flags |= M_PKTHDR;
- if (mbufc == NULL) {
- XNB_ASSERT(mbufc != NULL);
- return;
- }
mbufc->m_pkthdr.len = size;
size_remaining = size;
@@ -1835,10 +1840,9 @@
struct netif_extra_info *ext;
mbufc = m_getm(NULL, size, M_WAITOK, MT_DATA);
- if (mbufc == NULL) {
- XNB_ASSERT(mbufc != NULL);
+ XNB_ASSERT(mbufc != NULL);
+ if (mbufc == NULL)
return;
- }
mbufc->m_flags |= M_PKTHDR;
mbufc->m_pkthdr.len = size;
@@ -1969,11 +1973,10 @@
struct mbuf *mbufc;
mbufc = m_getm(NULL, size1, M_WAITOK, MT_DATA);
+ XNB_ASSERT(mbufc != NULL);
+ if (mbufc == NULL)
+ return;
mbufc->m_flags |= M_PKTHDR;
- if (mbufc == NULL) {
- XNB_ASSERT(mbufc != NULL);
- return;
- }
m_getm(mbufc, size2, M_WAITOK, MT_DATA);
XNB_ASSERT(mbufc->m_next != NULL);
@@ -2066,6 +2069,7 @@
safe_m_freem(&mbuf);
}
+#if defined(INET) || defined(INET6)
/**
* xnb_add_mbuf_cksum on an ARP request packet
*/
@@ -2430,6 +2434,7 @@
m_freem(mbufc);
}
+#endif /* INET || INET6 */
/**
* sscanf on unsigned chars
@@ -2444,7 +2449,7 @@
for (i = 0; i < 12; i++)
dest[i] = 'X';
- sscanf(mystr, "%hhu", &dest[4]);
+ XNB_ASSERT(sscanf(mystr, "%hhu", &dest[4]) == 1);
for (i = 0; i < 12; i++)
XNB_ASSERT(dest[i] == (i == 4 ? 137 : 'X'));
}
@@ -2462,7 +2467,7 @@
for (i = 0; i < 12; i++)
dest[i] = 'X';
- sscanf(mystr, "%hhd", &dest[4]);
+ XNB_ASSERT(sscanf(mystr, "%hhd", &dest[4]) == 1);
for (i = 0; i < 12; i++)
XNB_ASSERT(dest[i] == (i == 4 ? -27 : 'X'));
}
@@ -2480,7 +2485,7 @@
for (i = 0; i < 3; i++)
dest[i] = (long long)0xdeadbeefdeadbeef;
- sscanf(mystr, "%lld", &dest[1]);
+ XNB_ASSERT(sscanf(mystr, "%lld", &dest[1]) == 1);
for (i = 0; i < 3; i++)
XNB_ASSERT(dest[i] == (i != 1 ? (long long)0xdeadbeefdeadbeef :
-123456789012345));
@@ -2499,7 +2504,7 @@
for (i = 0; i < 3; i++)
dest[i] = (long long)0xdeadbeefdeadbeef;
- sscanf(mystr, "%llu", &dest[1]);
+ XNB_ASSERT(sscanf(mystr, "%llu", &dest[1]) == 1);
for (i = 0; i < 3; i++)
XNB_ASSERT(dest[i] == (i != 1 ? (long long)0xdeadbeefdeadbeef :
12802747070103273189ull));
@@ -2521,10 +2526,10 @@
for (i = 0; i < 12; i++)
dest[i] = (unsigned char)'X';
- sscanf(mystr,
+ XNB_ASSERT(sscanf(mystr,
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"
"202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f"
- "404142434445464748494a4b4c4d4e4f%hhn", &dest[4]);
+ "404142434445464748494a4b4c4d4e4f%hhn", &dest[4]) == 0);
for (i = 0; i < 12; i++)
XNB_ASSERT(dest[i] == (i == 4 ? 160 : 'X'));
}
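The sscanf changes also document a %hhn subtlety: it stores the number of characters consumed so far (160 hex digits in the test string above) but does not count as a conversion, so the wrapped call is expected to return 0. A small user-space illustration of the same semantics (not part of the driver):

	#include <stdio.h>

	int
	main(void)
	{
		signed char consumed = 0;
		int n;

		/* %hhn records characters consumed; it is not a conversion. */
		n = sscanf("abcd", "abcd%hhn", &consumed);
		printf("%d conversions, %d chars consumed\n", n, consumed);
		return (0);
	}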
Modified: trunk/sys/dev/xen/netfront/mbufq.h
===================================================================
--- trunk/sys/dev/xen/netfront/mbufq.h 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/netfront/mbufq.h 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
@@ -25,7 +26,7 @@
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-$MidnightBSD$
+$FreeBSD: stable/10/sys/dev/xen/netfront/mbufq.h 181643 2008-08-12 20:01:57Z kmacy $
***************************************************************************/
Modified: trunk/sys/dev/xen/netfront/netfront.c
===================================================================
--- trunk/sys/dev/xen/netfront/netfront.c 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/netfront/netfront.c 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2004-2006 Kip Macy
* All rights reserved.
@@ -25,9 +26,10 @@
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/netfront/netfront.c 316170 2017-03-29 17:11:41Z ngie $");
#include "opt_inet.h"
+#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -41,6 +43,7 @@
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>
+#include <sys/limits.h>
#include <net/if.h>
#include <net/if_arp.h>
@@ -76,17 +79,16 @@
#include <machine/intr_machdep.h>
-#include <machine/xen/xen-os.h>
-#include <machine/xen/xenfunc.h>
-#include <machine/xen/xenvar.h>
+#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
-#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/memory.h>
#include <xen/interface/io/netif.h>
#include <xen/xenbus/xenbusvar.h>
+#include <machine/xen/xenvar.h>
+
#include <dev/xen/netfront/mbufq.h>
#include "xenbus_if.h"
@@ -165,7 +167,6 @@
static void xn_watchdog(struct ifnet *);
#endif
-static void show_device(struct netfront_info *sc);
#ifdef notyet
static void netfront_closing(device_t dev);
#endif
@@ -256,8 +257,7 @@
struct mtx rx_lock;
struct mtx sc_lock;
- u_int handle;
- u_int irq;
+ xen_intr_handle_t xen_intr_handle;
u_int copying_receiver;
u_int carrier;
u_int maxfrags;
@@ -288,6 +288,8 @@
multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
mmu_update_t rx_mmu[NET_RX_RING_SIZE];
struct ifmedia sc_media;
+
+ bool xn_resume;
};
#define rx_mbufs xn_cdata.xn_rx_chain
@@ -450,6 +452,11 @@
netfront_probe(device_t dev)
{
+#ifdef XENHVM
+ if (xen_disable_pv_nics != 0)
+ return (ENXIO);
+#endif
+
if (!strcmp(xenbus_get_type(dev), "vif")) {
device_set_desc(dev, "Virtual Network Interface");
return (0);
@@ -472,7 +479,7 @@
#if __FreeBSD_version >= 700000
SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
- OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW,
+ OID_AUTO, "enable_lro", CTLFLAG_RW,
&xn_enable_lro, 0, "Large Receive Offload");
#endif
@@ -503,6 +510,16 @@
{
struct netfront_info *info = device_get_softc(dev);
+ if (xen_suspend_cancelled) {
+ XN_RX_LOCK(info);
+ XN_TX_LOCK(info);
+ netfront_carrier_on(info);
+ XN_TX_UNLOCK(info);
+ XN_RX_UNLOCK(info);
+ return (0);
+ }
+
+ info->xn_resume = true;
netif_disconnect_backend(info);
return (0);
}
@@ -546,7 +563,8 @@
goto abort_transaction;
}
err = xs_printf(xst, node,
- "event-channel", "%u", irq_to_evtchn_port(info->irq));
+ "event-channel", "%u",
+ xen_intr_port(info->xen_intr_handle));
if (err) {
message = "writing event-channel";
goto abort_transaction;
@@ -608,7 +626,6 @@
info->rx_ring_ref = GRANT_REF_INVALID;
info->rx.sring = NULL;
info->tx.sring = NULL;
- info->irq = 0;
txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
if (!txs) {
@@ -635,17 +652,16 @@
if (error)
goto fail;
- error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
- "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq);
+ error = xen_intr_alloc_and_bind_local_port(dev,
+ xenbus_get_otherend_id(dev), /*filter*/NULL, xn_intr, info,
+ INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, &info->xen_intr_handle);
if (error) {
xenbus_dev_fatal(dev, error,
- "bind_evtchn_to_irqhandler failed");
+ "xen_intr_alloc_and_bind_local_port failed");
goto fail;
}
- show_device(info);
-
return (0);
fail:
@@ -686,7 +702,6 @@
switch (newstate) {
case XenbusStateInitialising:
case XenbusStateInitialised:
- case XenbusStateConnected:
case XenbusStateUnknown:
case XenbusStateClosed:
case XenbusStateReconfigured:
@@ -698,13 +713,15 @@
if (network_connect(sc) != 0)
break;
xenbus_set_state(dev, XenbusStateConnected);
+ break;
+ case XenbusStateClosing:
+ xenbus_set_state(dev, XenbusStateClosed);
+ break;
+ case XenbusStateConnected:
#ifdef INET
netfront_send_fake_arp(dev, sc);
#endif
break;
- case XenbusStateClosing:
- xenbus_set_state(dev, XenbusStateClosed);
- break;
}
}
@@ -789,6 +806,45 @@
}
static void
+netif_release_rx_bufs_copy(struct netfront_info *np)
+{
+ struct mbuf *m;
+ grant_ref_t ref;
+ unsigned int i, busy, inuse;
+
+ XN_RX_LOCK(np);
+
+ for (busy = inuse = i = 0; i < NET_RX_RING_SIZE; i++) {
+ ref = np->grant_rx_ref[i];
+
+ if (ref == GRANT_REF_INVALID)
+ continue;
+
+ inuse++;
+
+ m = np->rx_mbufs[i];
+
+ if (!gnttab_end_foreign_access_ref(ref)) {
+ busy++;
+ continue;
+ }
+
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
+ np->grant_rx_ref[i] = GRANT_REF_INVALID;
+ add_id_to_freelist(np->rx_mbufs, i);
+
+ m_freem(m);
+ }
+
+ if (busy != 0)
+ device_printf(np->xbdev,
+ "Unable to release %u of %u in use grant references out of %zu total.\n",
+ busy, inuse, NET_RX_RING_SIZE);
+
+ XN_RX_UNLOCK(np);
+}
+
+static void
network_alloc_rx_buffers(struct netfront_info *sc)
{
int otherend_id = xenbus_get_otherend_id(sc->xbdev);
@@ -805,7 +861,7 @@
req_prod = sc->rx.req_prod_pvt;
- if (unlikely(sc->carrier == 0))
+ if (__predict_false(sc->carrier == 0))
return;
/*
@@ -945,7 +1001,7 @@
/* Zap PTEs and give away pages in one big multicall. */
(void)HYPERVISOR_multicall(sc->rx_mcl, i+1);
- if (unlikely(sc->rx_mcl[i].result != i ||
+ if (__predict_false(sc->rx_mcl[i].result != i ||
HYPERVISOR_memory_op(XENMEM_decrease_reservation,
&reservation) != i))
panic("%s: unable to reduce memory "
@@ -960,7 +1016,7 @@
push:
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify);
if (notify)
- notify_remote_via_irq(sc->irq);
+ xen_intr_signal(sc->xen_intr_handle);
}
static void
@@ -967,7 +1023,7 @@
xn_rxeof(struct netfront_info *np)
{
struct ifnet *ifp;
-#if __FreeBSD_version >= 700000
+#if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6))
struct lro_ctrl *lro = &np->xn_lro;
struct lro_entry *queued;
#endif
@@ -1002,7 +1058,7 @@
err = xennet_get_responses(np, &rinfo, rp, &i, &m,
&pages_flipped);
- if (unlikely(err)) {
+ if (__predict_false(err)) {
if (m)
mbufq_tail(&errq, m);
np->stats.rx_errors++;
@@ -1064,7 +1120,7 @@
* Do we really need to drop the rx lock?
*/
XN_RX_UNLOCK(np);
-#if __FreeBSD_version >= 700000
+#if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6))
/* Use LRO if possible */
if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
@@ -1082,7 +1138,7 @@
np->rx.rsp_cons = i;
-#if __FreeBSD_version >= 700000
+#if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6))
/*
* Flush any outstanding LRO work
*/
@@ -1150,7 +1206,7 @@
*/
if (!m->m_next)
ifp->if_opackets++;
- if (unlikely(gnttab_query_foreign_access(
+ if (__predict_false(gnttab_query_foreign_access(
np->grant_tx_ref[id]) != 0)) {
panic("%s: grant id %u still in use by the "
"backend", __func__, id);
@@ -1248,7 +1304,7 @@
struct mbuf *m;
grant_ref_t ref;
- if (unlikely(*cons + 1 == rp)) {
+ if (__predict_false(*cons + 1 == rp)) {
#if 0
if (net_ratelimit())
WPRINTK("Missing extra info\n");
@@ -1260,7 +1316,7 @@
extra = (struct netif_extra_info *)
RING_GET_RESPONSE(&np->rx, ++(*cons));
- if (unlikely(!extra->type ||
+ if (__predict_false(!extra->type ||
extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
#if 0
if (net_ratelimit())
@@ -1316,7 +1372,7 @@
DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n",
rx->status, rx->offset, frags);
#endif
- if (unlikely(rx->status < 0 ||
+ if (__predict_false(rx->status < 0 ||
rx->offset + rx->status > PAGE_SIZE)) {
#if 0
@@ -1678,7 +1734,7 @@
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify);
if (notify)
- notify_remote_via_irq(sc->irq);
+ xen_intr_signal(sc->xen_intr_handle);
if (RING_FULL(&sc->tx)) {
sc->tx_full = 1;
@@ -1960,7 +2016,7 @@
* packets.
*/
netfront_carrier_on(np);
- notify_remote_via_irq(np->irq);
+ xen_intr_signal(np->xen_intr_handle);
XN_TX_LOCK(np);
xn_txeof(np);
XN_TX_UNLOCK(np);
@@ -1969,25 +2025,6 @@
return (0);
}
-static void
-show_device(struct netfront_info *sc)
-{
-#ifdef DEBUG
- if (sc) {
- IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
- sc->xn_ifno,
- be_state_name[sc->xn_backend_state],
- sc->xn_user_state ? "open" : "closed",
- sc->xn_evtchn,
- sc->xn_irq,
- sc->xn_tx_if,
- sc->xn_rx_if);
- } else {
- IPRINTK("<vif NULL>\n");
- }
-#endif
-}
-
static void
xn_query_features(struct netfront_info *np)
{
@@ -2021,18 +2058,33 @@
static int
xn_configure_features(struct netfront_info *np)
{
- int err;
+ int err, cap_enabled;
err = 0;
-#if __FreeBSD_version >= 700000
- if ((np->xn_ifp->if_capenable & IFCAP_LRO) != 0)
+
+ if (np->xn_resume &&
+ ((np->xn_ifp->if_capenable & np->xn_ifp->if_capabilities)
+ == np->xn_ifp->if_capenable)) {
+ /* Current options are available, no need to do anything. */
+ return (0);
+ }
+
+ /* Try to preserve as many options as possible. */
+ if (np->xn_resume)
+ cap_enabled = np->xn_ifp->if_capenable;
+ else
+ cap_enabled = UINT_MAX;
+
+#if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6))
+ if ((np->xn_ifp->if_capenable & IFCAP_LRO) == (cap_enabled & IFCAP_LRO))
tcp_lro_free(&np->xn_lro);
#endif
np->xn_ifp->if_capenable =
- np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4);
+ np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4) & cap_enabled;
np->xn_ifp->if_hwassist &= ~CSUM_TSO;
-#if __FreeBSD_version >= 700000
- if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) != 0) {
+#if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6))
+ if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) ==
+ (cap_enabled & IFCAP_LRO)) {
err = tcp_lro_init(&np->xn_lro);
if (err) {
device_printf(np->xbdev, "LRO initialization failed\n");
@@ -2041,7 +2093,8 @@
np->xn_ifp->if_capenable |= IFCAP_LRO;
}
}
- if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) != 0) {
+ if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) ==
+ (cap_enabled & IFCAP_TSO4)) {
np->xn_ifp->if_capenable |= IFCAP_TSO4;
np->xn_ifp->if_hwassist |= CSUM_TSO;
}
@@ -2049,8 +2102,9 @@
return (err);
}
-/** Create a network device.
- * @param handle device handle
+/**
+ * Create a network device.
+ * @param dev Newbus device representing this virtual NIC.
*/
int
create_netdev(device_t dev)
@@ -2118,14 +2172,16 @@
ifp->if_watchdog = xn_watchdog;
#endif
ifp->if_init = xn_ifinit;
- ifp->if_mtu = ETHERMTU;
ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
ifp->if_hwassist = XN_CSUM_FEATURES;
ifp->if_capabilities = IFCAP_HWCSUM;
+ ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
+ ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS;
+ ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
ether_ifattach(ifp, np->mac);
- callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);
+ callout_init(&np->xn_stat_ch, 1);
netfront_carrier_off(np);
return (0);
@@ -2172,10 +2228,23 @@
static void
netif_free(struct netfront_info *info)
{
+ XN_LOCK(info);
+ xn_stop(info);
+ XN_UNLOCK(info);
+ callout_drain(&info->xn_stat_ch);
netif_disconnect_backend(info);
-#if 0
- close_netdev(info);
-#endif
+ if (info->xn_ifp != NULL) {
+ ether_ifdetach(info->xn_ifp);
+ if_free(info->xn_ifp);
+ info->xn_ifp = NULL;
+ }
+ ifmedia_removeall(&info->sc_media);
+ netif_release_tx_bufs(info);
+ if (info->copying_receiver)
+ netif_release_rx_bufs_copy(info);
+
+ gnttab_free_grant_references(info->gref_tx_head);
+ gnttab_free_grant_references(info->gref_rx_head);
}
static void
@@ -2190,10 +2259,7 @@
free_ring(&info->tx_ring_ref, &info->tx.sring);
free_ring(&info->rx_ring_ref, &info->rx.sring);
- if (info->irq)
- unbind_from_irqhandler(info->irq);
-
- info->irq = 0;
+ xen_intr_unbind(&info->xen_intr_handle);
}
static void
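xn_configure_features() now distinguishes first attach from resume: on resume the capability mask is intersected with what was enabled before suspension, so administrator settings (for example LRO turned off) survive a migration. The core of that logic, as added above:

	cap_enabled = np->xn_resume ? np->xn_ifp->if_capenable : UINT_MAX;
	np->xn_ifp->if_capenable =
	    np->xn_ifp->if_capabilities & ~(IFCAP_LRO | IFCAP_TSO4) & cap_enabled;
	/* LRO/TSO4 are then re-enabled only if supported and previously on. */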
Modified: trunk/sys/dev/xen/pcifront/pcifront.c
===================================================================
--- trunk/sys/dev/xen/pcifront/pcifront.c 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/pcifront/pcifront.c 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*
* Copyright (c) 2006, Cisco Systems, Inc.
* All rights reserved.
@@ -29,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/pcifront/pcifront.c 265999 2014-05-14 01:35:43Z ian $");
#include <sys/param.h>
#include <sys/module.h>
@@ -441,7 +442,7 @@
struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(dev);
DPRINTF("xpcife probe (unit=%d)\n", pdev->unit);
#endif
- return 0;
+ return (BUS_PROBE_NOWILDCARD);
}
/* Newbus xpcife device driver attach */
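Returning BUS_PROBE_NOWILDCARD rather than 0 from the probe routines changed in this commit (pcifront, console, and the new timer driver) keeps them from claiming wildcard-enumerated children; they still attach to devices added explicitly for them, e.g. by an identify routine.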
Added: trunk/sys/dev/xen/timer/timer.c
===================================================================
--- trunk/sys/dev/xen/timer/timer.c (rev 0)
+++ trunk/sys/dev/xen/timer/timer.c 2018-05-27 22:21:25 UTC (rev 10029)
@@ -0,0 +1,597 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009 Adrian Chadd
+ * Copyright (c) 2012 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/**
+ * \file dev/xen/timer/timer.c
+ * \brief A timer driver for the Xen hypervisor's PV clock.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/timer/timer.c 265999 2014-05-14 01:35:43Z ian $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/time.h>
+#include <sys/timetc.h>
+#include <sys/timeet.h>
+#include <sys/smp.h>
+#include <sys/limits.h>
+#include <sys/clock.h>
+
+#include <xen/xen-os.h>
+#include <xen/features.h>
+#include <xen/xen_intr.h>
+#include <xen/hypervisor.h>
+#include <xen/interface/io/xenbus.h>
+#include <xen/interface/vcpu.h>
+
+#include <machine/cpu.h>
+#include <machine/cpufunc.h>
+#include <machine/clock.h>
+#include <machine/_inttypes.h>
+#include <machine/smp.h>
+
+#include "clock_if.h"
+
+static devclass_t xentimer_devclass;
+
+#define NSEC_IN_SEC 1000000000ULL
+#define NSEC_IN_USEC 1000ULL
+/* 18446744073 = int(2^64 / NSEC_IN_SEC) = 1 ns in 64-bit fractions */
+#define FRAC_IN_NSEC 18446744073LL
+
+/* Xen timers may fire up to 100us off */
+#define XENTIMER_MIN_PERIOD_IN_NSEC 100*NSEC_IN_USEC
+#define XENCLOCK_RESOLUTION 10000000
+
+#define ETIME 62 /* Xen "bad time" error */
+
+#define XENTIMER_QUALITY 950
+
+struct xentimer_pcpu_data {
+ uint64_t timer;
+ uint64_t last_processed;
+ void *irq_handle;
+};
+
+DPCPU_DEFINE(struct xentimer_pcpu_data, xentimer_pcpu);
+
+DPCPU_DECLARE(struct vcpu_info *, vcpu_info);
+
+struct xentimer_softc {
+ device_t dev;
+ struct timecounter tc;
+ struct eventtimer et;
+};
+
+/* Last time; this guarantees a monotonically increasing clock. */
+volatile uint64_t xen_timer_last_time = 0;
+
+static void
+xentimer_identify(driver_t *driver, device_t parent)
+{
+ if (!xen_domain())
+ return;
+
+ /* Handle all Xen PV timers in one device instance. */
+ if (devclass_get_device(xentimer_devclass, 0))
+ return;
+
+ BUS_ADD_CHILD(parent, 0, "xen_et", 0);
+}
+
+static int
+xentimer_probe(device_t dev)
+{
+ KASSERT((xen_domain()), ("Trying to use Xen timer on bare metal"));
+ /*
+ * In order to attach, this driver requires the following:
+ * - Vector callback support by the hypervisor, in order to deliver
+ * timer interrupts to the correct CPU for CPUs other than 0.
+ * - Access to the hypervisor shared info page, in order to look up
+ * each VCPU's timer information and the Xen wallclock time.
+ * - The hypervisor must say its PV clock is "safe" to use.
+ * - The hypervisor must support VCPUOP hypercalls.
+ * - The maximum number of CPUs supported by FreeBSD must not exceed
+ * the number of VCPUs supported by the hypervisor.
+ */
+#define XTREQUIRES(condition, reason...) \
+ if (!(condition)) { \
+ device_printf(dev, ## reason); \
+ device_detach(dev); \
+ return (ENXIO); \
+ }
+
+ if (xen_hvm_domain()) {
+ XTREQUIRES(xen_vector_callback_enabled,
+ "vector callbacks unavailable\n");
+ XTREQUIRES(xen_feature(XENFEAT_hvm_safe_pvclock),
+ "HVM safe pvclock unavailable\n");
+ }
+ XTREQUIRES(HYPERVISOR_shared_info != NULL,
+ "shared info page unavailable\n");
+ XTREQUIRES(HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, 0, NULL) == 0,
+ "VCPUOPs interface unavailable\n");
+#undef XTREQUIRES
+ device_set_desc(dev, "Xen PV Clock");
+ return (BUS_PROBE_NOWILDCARD);
+}
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline uint64_t
+scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
+{
+ uint64_t product;
+
+ if (shift < 0)
+ delta >>= -shift;
+ else
+ delta <<= shift;
+
+#if defined(__i386__)
+ {
+ uint32_t tmp1, tmp2;
+
+ /**
+ * For i386, the formula looks like:
+ *
+ * lower = (mul_frac * (delta & UINT_MAX)) >> 32
+ * upper = mul_frac * (delta >> 32)
+ * product = lower + upper
+ */
+ __asm__ (
+ "mul %5 ; "
+ "mov %4,%%eax ; "
+ "mov %%edx,%4 ; "
+ "mul %5 ; "
+ "xor %5,%5 ; "
+ "add %4,%%eax ; "
+ "adc %5,%%edx ; "
+ : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+ : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)),
+ "2" (mul_frac) );
+ }
+#elif defined(__amd64__)
+ {
+ unsigned long tmp;
+
+ __asm__ (
+ "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
+ : [lo]"=a" (product), [hi]"=d" (tmp)
+ : "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac));
+ }
+#else
+#error "xentimer: unsupported architecture"
+#endif
+
+ return (product);
+}
+
+static uint64_t
+get_nsec_offset(struct vcpu_time_info *tinfo)
+{
+
+ return (scale_delta(rdtsc() - tinfo->tsc_timestamp,
+ tinfo->tsc_to_system_mul, tinfo->tsc_shift));
+}
+
+/*
+ * Read the current hypervisor system uptime value from Xen.
+ * See <xen/interface/xen.h> for a description of how this works.
+ */
+static uint32_t
+xen_fetch_vcpu_tinfo(struct vcpu_time_info *dst, struct vcpu_time_info *src)
+{
+
+ do {
+ dst->version = src->version;
+ rmb();
+ dst->tsc_timestamp = src->tsc_timestamp;
+ dst->system_time = src->system_time;
+ dst->tsc_to_system_mul = src->tsc_to_system_mul;
+ dst->tsc_shift = src->tsc_shift;
+ rmb();
+ } while ((src->version & 1) | (dst->version ^ src->version));
+
+ return (dst->version);
+}
+
+/**
+ * \brief Get the current time, in nanoseconds, since the hypervisor booted.
+ *
+ * \note This function returns the current CPU's idea of this value, unless
+ * it happens to be less than another CPU's previously determined value.
+ */
+static uint64_t
+xen_fetch_vcpu_time(void)
+{
+ struct vcpu_time_info dst;
+ struct vcpu_time_info *src;
+ uint32_t pre_version;
+ uint64_t now;
+ volatile uint64_t last;
+ struct vcpu_info *vcpu = DPCPU_GET(vcpu_info);
+
+ src = &vcpu->time;
+
+ critical_enter();
+ do {
+ pre_version = xen_fetch_vcpu_tinfo(&dst, src);
+ barrier();
+ now = dst.system_time + get_nsec_offset(&dst);
+ barrier();
+ } while (pre_version != src->version);
+
+ /*
+ * Enforce a monotonically increasing clock time across all
+ * VCPUs. If our time is too old, use the last time and return.
+ * Otherwise, try to update the last time.
+ */
+ do {
+ last = xen_timer_last_time;
+ if (last > now) {
+ now = last;
+ break;
+ }
+ } while (!atomic_cmpset_64(&xen_timer_last_time, last, now));
+
+ critical_exit();
+
+ return (now);
+}
+
+static uint32_t
+xentimer_get_timecount(struct timecounter *tc)
+{
+
+ return ((uint32_t)xen_fetch_vcpu_time() & UINT_MAX);
+}
+
+/**
+ * \brief Fetch the hypervisor boot time, known as the "Xen wallclock".
+ *
+ * \param ts Timespec to store the current stable value.
+ * \param version Pointer to store the corresponding wallclock version.
+ *
+ * \note This value is updated when Domain-0 shifts its clock to follow
+ * clock drift, e.g. as detected by NTP.
+ */
+static void
+xen_fetch_wallclock(struct timespec *ts)
+{
+ shared_info_t *src = HYPERVISOR_shared_info;
+ uint32_t version = 0;
+
+ do {
+ version = src->wc_version;
+ rmb();
+ ts->tv_sec = src->wc_sec;
+ ts->tv_nsec = src->wc_nsec;
+ rmb();
+ } while ((src->wc_version & 1) | (version ^ src->wc_version));
+}
+
+static void
+xen_fetch_uptime(struct timespec *ts)
+{
+ uint64_t uptime = xen_fetch_vcpu_time();
+ ts->tv_sec = uptime / NSEC_IN_SEC;
+ ts->tv_nsec = uptime % NSEC_IN_SEC;
+}
+
+static int
+xentimer_settime(device_t dev __unused, struct timespec *ts)
+{
+ /*
+ * Don't return EINVAL here; just silently fail if the domain isn't
+ * privileged enough to set the TOD.
+ */
+ return (0);
+}
+
+/**
+ * \brief Return current time according to the Xen Hypervisor wallclock.
+ *
+ * \param dev Xentimer device.
+ * \param ts Pointer to store the wallclock time.
+ *
+ * \note The Xen time structures document the hypervisor start time and the
+ * uptime-since-hypervisor-start (in nsec.) They need to be combined
+ * in order to calculate a TOD clock.
+ */
+static int
+xentimer_gettime(device_t dev, struct timespec *ts)
+{
+ struct timespec u_ts;
+
+ timespecclear(ts);
+ xen_fetch_wallclock(ts);
+ xen_fetch_uptime(&u_ts);
+ timespecadd(ts, &u_ts);
+
+ return (0);
+}
+
+/**
+ * \brief Handle a timer interrupt for the Xen PV timer driver.
+ *
+ * \param arg Xen timer driver softc that is expecting the interrupt.
+ */
+static int
+xentimer_intr(void *arg)
+{
+ struct xentimer_softc *sc = (struct xentimer_softc *)arg;
+ struct xentimer_pcpu_data *pcpu = DPCPU_PTR(xentimer_pcpu);
+
+ pcpu->last_processed = xen_fetch_vcpu_time();
+ if (pcpu->timer != 0 && sc->et.et_active)
+ sc->et.et_event_cb(&sc->et, sc->et.et_arg);
+
+ return (FILTER_HANDLED);
+}
+
+static int
+xentimer_vcpu_start_timer(int vcpu, uint64_t next_time)
+{
+ struct vcpu_set_singleshot_timer single;
+
+ single.timeout_abs_ns = next_time;
+ single.flags = VCPU_SSHOTTMR_future;
+ return (HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, vcpu, &single));
+}
+
+static int
+xentimer_vcpu_stop_timer(int vcpu)
+{
+
+ return (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, vcpu, NULL));
+}
+
+/**
+ * \brief Set the next oneshot time for the current CPU.
+ *
+ * \param et Xen timer driver event timer to schedule on.
+ * \param first Delta to the next time to schedule the interrupt for.
+ * \param period Not used.
+ *
+ * \note See eventtimers(9) for more information.
+ * \note
+ *
+ * \returns 0
+ */
+static int
+xentimer_et_start(struct eventtimer *et,
+ sbintime_t first, sbintime_t period)
+{
+ int error = 0, i = 0;
+ struct xentimer_softc *sc = et->et_priv;
+ int cpu = PCPU_GET(vcpu_id);
+ struct xentimer_pcpu_data *pcpu = DPCPU_PTR(xentimer_pcpu);
+ uint64_t first_in_ns, next_time;
+
+ /* See sbttots() for this formula. */
+ first_in_ns = (((first >> 32) * NSEC_IN_SEC) +
+ (((uint64_t)NSEC_IN_SEC * (uint32_t)first) >> 32));
+
+ /*
+ * Retry any timer scheduling failures, where the hypervisor
+ * returns -ETIME. Sometimes even a 100us timer period isn't large
+ * enough, but larger period instances are relatively uncommon.
+ *
+ * XXX Remove the panics once et_start() and its consumers are
+ * equipped to deal with start failures.
+ */
+ do {
+ if (++i == 60)
+ panic("can't schedule timer");
+ next_time = xen_fetch_vcpu_time() + first_in_ns;
+ error = xentimer_vcpu_start_timer(cpu, next_time);
+ } while (error == -ETIME);
+
+ if (error)
+ panic("%s: Error %d setting singleshot timer to %"PRIu64"\n",
+ device_get_nameunit(sc->dev), error, next_time);
+
+ pcpu->timer = next_time;
+ return (error);
+}
+
+/**
+ * \brief Cancel the event timer's currently running timer, if any.
+ */
+static int
+xentimer_et_stop(struct eventtimer *et)
+{
+ int cpu = PCPU_GET(vcpu_id);
+ struct xentimer_pcpu_data *pcpu = DPCPU_PTR(xentimer_pcpu);
+
+ pcpu->timer = 0;
+ return (xentimer_vcpu_stop_timer(cpu));
+}
+
+/**
+ * \brief Attach a Xen PV timer driver instance.
+ *
+ * \param dev Bus device object to attach.
+ *
+ * \note
+ * \returns EINVAL
+ */
+static int
+xentimer_attach(device_t dev)
+{
+ struct xentimer_softc *sc = device_get_softc(dev);
+ int error, i;
+
+ sc->dev = dev;
+
+ /* Bind an event channel to a VIRQ on each VCPU. */
+ CPU_FOREACH(i) {
+ struct xentimer_pcpu_data *pcpu;
+
+ pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
+ error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL);
+ if (error) {
+ device_printf(dev, "Error disabling Xen periodic timer "
+ "on CPU %d\n", i);
+ return (error);
+ }
+
+ error = xen_intr_bind_virq(dev, VIRQ_TIMER, i, xentimer_intr,
+ NULL, sc, INTR_TYPE_CLK, &pcpu->irq_handle);
+ if (error) {
+ device_printf(dev, "Error %d binding VIRQ_TIMER "
+ "to VCPU %d\n", error, i);
+ return (error);
+ }
+ xen_intr_describe(pcpu->irq_handle, "c%d", i);
+ }
+
+ /* Register the event timer. */
+ sc->et.et_name = "XENTIMER";
+ sc->et.et_quality = XENTIMER_QUALITY;
+ sc->et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU;
+ sc->et.et_frequency = NSEC_IN_SEC;
+ /* See tstosbt() for this formula */
+ sc->et.et_min_period = (XENTIMER_MIN_PERIOD_IN_NSEC *
+ (((uint64_t)1 << 63) / 500000000) >> 32);
+ sc->et.et_max_period = ((sbintime_t)4 << 32);
+ sc->et.et_start = xentimer_et_start;
+ sc->et.et_stop = xentimer_et_stop;
+ sc->et.et_priv = sc;
+ et_register(&sc->et);
+
+ /* Register the timecounter. */
+ sc->tc.tc_name = "XENTIMER";
+ sc->tc.tc_quality = XENTIMER_QUALITY;
+ sc->tc.tc_flags = TC_FLAGS_SUSPEND_SAFE;
+ /*
+ * The underlying resolution is in nanoseconds, since the timer info
+ * scales TSC frequencies using a fraction that represents time in
+ * terms of nanoseconds.
+ */
+ sc->tc.tc_frequency = NSEC_IN_SEC;
+ sc->tc.tc_counter_mask = ~0u;
+ sc->tc.tc_get_timecount = xentimer_get_timecount;
+ sc->tc.tc_priv = sc;
+ tc_init(&sc->tc);
+
+ /* Register the Hypervisor wall clock */
+ clock_register(dev, XENCLOCK_RESOLUTION);
+
+ return (0);
+}
+
+static int
+xentimer_detach(device_t dev)
+{
+
+ /* Implement Xen PV clock teardown - XXX see hpet_detach ? */
+ /* If possible:
+ * 1. need to deregister timecounter
+ * 2. need to deregister event timer
+ * 3. need to deregister virtual IRQ event channels
+ */
+ return (EBUSY);
+}
+
+static void
+xentimer_percpu_resume(void *arg)
+{
+ device_t dev = (device_t) arg;
+ struct xentimer_softc *sc = device_get_softc(dev);
+
+ xentimer_et_start(&sc->et, sc->et.et_min_period, 0);
+}
+
+static int
+xentimer_resume(device_t dev)
+{
+ int error;
+ int i;
+
+ /* Disable the periodic timer */
+ CPU_FOREACH(i) {
+ error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL);
+ if (error != 0) {
+ device_printf(dev,
+ "Error disabling Xen periodic timer on CPU %d\n",
+ i);
+ return (error);
+ }
+ }
+
+ /* Reset the last uptime value */
+ xen_timer_last_time = 0;
+
+ /* Reset the RTC clock */
+ inittodr(time_second);
+
+ /* Kick the timers on all CPUs */
+ smp_rendezvous(NULL, xentimer_percpu_resume, NULL, dev);
+
+ if (bootverbose)
+ device_printf(dev, "resumed operation after suspension\n");
+
+ return (0);
+}
+
+static int
+xentimer_suspend(device_t dev)
+{
+ return (0);
+}
+
+static device_method_t xentimer_methods[] = {
+ DEVMETHOD(device_identify, xentimer_identify),
+ DEVMETHOD(device_probe, xentimer_probe),
+ DEVMETHOD(device_attach, xentimer_attach),
+ DEVMETHOD(device_detach, xentimer_detach),
+ DEVMETHOD(device_suspend, xentimer_suspend),
+ DEVMETHOD(device_resume, xentimer_resume),
+ /* clock interface */
+ DEVMETHOD(clock_gettime, xentimer_gettime),
+ DEVMETHOD(clock_settime, xentimer_settime),
+ DEVMETHOD_END
+};
+
+static driver_t xentimer_driver = {
+ "xen_et",
+ xentimer_methods,
+ sizeof(struct xentimer_softc),
+};
+
+DRIVER_MODULE(xentimer, nexus, xentimer_driver, xentimer_devclass, 0, 0);
+MODULE_DEPEND(xentimer, nexus, 1, 1, 1);
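scale_delta() converts a TSC delta to nanoseconds using the (tsc_to_system_mul, tsc_shift) pair Xen publishes per VCPU; the inline assembly computes the 64x32-bit product and keeps it shifted right by 32. A portable C sketch of the formula the i386 comment spells out (illustrative only, not the committed code):

	static uint64_t
	scale_delta_sketch(uint64_t delta, uint32_t mul_frac, int shift)
	{
		uint64_t lower, upper;

		if (shift < 0)
			delta >>= -shift;
		else
			delta <<= shift;

		/* (delta * mul_frac) >> 32, split to stay in 64-bit math. */
		lower = ((uint64_t)mul_frac * (uint32_t)delta) >> 32;
		upper = (uint64_t)mul_frac * (delta >> 32);
		return (lower + upper);
	}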
Property changes on: trunk/sys/dev/xen/timer/timer.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
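The first_in_ns computation in xentimer_et_start() and the et_min_period setting both rely on sbintime_t being a 32.32 fixed-point count of seconds, so the conversion to nanoseconds is:

	/* ns = whole seconds * 1e9  +  (1e9 * 32-bit fraction) >> 32 */
	first_in_ns = ((first >> 32) * NSEC_IN_SEC) +
	    (((uint64_t)NSEC_IN_SEC * (uint32_t)first) >> 32);

	/* e.g. first = 0x0000000080000000 (0.5 s):
	 * (0 * 1e9) + ((1e9 * 0x80000000) >> 32) = 500000000 ns */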
Modified: trunk/sys/dev/xen/xenpci/xenpci.c
===================================================================
--- trunk/sys/dev/xen/xenpci/xenpci.c 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/xenpci/xenpci.c 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*
* Copyright (c) 2008 Citrix Systems, Inc.
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/dev/xen/xenpci/xenpci.c 255726 2013-09-20 05:06:03Z gibbs $");
#include <sys/param.h>
#include <sys/bus.h>
@@ -32,9 +33,6 @@
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
-#include <sys/proc.h>
-#include <sys/systm.h>
-#include <sys/time.h>
#include <machine/bus.h>
#include <machine/resource.h>
@@ -41,31 +39,19 @@
#include <sys/rman.h>
#include <machine/stdarg.h>
-#include <machine/xen/xen-os.h>
+
+#include <xen/xen-os.h>
#include <xen/features.h>
#include <xen/hypervisor.h>
-#include <xen/gnttab.h>
-#include <xen/xen_intr.h>
-#include <xen/interface/memory.h>
-#include <xen/interface/hvm/params.h>
+#include <xen/hvm.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
-#include <vm/vm.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_kern.h>
-#include <vm/pmap.h>
-
#include <dev/xen/xenpci/xenpcivar.h>
-/*
- * These variables are used by the rest of the kernel to access the
- * hypervisor.
- */
-char *hypercall_stubs;
-shared_info_t *HYPERVISOR_shared_info;
-static vm_paddr_t shared_info_pa;
+extern void xen_intr_handle_upcall(struct trapframe *trap_frame);
+
static device_t nexus;
/*
@@ -73,103 +59,44 @@
*/
static devclass_t xenpci_devclass;
-/*
- * Return the CPUID base address for Xen functions.
- */
-static uint32_t
-xenpci_cpuid_base(void)
+static int
+xenpci_intr_filter(void *trap_frame)
{
- uint32_t base, regs[4];
-
- for (base = 0x40000000; base < 0x40010000; base += 0x100) {
- do_cpuid(base, regs);
- if (!memcmp("XenVMMXenVMM", ®s[1], 12)
- && (regs[0] - base) >= 2)
- return (base);
- }
- return (0);
+ xen_intr_handle_upcall(trap_frame);
+ return (FILTER_HANDLED);
}
-/*
- * Allocate and fill in the hypcall page.
- */
static int
-xenpci_init_hypercall_stubs(device_t dev, struct xenpci_softc * scp)
+xenpci_irq_init(device_t device, struct xenpci_softc *scp)
{
- uint32_t base, regs[4];
- int i;
+ int error;
- base = xenpci_cpuid_base();
- if (!base) {
- device_printf(dev, "Xen platform device but not Xen VMM\n");
- return (EINVAL);
- }
+ error = BUS_SETUP_INTR(device_get_parent(device), device,
+ scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC,
+ xenpci_intr_filter, NULL, /*trap_frame*/NULL,
+ &scp->intr_cookie);
+ if (error)
+ return error;
- if (bootverbose) {
- do_cpuid(base + 1, regs);
- device_printf(dev, "Xen version %d.%d.\n",
- regs[0] >> 16, regs[0] & 0xffff);
- }
-
+#ifdef SMP
/*
- * Find the hypercall pages.
+ * When using the PCI event delivery callback we cannot assign
+ * events to specific vCPUs, so all events are delivered to vCPU#0 by
+ * Xen. Since the PCI interrupt can fire on any CPU by default, we
+ * need to bind it to vCPU#0 in order to ensure that
+ * xen_intr_handle_upcall always gets called on vCPU#0.
*/
- do_cpuid(base + 2, regs);
-
- hypercall_stubs = malloc(regs[0] * PAGE_SIZE, M_TEMP, M_WAITOK);
+ error = BUS_BIND_INTR(device_get_parent(device), device,
+ scp->res_irq, 0);
+ if (error)
+ return error;
+#endif
- for (i = 0; i < regs[0]; i++) {
- wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i);
- }
-
+ xen_hvm_set_callback(device);
return (0);
}
/*
- * After a resume, re-initialise the hypercall page.
- */
-static void
-xenpci_resume_hypercall_stubs(device_t dev, struct xenpci_softc * scp)
-{
- uint32_t base, regs[4];
- int i;
-
- base = xenpci_cpuid_base();
-
- do_cpuid(base + 2, regs);
- for (i = 0; i < regs[0]; i++) {
- wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i);
- }
-}
-
-/*
- * Tell the hypervisor how to contact us for event channel callbacks.
- */
-static void
-xenpci_set_callback(device_t dev)
-{
- int irq;
- uint64_t callback;
- struct xen_hvm_param xhp;
-
- irq = pci_get_irq(dev);
- if (irq < 16) {
- callback = irq;
- } else {
- callback = (pci_get_intpin(dev) - 1) & 3;
- callback |= pci_get_slot(dev) << 11;
- callback |= 1ull << 56;
- }
-
- xhp.domid = DOMID_SELF;
- xhp.index = HVM_PARAM_CALLBACK_IRQ;
- xhp.value = callback;
- if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp))
- panic("Can't set evtchn callback");
-}
-
-
-/*
* Deallocate anything allocated by xenpci_allocate_resources.
*/
static int
@@ -293,35 +220,6 @@
}
/*
- * Called very early in the resume sequence - reinitialise the various
- * bits of Xen machinery including the hypercall page and the shared
- * info page.
- */
-void
-xenpci_resume()
-{
- device_t dev = devclass_get_device(xenpci_devclass, 0);
- struct xenpci_softc *scp = device_get_softc(dev);
- struct xen_add_to_physmap xatp;
-
- xenpci_resume_hypercall_stubs(dev, scp);
-
- xatp.domid = DOMID_SELF;
- xatp.idx = 0;
- xatp.space = XENMAPSPACE_shared_info;
- xatp.gpfn = shared_info_pa >> PAGE_SHIFT;
- if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
- panic("HYPERVISOR_memory_op failed");
-
- pmap_kenter((vm_offset_t) HYPERVISOR_shared_info, shared_info_pa);
-
- xenpci_set_callback(dev);
-
- gnttab_resume();
- irq_resume();
-}
-
-/*
* Probe - just check device ID.
*/
static int
@@ -341,11 +239,9 @@
static int
xenpci_attach(device_t dev)
{
- int error;
struct xenpci_softc *scp = device_get_softc(dev);
- struct xen_add_to_physmap xatp;
- vm_offset_t shared_va;
devclass_t dc;
+ int error;
/*
* Find and record nexus0. Since we are not really on the
@@ -365,34 +261,16 @@
goto errexit;
}
- error = xenpci_init_hypercall_stubs(dev, scp);
+ /*
+ * Hook the irq up to evtchn
+ */
+ error = xenpci_irq_init(dev, scp);
if (error) {
- device_printf(dev, "xenpci_init_hypercall_stubs failed(%d).\n",
- error);
+ device_printf(dev, "xenpci_irq_init failed(%d).\n",
+ error);
goto errexit;
}
- setup_xen_features();
-
- xenpci_alloc_space_int(scp, PAGE_SIZE, &shared_info_pa);
-
- xatp.domid = DOMID_SELF;
- xatp.idx = 0;
- xatp.space = XENMAPSPACE_shared_info;
- xatp.gpfn = shared_info_pa >> PAGE_SHIFT;
- if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
- panic("HYPERVISOR_memory_op failed");
-
- shared_va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
- pmap_kenter(shared_va, shared_info_pa);
- HYPERVISOR_shared_info = (void *) shared_va;
-
- /*
- * Hook the irq up to evtchn
- */
- xenpci_irq_init(dev, scp);
- xenpci_set_callback(dev);
-
return (bus_generic_attach(dev));
errexit:
@@ -431,13 +309,26 @@
return (xenpci_deallocate_resources(dev));
}
+static int
+xenpci_suspend(device_t dev)
+{
+ return (bus_generic_suspend(dev));
+}
+
+static int
+xenpci_resume(device_t dev)
+{
+ xen_hvm_set_callback(dev);
+ return (bus_generic_resume(dev));
+}
+
static device_method_t xenpci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, xenpci_probe),
DEVMETHOD(device_attach, xenpci_attach),
DEVMETHOD(device_detach, xenpci_detach),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
+ DEVMETHOD(device_suspend, xenpci_suspend),
+ DEVMETHOD(device_resume, xenpci_resume),
/* Bus interface */
DEVMETHOD(bus_add_child, bus_generic_add_child),
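
For readers skimming the diff: the new xenpci_irq_init() follows the standard newbus
filter-interrupt pattern. A filter (no ithread) is registered for the PCI IRQ and, on SMP
kernels, the IRQ is bound to CPU 0 so the event-channel upcall is always delivered on
vCPU#0, as the comment in the hunk above explains. Below is a minimal, self-contained
sketch of that pattern; the driver, resource, and cookie names are hypothetical and this
is not code from the commit (the commit itself goes through BUS_SETUP_INTR on the parent
bus, while the sketch uses the bus_setup_intr() accessor that wraps it):

#include <sys/param.h>
#include <sys/bus.h>

/* Filter run directly in interrupt context; no ithread is scheduled. */
static int
mydrv_intr_filter(void *arg)
{
	/* Dispatch work here (e.g. an upcall handler) and claim the IRQ. */
	return (FILTER_HANDLED);
}

/*
 * Register the filter and, on SMP kernels, bind the IRQ to CPU 0 so the
 * filter always runs on the CPU to which events are delivered.
 */
static int
mydrv_irq_init(device_t dev, struct resource *irq_res, void **cookiep)
{
	int error;

	error = bus_setup_intr(dev, irq_res, INTR_MPSAFE | INTR_TYPE_MISC,
	    mydrv_intr_filter, NULL, dev, cookiep);
	if (error != 0)
		return (error);
#ifdef SMP
	error = bus_bind_intr(dev, irq_res, 0);
#endif
	return (error);
}

Returning FILTER_HANDLED keeps the dispatch entirely in interrupt context, which is
what the upcall path wants; the commit relies on the same behaviour for
xen_intr_handle_upcall().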
Modified: trunk/sys/dev/xen/xenpci/xenpcivar.h
===================================================================
--- trunk/sys/dev/xen/xenpci/xenpcivar.h 2018-05-27 22:19:26 UTC (rev 10028)
+++ trunk/sys/dev/xen/xenpci/xenpcivar.h 2018-05-27 22:21:25 UTC (rev 10029)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*
* Copyright (c) 2008 Citrix Systems, Inc.
* All rights reserved.
@@ -22,6 +23,8 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/dev/xen/xenpci/xenpcivar.h 255040 2013-08-29 19:52:18Z gibbs $
*/
/*
@@ -38,7 +41,4 @@
vm_paddr_t phys_next; /* next page from mem range */
};
-extern int xenpci_irq_init(device_t device, struct xenpci_softc *scp);
extern int xenpci_alloc_space(size_t sz, vm_paddr_t *pa);
-extern void xenpci_resume(void);
-extern void xen_suspend(void);