[Midnightbsd-cvs] src [9465] trunk/sys/vm/vm_phys.c: Fix two bugs in the current NUMA-aware allocation code:

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Sun Mar 5 14:41:04 EST 2017


Revision: 9465
          http://svnweb.midnightbsd.org/src/?rev=9465
Author:   laffer1
Date:     2017-03-05 14:41:04 -0500 (Sun, 05 Mar 2017)
Log Message:
-----------
Fix two bugs in the current NUMA-aware allocation code:
- vm_phys_alloc_freelist_pages() can be called by vm_page_alloc_freelist()
  to allocate a page from a specific freelist.  In the NUMA case it did not
  properly map the public VM_FREELIST_* constants to the correct backing
  freelists, nor did it try all NUMA domains for allocations from
  VM_FREELIST_DEFAULT.
- vm_phys_alloc_pages() did not pin the thread, and each call to
  vm_phys_alloc_freelist_pages() fetched the current domain to choose
  which freelist to use.  If a thread migrated domains during the loop
  in vm_phys_alloc_pages(), it could skip one of the freelists.  If the
  other freelists were out of memory, it was then possible that
  vm_phys_alloc_pages() would fail to allocate a page even though pages
  were available, resulting in a panic in vm_page_alloc().

Obtained from: FreeBSD svn 251179

Modified Paths:
--------------
    trunk/sys/vm/vm_phys.c

Modified: trunk/sys/vm/vm_phys.c
===================================================================
--- trunk/sys/vm/vm_phys.c	2017-03-05 19:40:24 UTC (rev 9464)
+++ trunk/sys/vm/vm_phys.c	2017-03-05 19:41:04 UTC (rev 9465)
@@ -117,6 +117,8 @@
     NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
 #endif
 
+static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
+    int order);
 static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
     int domain);
 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
@@ -430,10 +432,20 @@
 vm_phys_alloc_pages(int pool, int order)
 {
 	vm_page_t m;
-	int flind;
+	int domain, flind;
 
+	KASSERT(pool < VM_NFREEPOOL,
+	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
+	KASSERT(order < VM_NFREEORDER,
+	    ("vm_phys_alloc_pages: order %d is out of range", order));
+
+#if VM_NDOMAIN > 1
+	domain = PCPU_GET(domain);
+#else
+	domain = 0;
+#endif
 	for (flind = 0; flind < vm_nfreelists; flind++) {
-		m = vm_phys_alloc_freelist_pages(flind, pool, order);
+		m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
 		if (m != NULL)
 			return (m);
 	}
@@ -446,11 +458,12 @@
  */
 vm_page_t
 vm_phys_alloc_freelist_pages(int flind, int pool, int order)
-{	
-	struct vm_freelist *fl;
-	struct vm_freelist *alt;
-	int domain, oind, pind;
+{
+#if VM_NDOMAIN > 1
 	vm_page_t m;
+	int i, ndomains;
+#endif
+	int domain;
 
 	KASSERT(flind < VM_NFREELIST,
 	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
@@ -460,10 +473,39 @@
 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
 
 #if VM_NDOMAIN > 1
+	/*
+	 * This routine expects to be called with a VM_FREELIST_* constant.
+	 * On a system with multiple domains we need to adjust the flind
+	 * appropriately.  If it is for VM_FREELIST_DEFAULT we need to
+	 * iterate over the per-domain lists.
+	 */
 	domain = PCPU_GET(domain);
+	ndomains = vm_nfreelists - VM_NFREELIST + 1;
+	if (flind == VM_FREELIST_DEFAULT) {
+		m = NULL;
+		for (i = 0; i < ndomains; i++, flind++) {
+			m = vm_phys_alloc_domain_pages(domain, flind, pool,
+			    order);
+			if (m != NULL)
+				break;
+		}
+		return (m);
+	} else if (flind > VM_FREELIST_DEFAULT)
+		flind += ndomains - 1;
 #else
 	domain = 0;
 #endif
+	return (vm_phys_alloc_domain_pages(domain, flind, pool, order));
+}
+
+static vm_page_t
+vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
+{	
+	struct vm_freelist *fl;
+	struct vm_freelist *alt;
+	int oind, pind;
+	vm_page_t m;
+
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	fl = (*vm_phys_lookup_lists[domain][flind])[pool];
 	for (oind = order; oind < VM_NFREEORDER; oind++) {



More information about the Midnightbsd-cvs mailing list