xref: /NextBSD/lib/libdispatch/src/allocator.c (revision 33da5adc555b3bc29986eeadca03829e4ad06b1e)
1 /*
2  * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
3  *
4  * @APPLE_APACHE_LICENSE_HEADER_START@
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * @APPLE_APACHE_LICENSE_HEADER_END@
19  */
20 
21 #include "internal.h"
22 #include "allocator_internal.h"
23 
24 
25 #if DISPATCH_ALLOCATOR
26 
27 #ifndef VM_MEMORY_LIBDISPATCH
28 #define VM_MEMORY_LIBDISPATCH 74
29 #endif
30 
31 // _dispatch_main_heap is is the first heap in the linked list, where searches
32 // always begin.
33 //
34 // _dispatch_main_heap, and dh_next, are read normally but only written (in
35 // try_create_heap) by cmpxchg. They start life at 0, and are only written
36 // once to non-zero. They are not marked volatile. There is a small risk that
37 // some thread may see a stale 0 value and enter try_create_heap. It will
38 // waste some time in an allocate syscall, but eventually it will try to
39 // cmpxchg, expecting to overwite 0 with an address. This will fail
40 // (because another thread already did this), the thread will deallocate the
41 // unused allocated memory, and continue with the new value.
42 //
43 // If something goes wrong here, the symptom would be a NULL dereference
44 // in alloc_continuation_from_heap or _magazine when derefing the magazine ptr.
45 static dispatch_heap_t _dispatch_main_heap;
46 
47 DISPATCH_ALWAYS_INLINE
48 static void
set_last_found_page(bitmap_t * val)49 set_last_found_page(bitmap_t *val)
50 {
51 	dispatch_assert(_dispatch_main_heap);
52 	unsigned int cpu = _dispatch_cpu_number();
53 	_dispatch_main_heap[cpu].header.last_found_page = val;
54 }
55 
56 DISPATCH_ALWAYS_INLINE
57 static bitmap_t *
last_found_page(void)58 last_found_page(void)
59 {
60 	dispatch_assert(_dispatch_main_heap);
61 	unsigned int cpu = _dispatch_cpu_number();
62 	return _dispatch_main_heap[cpu].header.last_found_page;
63 }
64 
65 #pragma mark -
66 #pragma mark dispatch_alloc_bitmaps
67 
68 DISPATCH_ALWAYS_INLINE_NDEBUG DISPATCH_CONST
69 static bitmap_t *
supermap_address(struct dispatch_magazine_s * magazine,unsigned int supermap)70 supermap_address(struct dispatch_magazine_s *magazine, unsigned int supermap)
71 {
72 	return &magazine->supermaps[supermap];
73 }
74 
75 DISPATCH_ALWAYS_INLINE_NDEBUG DISPATCH_CONST
76 static bitmap_t *
bitmap_address(struct dispatch_magazine_s * magazine,unsigned int supermap,unsigned int map)77 bitmap_address(struct dispatch_magazine_s *magazine, unsigned int supermap,
78 		unsigned int map)
79 {
80 	return &magazine->maps[supermap][map];
81 }
82 
83 DISPATCH_ALWAYS_INLINE_NDEBUG DISPATCH_CONST
84 static dispatch_continuation_t
continuation_address(struct dispatch_magazine_s * magazine,unsigned int supermap,unsigned int map,unsigned int index)85 continuation_address(struct dispatch_magazine_s *magazine,
86 		unsigned int supermap, unsigned int map, unsigned int index)
87 {
88 #if DISPATCH_DEBUG
89 	dispatch_assert(supermap < SUPERMAPS_PER_MAGAZINE);
90 	dispatch_assert(map < BITMAPS_PER_SUPERMAP);
91 	dispatch_assert(index < CONTINUATIONS_PER_BITMAP);
92 #endif
93 	return (dispatch_continuation_t)&magazine->conts[supermap][map][index];
94 }
95 
96 DISPATCH_ALWAYS_INLINE_NDEBUG DISPATCH_CONST
97 static struct dispatch_magazine_s *
magazine_for_continuation(dispatch_continuation_t c)98 magazine_for_continuation(dispatch_continuation_t c)
99 {
100 	return (struct dispatch_magazine_s *)((uintptr_t)c & MAGAZINE_MASK);
101 }
102 
103 DISPATCH_ALWAYS_INLINE_NDEBUG
104 static void
get_cont_and_indices_for_bitmap_and_index(bitmap_t * bitmap,unsigned int index,dispatch_continuation_t * continuation_out,bitmap_t ** supermap_out,unsigned int * bitmap_index_out)105 get_cont_and_indices_for_bitmap_and_index(bitmap_t *bitmap,
106 		unsigned int index, dispatch_continuation_t *continuation_out,
107 		bitmap_t **supermap_out, unsigned int *bitmap_index_out)
108 {
109 	// m_for_c wants a continuation not a bitmap, but it works because it
110 	// just masks off the bottom bits of the address.
111 	struct dispatch_magazine_s *m = magazine_for_continuation((void *)bitmap);
112 	unsigned int mindex = (unsigned int)(bitmap - m->maps[0]);
113 	unsigned int bindex = mindex % BITMAPS_PER_SUPERMAP;
114 	unsigned int sindex = mindex / BITMAPS_PER_SUPERMAP;
115 	dispatch_assert(&m->maps[sindex][bindex] == bitmap);
116 	if (fastpath(continuation_out)) {
117 		*continuation_out = continuation_address(m, sindex, bindex, index);
118 	}
119 	if (fastpath(supermap_out)) *supermap_out = supermap_address(m, sindex);
120 	if (fastpath(bitmap_index_out)) *bitmap_index_out = bindex;
121 }
122 
123 DISPATCH_ALWAYS_INLINE_NDEBUG DISPATCH_CONST
124 static bool
continuation_is_in_first_page(dispatch_continuation_t c)125 continuation_is_in_first_page(dispatch_continuation_t c)
126 {
127 #if PACK_FIRST_PAGE_WITH_CONTINUATIONS
128 	// (the base of c's magazine == the base of c's page)
129 	// => c is in first page of magazine
130 	return (((uintptr_t)c & MAGAZINE_MASK) ==
131 			((uintptr_t)c & ~(uintptr_t)DISPATCH_ALLOCATOR_PAGE_MASK));
132 #else
133 	(void)c;
134 	return false;
135 #endif
136 }
137 
138 DISPATCH_ALWAYS_INLINE_NDEBUG
139 static void
get_maps_and_indices_for_continuation(dispatch_continuation_t c,bitmap_t ** supermap_out,unsigned int * bitmap_index_out,bitmap_t ** bitmap_out,unsigned int * index_out)140 get_maps_and_indices_for_continuation(dispatch_continuation_t c,
141 		bitmap_t **supermap_out, unsigned int *bitmap_index_out,
142 		bitmap_t **bitmap_out, unsigned int *index_out)
143 {
144 	unsigned int cindex, sindex, index, mindex;
145 	padded_continuation *p = (padded_continuation *)c;
146 	struct dispatch_magazine_s *m = magazine_for_continuation(c);
147 #if PACK_FIRST_PAGE_WITH_CONTINUATIONS
148 	if (fastpath(continuation_is_in_first_page(c))) {
149 		cindex = (unsigned int)(p - m->fp_conts);
150 		index = cindex % CONTINUATIONS_PER_BITMAP;
151 		mindex = cindex / CONTINUATIONS_PER_BITMAP;
152 		if (fastpath(supermap_out)) *supermap_out = NULL;
153 		if (fastpath(bitmap_index_out)) *bitmap_index_out = mindex;
154 		if (fastpath(bitmap_out)) *bitmap_out = &m->fp_maps[mindex];
155 		if (fastpath(index_out)) *index_out = index;
156 		return;
157 	}
158 #endif // PACK_FIRST_PAGE_WITH_CONTINUATIONS
159 	cindex = (unsigned int)(p - (padded_continuation *)m->conts);
160 	sindex = cindex / (BITMAPS_PER_SUPERMAP * CONTINUATIONS_PER_BITMAP);
161 	mindex = (cindex / CONTINUATIONS_PER_BITMAP) % BITMAPS_PER_SUPERMAP;
162 	index = cindex % CONTINUATIONS_PER_BITMAP;
163 	if (fastpath(supermap_out)) *supermap_out = &m->supermaps[sindex];
164 	if (fastpath(bitmap_index_out)) *bitmap_index_out = mindex;
165 	if (fastpath(bitmap_out)) *bitmap_out = &m->maps[sindex][mindex];
166 	if (fastpath(index_out)) *index_out = index;
167 }
168 
169 // Base address of page, or NULL if this page shouldn't be madvise()d
170 DISPATCH_ALWAYS_INLINE_NDEBUG DISPATCH_CONST
171 static void *
madvisable_page_base_for_continuation(dispatch_continuation_t c)172 madvisable_page_base_for_continuation(dispatch_continuation_t c)
173 {
174 	if (fastpath(continuation_is_in_first_page(c))) {
175 		return NULL;
176 	}
177 	void *page_base = (void *)((uintptr_t)c &
178 			~(uintptr_t)DISPATCH_ALLOCATOR_PAGE_MASK);
179 #if DISPATCH_DEBUG
180 	struct dispatch_magazine_s *m = magazine_for_continuation(c);
181 	if (slowpath(page_base < (void *)&m->conts)) {
182 		DISPATCH_CRASH("madvisable continuation too low");
183 	}
184 	if (slowpath(page_base > (void *)&m->conts[SUPERMAPS_PER_MAGAZINE-1]
185 			[BITMAPS_PER_SUPERMAP-1][CONTINUATIONS_PER_BITMAP-1])) {
186 		DISPATCH_CRASH("madvisable continuation too high");
187 	}
188 #endif
189 	return page_base;
190 }
191 
192 // Bitmap that controls the first few continuations in the same page as
193 // the continuations controlled by the passed bitmap. Undefined results if the
194 // passed bitmap controls continuations in the first page.
195 DISPATCH_ALWAYS_INLINE_NDEBUG DISPATCH_CONST
196 static bitmap_t *
first_bitmap_in_same_page(bitmap_t * b)197 first_bitmap_in_same_page(bitmap_t *b)
198 {
199 #if DISPATCH_DEBUG
200 	struct dispatch_magazine_s *m;
201 	m = magazine_for_continuation((void*)b);
202 	dispatch_assert(b >= &m->maps[0][0]);
203 	dispatch_assert(b <  &m->maps[SUPERMAPS_PER_MAGAZINE]
204 			[BITMAPS_PER_SUPERMAP]);
205 #endif
206 	const uintptr_t PAGE_BITMAP_MASK = (BITMAPS_PER_PAGE *
207 			BYTES_PER_BITMAP) - 1;
208 	return (bitmap_t *)((uintptr_t)b & ~PAGE_BITMAP_MASK);
209 }
210 
211 DISPATCH_ALWAYS_INLINE_NDEBUG DISPATCH_CONST
212 static bool
bitmap_is_full(bitmap_t bits)213 bitmap_is_full(bitmap_t bits)
214 {
215 	return (bits == BITMAP_ALL_ONES);
216 }
217 
218 #define NO_BITS_WERE_UNSET (UINT_MAX)
219 
220 // max_index is the 0-based position of the most significant bit that is
221 // allowed to be set.
222 DISPATCH_ALWAYS_INLINE_NDEBUG
223 static unsigned int
bitmap_set_first_unset_bit_upto_index(volatile bitmap_t * bitmap,unsigned int max_index)224 bitmap_set_first_unset_bit_upto_index(volatile bitmap_t *bitmap,
225 		unsigned int max_index)
226 {
227 	// No barriers needed in acquire path: the just-allocated
228 	// continuation is "uninitialized", so the caller shouldn't
229 	// load from it before storing, so we don't need to guard
230 	// against reordering those loads.
231 	dispatch_assert(sizeof(*bitmap) == sizeof(unsigned long));
232 	return dispatch_atomic_set_first_bit(bitmap,max_index);
233 }
234 
235 DISPATCH_ALWAYS_INLINE
236 static unsigned int
bitmap_set_first_unset_bit(volatile bitmap_t * bitmap)237 bitmap_set_first_unset_bit(volatile bitmap_t *bitmap)
238 {
239 	return bitmap_set_first_unset_bit_upto_index(bitmap, UINT_MAX);
240 }
241 
242 #define CLEAR_EXCLUSIVELY true
243 #define CLEAR_NONEXCLUSIVELY false
244 
245 // Return true if this bit was the last in the bitmap, and it is now all zeroes
246 DISPATCH_ALWAYS_INLINE_NDEBUG
247 static bool
bitmap_clear_bit(volatile bitmap_t * bitmap,unsigned int index,bool exclusively)248 bitmap_clear_bit(volatile bitmap_t *bitmap, unsigned int index,
249 		bool exclusively)
250 {
251 #if DISPATCH_DEBUG
252 	dispatch_assert(index < CONTINUATIONS_PER_BITMAP);
253 #endif
254 	const bitmap_t mask = BITMAP_C(1) << index;
255 	bitmap_t b;
256 
257 	if (exclusively == CLEAR_EXCLUSIVELY) {
258 		if (slowpath((*bitmap & mask) == 0)) {
259 			DISPATCH_CRASH("Corruption: failed to clear bit exclusively");
260 		}
261 	}
262 
263 	// and-and-fetch
264 	b = dispatch_atomic_and(bitmap, ~mask, release);
265 	return b == 0;
266 }
267 
268 DISPATCH_ALWAYS_INLINE_NDEBUG
269 static void
mark_bitmap_as_full_if_still_full(volatile bitmap_t * supermap,unsigned int bitmap_index,volatile bitmap_t * bitmap)270 mark_bitmap_as_full_if_still_full(volatile bitmap_t *supermap,
271 		unsigned int bitmap_index, volatile bitmap_t *bitmap)
272 {
273 #if DISPATCH_DEBUG
274 	dispatch_assert(bitmap_index < BITMAPS_PER_SUPERMAP);
275 #endif
276 	const bitmap_t mask = BITMAP_C(1) << bitmap_index;
277 	bitmap_t s, s_new, s_masked;
278 
279 	if (!bitmap_is_full(*bitmap)) {
280 		return;
281 	}
282 	s_new = *supermap;
283 	for (;;) {
284 		// No barriers because supermaps are only advisory, they
285 		// don't protect access to other memory.
286 		s = s_new;
287 		s_masked = s | mask;
288 		if (dispatch_atomic_cmpxchgvw(supermap, s, s_masked, &s_new, relaxed) ||
289 				!bitmap_is_full(*bitmap)) {
290 			return;
291 		}
292 	}
293 }
294 
295 #pragma mark -
296 #pragma mark dispatch_alloc_continuation_alloc
297 
298 #if PACK_FIRST_PAGE_WITH_CONTINUATIONS
299 DISPATCH_ALWAYS_INLINE_NDEBUG
300 static dispatch_continuation_t
alloc_continuation_from_first_page(struct dispatch_magazine_s * magazine)301 alloc_continuation_from_first_page(struct dispatch_magazine_s *magazine)
302 {
303 	unsigned int i, index, continuation_index;
304 
305 	// TODO: unroll if this is hot?
306 	for (i = 0; i < FULL_BITMAPS_IN_FIRST_PAGE; i++) {
307 		index = bitmap_set_first_unset_bit(&magazine->fp_maps[i]);
308 		if (fastpath(index != NO_BITS_WERE_UNSET)) goto found;
309 	}
310 	if (REMAINDERED_CONTINUATIONS_IN_FIRST_PAGE) {
311 		index = bitmap_set_first_unset_bit_upto_index(&magazine->fp_maps[i],
312 				REMAINDERED_CONTINUATIONS_IN_FIRST_PAGE - 1);
313 		if (fastpath(index != NO_BITS_WERE_UNSET)) goto found;
314 	}
315 	return NULL;
316 
317 found:
318 	continuation_index = (i * CONTINUATIONS_PER_BITMAP) + index;
319 	return (dispatch_continuation_t)&magazine->fp_conts[continuation_index];
320 }
321 #endif // PACK_FIRST_PAGE_WITH_CONTINUATIONS
322 
323 DISPATCH_ALWAYS_INLINE_NDEBUG
324 static dispatch_continuation_t
alloc_continuation_from_magazine(struct dispatch_magazine_s * magazine)325 alloc_continuation_from_magazine(struct dispatch_magazine_s *magazine)
326 {
327 	unsigned int s, b, index;
328 
329 	for (s = 0; s < SUPERMAPS_PER_MAGAZINE; s++) {
330 		volatile bitmap_t *supermap = supermap_address(magazine, s);
331 		if (bitmap_is_full(*supermap)) {
332 			continue;
333 		}
334 		for (b = 0; b < BITMAPS_PER_SUPERMAP; b++) {
335 			volatile bitmap_t *bitmap = bitmap_address(magazine, s, b);
336 			index = bitmap_set_first_unset_bit(bitmap);
337 			if (index != NO_BITS_WERE_UNSET) {
338 				set_last_found_page(
339 						first_bitmap_in_same_page((bitmap_t *)bitmap));
340 				mark_bitmap_as_full_if_still_full(supermap, b, bitmap);
341 				return continuation_address(magazine, s, b, index);
342 			}
343 		}
344 	}
345 	return NULL;
346 }
347 
348 DISPATCH_NOINLINE
349 static void
_dispatch_alloc_try_create_heap(dispatch_heap_t * heap_ptr)350 _dispatch_alloc_try_create_heap(dispatch_heap_t *heap_ptr)
351 {
352 #if HAVE_MACH
353 	kern_return_t kr;
354 	mach_vm_size_t vm_size = MAGAZINES_PER_HEAP * BYTES_PER_MAGAZINE;
355 	mach_vm_offset_t vm_mask = ~MAGAZINE_MASK;
356 	mach_vm_address_t vm_addr = vm_page_size;
357 	while (slowpath(kr = mach_vm_map(mach_task_self(), &vm_addr, vm_size,
358 			vm_mask, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_MEMORY_LIBDISPATCH),
359 			MEMORY_OBJECT_NULL, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
360 			VM_INHERIT_DEFAULT))) {
361 		if (kr != KERN_NO_SPACE) {
362 			(void)dispatch_assume_zero(kr);
363 			DISPATCH_CLIENT_CRASH("Could not allocate heap");
364 		}
365 		_dispatch_temporary_resource_shortage();
366 		vm_addr = vm_page_size;
367 	}
368 	uintptr_t aligned_region = (uintptr_t)vm_addr;
369 #else // HAVE_MACH
370 	const size_t region_sz = (1 + MAGAZINES_PER_HEAP) * BYTES_PER_MAGAZINE;
371 	void *region_p;
372 	while (!dispatch_assume((region_p = mmap(NULL, region_sz,
373 			PROT_READ|PROT_WRITE, MAP_ANON | MAP_PRIVATE,
374 			VM_MAKE_TAG(VM_MEMORY_LIBDISPATCH), 0)) != MAP_FAILED)) {
375 		_dispatch_temporary_resource_shortage();
376 	}
377 	uintptr_t region = (uintptr_t)region_p;
378 	uintptr_t region_end = region + region_sz;
379 	uintptr_t aligned_region, aligned_region_end;
380 	uintptr_t bottom_slop_len, top_slop_len;
381 	// Realign if needed; find the slop at top/bottom to unmap
382 	if ((region & ~(MAGAZINE_MASK)) == 0) {
383 		bottom_slop_len = 0;
384 		aligned_region = region;
385 		aligned_region_end = region_end - BYTES_PER_MAGAZINE;
386 		top_slop_len = BYTES_PER_MAGAZINE;
387 	} else {
388 		aligned_region = (region & MAGAZINE_MASK) + BYTES_PER_MAGAZINE;
389 		aligned_region_end = aligned_region +
390 				(MAGAZINES_PER_HEAP * BYTES_PER_MAGAZINE);
391 		bottom_slop_len = aligned_region - region;
392 		top_slop_len = BYTES_PER_MAGAZINE - bottom_slop_len;
393 	}
394 #if DISPATCH_DEBUG
395 	// Double-check our math.
396 	dispatch_assert(aligned_region % DISPATCH_ALLOCATOR_PAGE_SIZE == 0);
397 	dispatch_assert(aligned_region % vm_kernel_page_size == 0);
398 	dispatch_assert(aligned_region_end % DISPATCH_ALLOCATOR_PAGE_SIZE == 0);
399 	dispatch_assert(aligned_region_end % vm_kernel_page_size == 0);
400 	dispatch_assert(aligned_region_end > aligned_region);
401 	dispatch_assert(top_slop_len % DISPATCH_ALLOCATOR_PAGE_SIZE == 0);
402 	dispatch_assert(bottom_slop_len % DISPATCH_ALLOCATOR_PAGE_SIZE == 0);
403 	dispatch_assert(aligned_region_end + top_slop_len == region_end);
404 	dispatch_assert(region + bottom_slop_len == aligned_region);
405 	dispatch_assert(region_sz == bottom_slop_len + top_slop_len +
406 			MAGAZINES_PER_HEAP * BYTES_PER_MAGAZINE);
407 	if (bottom_slop_len) {
408 		(void)dispatch_assume_zero(mprotect((void *)region, bottom_slop_len,
409 				PROT_NONE));
410 	}
411 	if (top_slop_len) {
412 		(void)dispatch_assume_zero(mprotect((void *)aligned_region_end,
413 				top_slop_len, PROT_NONE));
414 	}
415 #else
416 	if (bottom_slop_len) {
417 		(void)dispatch_assume_zero(munmap((void *)region, bottom_slop_len));
418 	}
419 	if (top_slop_len) {
420 		(void)dispatch_assume_zero(munmap((void *)aligned_region_end,
421 				top_slop_len));
422 	}
423 #endif // DISPATCH_DEBUG
424 #endif // HAVE_MACH
425 
426 	if (!dispatch_atomic_cmpxchg(heap_ptr, NULL, (void *)aligned_region,
427 			relaxed)) {
428 		// If we lost the race to link in the new region, unmap the whole thing.
429 #if DISPATCH_DEBUG
430 		(void)dispatch_assume_zero(mprotect((void *)aligned_region,
431 				MAGAZINES_PER_HEAP * BYTES_PER_MAGAZINE, PROT_NONE));
432 #else
433 		(void)dispatch_assume_zero(munmap((void *)aligned_region,
434 				MAGAZINES_PER_HEAP * BYTES_PER_MAGAZINE));
435 #endif
436 	}
437 }
438 
439 DISPATCH_NOINLINE
440 static dispatch_continuation_t
_dispatch_alloc_continuation_from_heap(dispatch_heap_t heap)441 _dispatch_alloc_continuation_from_heap(dispatch_heap_t heap)
442 {
443 	dispatch_continuation_t cont;
444 
445 	unsigned int cpu_number = _dispatch_cpu_number();
446 #ifdef DISPATCH_DEBUG
447 	dispatch_assert(cpu_number < NUM_CPU);
448 #endif
449 
450 #if PACK_FIRST_PAGE_WITH_CONTINUATIONS
451 	// First try the continuations in the first page for this CPU
452 	cont = alloc_continuation_from_first_page(&(heap[cpu_number]));
453 	if (fastpath(cont)) {
454 		return cont;
455 	}
456 #endif
457 	// Next, try the rest of the magazine for this CPU
458 	cont = alloc_continuation_from_magazine(&(heap[cpu_number]));
459 	return cont;
460 }
461 
462 DISPATCH_NOINLINE
463 static dispatch_continuation_t
_dispatch_alloc_continuation_from_heap_slow(void)464 _dispatch_alloc_continuation_from_heap_slow(void)
465 {
466 	dispatch_heap_t *heap = &_dispatch_main_heap;
467 	dispatch_continuation_t cont;
468 
469 	for (;;) {
470 		if (!fastpath(*heap)) {
471 			_dispatch_alloc_try_create_heap(heap);
472 		}
473 		cont = _dispatch_alloc_continuation_from_heap(*heap);
474 		if (fastpath(cont)) {
475 			return cont;
476 		}
477 		// If we have tuned our parameters right, 99.999% of apps should
478 		// never reach this point! The ones that do have gone off the rails...
479 		//
480 		// Magazine is full? Onto the next heap!
481 		// We tried 'stealing' from other CPUs' magazines. The net effect
482 		// was worse performance from more wasted search time and more
483 		// cache contention.
484 
485 		// rdar://11378331
486 		// Future optimization: start at the page we last used, start
487 		// in the *zone* we last used. But this would only improve deeply
488 		// pathological cases like dispatch_starfish
489 		heap = &(*heap)->header.dh_next;
490 	}
491 }
492 
493 DISPATCH_ALLOC_NOINLINE
494 static dispatch_continuation_t
_dispatch_alloc_continuation_alloc(void)495 _dispatch_alloc_continuation_alloc(void)
496 {
497 	dispatch_continuation_t cont;
498 
499 	if (fastpath(_dispatch_main_heap)) {
500 		// Start looking in the same page where we found a continuation
501 		// last time.
502 		bitmap_t *last = last_found_page();
503 		if (fastpath(last)) {
504 			unsigned int i;
505 			for (i = 0; i < BITMAPS_PER_PAGE; i++) {
506 				bitmap_t *cur = last + i;
507 				unsigned int index = bitmap_set_first_unset_bit(cur);
508 				if (fastpath(index != NO_BITS_WERE_UNSET)) {
509 					bitmap_t *supermap;
510 					unsigned int bindex;
511 					get_cont_and_indices_for_bitmap_and_index(cur,
512 							index, &cont, &supermap, &bindex);
513 					mark_bitmap_as_full_if_still_full(supermap, bindex,
514 							cur);
515 					return cont;
516 				}
517 			}
518 		}
519 
520 		cont = _dispatch_alloc_continuation_from_heap(_dispatch_main_heap);
521 		if (fastpath(cont)) {
522 			return cont;
523 		}
524 	}
525 	return _dispatch_alloc_continuation_from_heap_slow();
526 }
527 
528 #pragma mark -
529 #pragma mark dispatch_alloc_continuation_free
530 
531 DISPATCH_NOINLINE
532 static void
_dispatch_alloc_maybe_madvise_page(dispatch_continuation_t c)533 _dispatch_alloc_maybe_madvise_page(dispatch_continuation_t c)
534 {
535 	void *page = madvisable_page_base_for_continuation(c);
536 	if (!page) {
537 		// page can't be madvised; maybe it contains non-continuations
538 		return;
539 	}
540 	// Are all the continuations in this page unallocated?
541 	volatile bitmap_t *page_bitmaps;
542 	get_maps_and_indices_for_continuation((dispatch_continuation_t)page, NULL,
543 			NULL, (bitmap_t **)&page_bitmaps, NULL);
544 	unsigned int i;
545 	for (i = 0; i < BITMAPS_PER_PAGE; i++) {
546 		if (page_bitmaps[i] != 0) {
547 			return;
548 		}
549 	}
550 	// They are all unallocated, so we could madvise the page. Try to
551 	// take ownership of them all.
552 	int last_locked = 0;
553 	do {
554 		if (!dispatch_atomic_cmpxchg(&page_bitmaps[last_locked], BITMAP_C(0),
555 				BITMAP_ALL_ONES, relaxed)) {
556 			// We didn't get one; since there is a cont allocated in
557 			// the page, we can't madvise. Give up and unlock all.
558 			goto unlock;
559 		}
560 	} while (++last_locked < (signed)BITMAPS_PER_PAGE);
561 #if DISPATCH_DEBUG
562 	//fprintf(stderr, "%s: madvised page %p for cont %p (next = %p), "
563 	//		"[%u+1]=%u bitmaps at %p\n", __func__, page, c, c->do_next,
564 	//		last_locked-1, BITMAPS_PER_PAGE, &page_bitmaps[0]);
565 	// Scribble to expose use-after-free bugs
566 	// madvise (syscall) flushes these stores
567 	memset(page, DISPATCH_ALLOCATOR_SCRIBBLE, DISPATCH_ALLOCATOR_PAGE_SIZE);
568 #endif
569 	(void)dispatch_assume_zero(madvise(page, DISPATCH_ALLOCATOR_PAGE_SIZE,
570 			MADV_FREE));
571 
572 unlock:
573 	while (last_locked > 1) {
574 		page_bitmaps[--last_locked] = BITMAP_C(0);
575 	}
576 	if (last_locked) {
577 		dispatch_atomic_store(&page_bitmaps[0], BITMAP_C(0), relaxed);
578 	}
579 	return;
580 }
581 
582 DISPATCH_ALLOC_NOINLINE
583 static void
_dispatch_alloc_continuation_free(dispatch_continuation_t c)584 _dispatch_alloc_continuation_free(dispatch_continuation_t c)
585 {
586 	bitmap_t *b, *s;
587 	unsigned int b_idx, idx;
588 
589 	get_maps_and_indices_for_continuation(c, &s, &b_idx, &b, &idx);
590 	bool bitmap_now_empty = bitmap_clear_bit(b, idx, CLEAR_EXCLUSIVELY);
591 	if (slowpath(s)) {
592 		(void)bitmap_clear_bit(s, b_idx, CLEAR_NONEXCLUSIVELY);
593 	}
594 	// We only try to madvise(2) pages outside of the first page.
595 	// (Allocations in the first page do not have a supermap entry.)
596 	if (slowpath(bitmap_now_empty) && slowpath(s)) {
597 		return _dispatch_alloc_maybe_madvise_page(c);
598 	}
599 }
600 
601 #pragma mark -
602 #pragma mark dispatch_alloc_init
603 
604 #if DISPATCH_DEBUG
605 static void
_dispatch_alloc_init(void)606 _dispatch_alloc_init(void)
607 {
608 	// Double-check our math. These are all compile time checks and don't
609 	// generate code.
610 
611 	dispatch_assert(sizeof(bitmap_t) == BYTES_PER_BITMAP);
612 	dispatch_assert(sizeof(bitmap_t) == BYTES_PER_SUPERMAP);
613 	dispatch_assert(sizeof(struct dispatch_magazine_header_s) ==
614 			SIZEOF_HEADER);
615 
616 	dispatch_assert(sizeof(struct dispatch_continuation_s) <=
617 			DISPATCH_CONTINUATION_SIZE);
618 
619 	// Magazines should be the right size, so they pack neatly into an array of
620 	// heaps.
621 	dispatch_assert(sizeof(struct dispatch_magazine_s) == BYTES_PER_MAGAZINE);
622 
623 	// The header and maps sizes should match what we computed.
624 	dispatch_assert(SIZEOF_HEADER ==
625 			sizeof(((struct dispatch_magazine_s *)0x0)->header));
626 	dispatch_assert(SIZEOF_MAPS ==
627 			sizeof(((struct dispatch_magazine_s *)0x0)->maps));
628 
629 	// The main array of continuations should start at the second page,
630 	// self-aligned.
631 	dispatch_assert(offsetof(struct dispatch_magazine_s, conts) %
632 			(CONTINUATIONS_PER_BITMAP * DISPATCH_CONTINUATION_SIZE) == 0);
633 	dispatch_assert(offsetof(struct dispatch_magazine_s, conts) ==
634 			DISPATCH_ALLOCATOR_PAGE_SIZE);
635 
636 #if PACK_FIRST_PAGE_WITH_CONTINUATIONS
637 	// The continuations in the first page should actually fit within the first
638 	// page.
639 	dispatch_assert(offsetof(struct dispatch_magazine_s, fp_conts) <
640 			DISPATCH_ALLOCATOR_PAGE_SIZE);
641 	dispatch_assert(offsetof(struct dispatch_magazine_s, fp_conts) %
642 			DISPATCH_CONTINUATION_SIZE == 0);
643 	dispatch_assert(offsetof(struct dispatch_magazine_s, fp_conts) +
644 			sizeof(((struct dispatch_magazine_s *)0x0)->fp_conts) ==
645 					DISPATCH_ALLOCATOR_PAGE_SIZE);
646 #endif // PACK_FIRST_PAGE_WITH_CONTINUATIONS
647 }
648 #elif (DISPATCH_ALLOCATOR && DISPATCH_CONTINUATION_MALLOC) \
649 		|| (DISPATCH_CONTINUATION_MALLOC && DISPATCH_USE_MALLOCZONE)
_dispatch_alloc_init(void)650 static inline void _dispatch_alloc_init(void) {}
651 #endif
652 
653 #endif // DISPATCH_ALLOCATOR
654 
655 #pragma mark -
656 #pragma mark dispatch_malloc
657 
658 #if DISPATCH_CONTINUATION_MALLOC
659 
660 #if DISPATCH_USE_MALLOCZONE
661 static malloc_zone_t *_dispatch_ccache_zone;
662 
663 #define calloc(n, s) malloc_zone_calloc(_dispatch_ccache_zone, (n), (s))
664 #define free(c) malloc_zone_free(_dispatch_ccache_zone, (c))
665 
666 static void
_dispatch_malloc_init(void)667 _dispatch_malloc_init(void)
668 {
669 	_dispatch_ccache_zone = malloc_create_zone(0, 0);
670 	dispatch_assert(_dispatch_ccache_zone);
671 	malloc_set_zone_name(_dispatch_ccache_zone, "DispatchContinuations");
672 }
673 #else
_dispatch_malloc_init(void)674 static inline void _dispatch_malloc_init(void) {}
675 #endif // DISPATCH_USE_MALLOCZONE
676 
677 static dispatch_continuation_t
_dispatch_malloc_continuation_alloc(void)678 _dispatch_malloc_continuation_alloc(void)
679 {
680 	dispatch_continuation_t dc;
681 	while (!(dc = fastpath(calloc(1,
682 			ROUND_UP_TO_CACHELINE_SIZE(sizeof(*dc)))))) {
683 		_dispatch_temporary_resource_shortage();
684 	}
685 	return dc;
686 }
687 
688 static inline void
_dispatch_malloc_continuation_free(dispatch_continuation_t c)689 _dispatch_malloc_continuation_free(dispatch_continuation_t c)
690 {
691 	free(c);
692 }
693 #endif // DISPATCH_CONTINUATION_MALLOC
694 
695 #pragma mark -
696 #pragma mark dispatch_continuation_alloc
697 
698 #if DISPATCH_ALLOCATOR
699 #if DISPATCH_CONTINUATION_MALLOC
700 #if DISPATCH_USE_NANOZONE
701 extern boolean_t malloc_engaged_nano(void);
702 #else
703 #define malloc_engaged_nano() false
704 #endif // DISPATCH_USE_NANOZONE
705 static int _dispatch_use_dispatch_alloc;
706 #else
707 #define _dispatch_use_dispatch_alloc 1
708 #endif // DISPATCH_CONTINUATION_MALLOC
709 #endif // DISPATCH_ALLOCATOR
710 
711 #if (DISPATCH_ALLOCATOR && (DISPATCH_CONTINUATION_MALLOC || DISPATCH_DEBUG)) \
712 		|| (DISPATCH_CONTINUATION_MALLOC && DISPATCH_USE_MALLOCZONE)
713 static void
_dispatch_continuation_alloc_init(void * ctxt DISPATCH_UNUSED)714 _dispatch_continuation_alloc_init(void *ctxt DISPATCH_UNUSED)
715 {
716 #if DISPATCH_ALLOCATOR
717 #if DISPATCH_CONTINUATION_MALLOC
718 	bool use_dispatch_alloc = !malloc_engaged_nano();
719 	char *e = getenv("LIBDISPATCH_CONTINUATION_ALLOCATOR");
720 	if (e) {
721 		use_dispatch_alloc = atoi(e);
722 	}
723 	_dispatch_use_dispatch_alloc = use_dispatch_alloc;
724 #endif // DISPATCH_CONTINUATION_MALLOC
725 	if (_dispatch_use_dispatch_alloc)
726 		return _dispatch_alloc_init();
727 #endif // DISPATCH_ALLOCATOR
728 #if DISPATCH_CONTINUATION_MALLOC
729 	return _dispatch_malloc_init();
730 #endif // DISPATCH_ALLOCATOR
731 }
732 
733 static void
_dispatch_continuation_alloc_once()734 _dispatch_continuation_alloc_once()
735 {
736 	static dispatch_once_t pred;
737 	dispatch_once_f(&pred, NULL, _dispatch_continuation_alloc_init);
738 }
739 #else
_dispatch_continuation_alloc_once(void)740 static inline void _dispatch_continuation_alloc_once(void) {}
741 #endif // DISPATCH_ALLOCATOR ... || DISPATCH_CONTINUATION_MALLOC ...
742 
743 dispatch_continuation_t
_dispatch_continuation_alloc_from_heap(void)744 _dispatch_continuation_alloc_from_heap(void)
745 {
746 	_dispatch_continuation_alloc_once();
747 #if DISPATCH_ALLOCATOR
748 	if (_dispatch_use_dispatch_alloc)
749 		return _dispatch_alloc_continuation_alloc();
750 #endif
751 #if DISPATCH_CONTINUATION_MALLOC
752 	return _dispatch_malloc_continuation_alloc();
753 #endif
754 }
755 
756 void
_dispatch_continuation_free_to_heap(dispatch_continuation_t c)757 _dispatch_continuation_free_to_heap(dispatch_continuation_t c)
758 {
759 #if DISPATCH_ALLOCATOR
760 	if (_dispatch_use_dispatch_alloc)
761 		return _dispatch_alloc_continuation_free(c);
762 #endif
763 #if DISPATCH_CONTINUATION_MALLOC
764 	return _dispatch_malloc_continuation_free(c);
765 #endif
766 }
767 
768