1 |
/*- |
2 |
* Copyright (c) 2001 |
3 |
* John Baldwin <jhb@FreeBSD.org>. All rights reserved. |
4 |
* |
5 |
* Redistribution and use in source and binary forms, with or without |
6 |
* modification, are permitted provided that the following conditions |
7 |
* are met: |
8 |
* 1. Redistributions of source code must retain the above copyright |
9 |
* notice, this list of conditions and the following disclaimer. |
10 |
* 2. Redistributions in binary form must reproduce the above copyright |
11 |
* notice, this list of conditions and the following disclaimer in the |
12 |
* documentation and/or other materials provided with the distribution. |
13 |
* 4. Neither the name of the author nor the names of any co-contributors |
14 |
* may be used to endorse or promote products derived from this software |
15 |
* without specific prior written permission. |
16 |
* |
17 |
* THIS SOFTWARE IS PROVIDED BY JOHN BALDWIN AND CONTRIBUTORS ``AS IS'' AND |
18 |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
19 |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
20 |
* ARE DISCLAIMED. IN NO EVENT SHALL JOHN BALDWIN OR THE VOICES IN HIS HEAD |
21 |
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
22 |
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
23 |
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
24 |
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
25 |
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
26 |
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
27 |
* THE POSSIBILITY OF SUCH DAMAGE. |
28 |
*/ |
29 |
|
30 |
/* |
31 |
* This module holds the global variables and machine independent functions |
32 |
* used for the kernel SMP support. |
33 |
*/ |
34 |
|
35 |
#include <sys/cdefs.h> |
36 |
__FBSDID("$FreeBSD: src/sys/kern/subr_smp.c,v 1.196 2005/06/30 03:38:10 peter Exp $"); |
37 |
|
38 |
#include "opt_kdb.h" |
39 |
|
40 |
#include <sys/param.h> |
41 |
#include <sys/systm.h> |
42 |
#include <sys/kernel.h> |
43 |
#include <sys/ktr.h> |
44 |
#include <sys/proc.h> |
45 |
#include <sys/bus.h> |
46 |
#include <sys/lock.h> |
47 |
#include <sys/mutex.h> |
48 |
#include <sys/pcpu.h> |
49 |
#include <sys/smp.h> |
50 |
#include <sys/sysctl.h> |
51 |
|
52 |
#include <machine/smp.h> |
53 |
|
54 |
#include "opt_sched.h" |
55 |
|
56 |
#ifdef SMP
/* CPUs that have acknowledged an IPI_STOP and are spinning, stopped. */
volatile cpumask_t stopped_cpus;
/* CPUs that have been told to leave the stopped state (see restart_cpus()). */
volatile cpumask_t started_cpus;
/* NOTE(review): the three masks below are maintained by MD/scheduler code,
 * not by this file — semantics inferred from their names; confirm there. */
cpumask_t idle_cpus_mask;
cpumask_t hlt_cpus_mask;
cpumask_t logical_cpus_mask;

/* Optional hook run by stopped CPUs on restart; set by MD code, may be NULL. */
void (*cpustop_restartfunc)(void);
#endif
/* This is used in modules that need to work in both SMP and UP. */
cpumask_t all_cpus;
67 |
|
68 |
/* Number of CPUs in the system; set to 1 here if MP probing fails. */
int mp_ncpus;
/* export this for libkvm consumers. */
int mp_maxcpus = MAXCPU;

/* CPU topology description; set by MD code, not referenced in this file. */
struct cpu_top *smp_topology;
/* Nonzero once the APs are running; gates all IPI-based services below. */
volatile int smp_started;
/* Highest valid CPU ID; set via cpu_mp_setmaxid() or mp_setvariables_for_up(). */
u_int mp_maxid;

SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD, NULL, "Kernel SMP");

SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD, &mp_maxcpus, 0,
    "Max number of CPUs that the system was compiled for.");

int smp_active = 0;	/* are the APs allowed to run? */
/*
 * NOTE(review): the description string below misspells "Auxiliary" and
 * describes a CPU count, while the comment above describes a boolean gate —
 * one of the two is wrong; the string is user-visible ABI so it is left
 * untouched here.
 */
SYSCTL_INT(_kern_smp, OID_AUTO, active, CTLFLAG_RW, &smp_active, 0,
    "Number of Auxillary Processors (APs) that were successfully started");

int smp_disabled = 0;	/* has smp been disabled? */
SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN, &smp_disabled, 0,
    "SMP has been disabled from the loader");
/* Loader tunable checked by mp_start() before probing for MP hardware. */
TUNABLE_INT("kern.smp.disabled", &smp_disabled);

int smp_cpus = 1;	/* how many cpu's running */
SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD, &smp_cpus, 0,
    "Number of CPUs online");
93 |
|
94 |
#ifdef SMP
/* Enable forwarding of a signal to a process running on a different CPU */
static int forward_signal_enabled = 1;
SYSCTL_INT(_kern_smp, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
    &forward_signal_enabled, 0,
    "Forwarding of a signal to a process on a different CPU");

/* Enable forwarding of roundrobin to all other cpus */
static int forward_roundrobin_enabled = 1;
SYSCTL_INT(_kern_smp, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
    &forward_roundrobin_enabled, 0,
    "Forwarding of roundrobin to all other CPUs");

/*
 * Variables needed for SMP rendezvous.  These are written by the
 * initiating CPU in smp_rendezvous() while it holds smp_ipi_mtx and
 * read by every CPU in smp_rendezvous_action().
 */
static void (*smp_rv_setup_func)(void *arg);
static void (*smp_rv_action_func)(void *arg);
static void (*smp_rv_teardown_func)(void *arg);
static void *smp_rv_func_arg;
/* [0] = entry-barrier count, [1] = exit-barrier count; both count to mp_ncpus. */
static volatile int smp_rv_waiters[2];

/*
 * Shared mutex to restrict busywaits between smp_rendezvous() and
 * smp(_targeted)_tlb_shootdown().  A deadlock occurs if both of these
 * functions trigger at once and cause multiple CPUs to busywait with
 * interrupts disabled.
 */
struct mtx smp_ipi_mtx;
121 |
|
122 |
/*
 * Let the MD SMP code initialize mp_maxid very early if it can.
 * Runs at SI_SUB_TUNABLES / SI_ORDER_FIRST (see SYSINIT below), i.e.
 * before any consumer of mp_maxid.
 */
static void
mp_setmaxid(void *dummy)
{
	cpu_mp_setmaxid();
}
SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL)
131 |
|
132 |
/*
 * Call the MD SMP initialization code.
 *
 * If SMP was disabled from the loader, or no MP hardware is found, fall
 * back to UP: mp_ncpus stays 1 and all_cpus contains only this CPU.
 * Otherwise initialize the IPI mutex, start the APs, and report.
 */
static void
mp_start(void *dummy)
{

	/*
	 * Probe for MP hardware.  The smp_disabled check must come first
	 * so that a disabled system never calls cpu_mp_probe().
	 */
	if (smp_disabled != 0 || cpu_mp_probe() == 0) {
		mp_ncpus = 1;
		all_cpus = PCPU_GET(cpumask);
		return;
	}

	/* The IPI mutex must exist before any AP can rendezvous. */
	mtx_init(&smp_ipi_mtx, "smp rendezvous", NULL, MTX_SPIN);
	cpu_mp_start();
	printf("MidnightBSD/SMP: Multiprocessor System Detected: %d CPUs\n",
	    mp_ncpus);
	cpu_mp_announce();
}
SYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_SECOND, mp_start, NULL)
153 |
|
154 |
void |
155 |
forward_signal(struct thread *td) |
156 |
{ |
157 |
int id; |
158 |
|
159 |
/* |
160 |
* signotify() has already set TDF_ASTPENDING and TDF_NEEDSIGCHECK on |
161 |
* this thread, so all we need to do is poke it if it is currently |
162 |
* executing so that it executes ast(). |
163 |
*/ |
164 |
mtx_assert(&sched_lock, MA_OWNED); |
165 |
KASSERT(TD_IS_RUNNING(td), |
166 |
("forward_signal: thread is not TDS_RUNNING")); |
167 |
|
168 |
CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc); |
169 |
|
170 |
if (!smp_started || cold || panicstr) |
171 |
return; |
172 |
if (!forward_signal_enabled) |
173 |
return; |
174 |
|
175 |
/* No need to IPI ourself. */ |
176 |
if (td == curthread) |
177 |
return; |
178 |
|
179 |
id = td->td_oncpu; |
180 |
if (id == NOCPU) |
181 |
return; |
182 |
ipi_selected(1 << id, IPI_AST); |
183 |
} |
184 |
|
185 |
void |
186 |
forward_roundrobin(void) |
187 |
{ |
188 |
struct pcpu *pc; |
189 |
struct thread *td; |
190 |
cpumask_t id, map, me; |
191 |
|
192 |
mtx_assert(&sched_lock, MA_OWNED); |
193 |
|
194 |
CTR0(KTR_SMP, "forward_roundrobin()"); |
195 |
|
196 |
if (!smp_started || cold || panicstr) |
197 |
return; |
198 |
if (!forward_roundrobin_enabled) |
199 |
return; |
200 |
map = 0; |
201 |
me = PCPU_GET(cpumask); |
202 |
SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { |
203 |
td = pc->pc_curthread; |
204 |
id = pc->pc_cpumask; |
205 |
if (id != me && (id & stopped_cpus) == 0 && |
206 |
td != pc->pc_idlethread) { |
207 |
td->td_flags |= TDF_NEEDRESCHED; |
208 |
map |= id; |
209 |
} |
210 |
} |
211 |
ipi_selected(map, IPI_AST); |
212 |
} |
213 |
|
214 |
/*
 * When called the executing CPU will send an IPI to all other CPUs
 * requesting that they halt execution.
 *
 * Usually (but not necessarily) called with 'other_cpus' as its arg.
 *
 * - Signals all CPUs in map to stop.
 * - Waits for each to stop.
 *
 * Returns:
 *  -1: error
 *   0: NA
 *   1: ok
 *
 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
 * from executing at same time.
 */
int
stop_cpus(cpumask_t map)
{
	int i;

	/* Before the APs are up there is nobody to stop. */
	if (!smp_started)
		return 0;

	CTR1(KTR_SMP, "stop_cpus(%x)", map);

	/* send the stop IPI to all CPUs in map */
	ipi_selected(map, IPI_STOP);

	/*
	 * Spin until every CPU in map has set its bit in stopped_cpus
	 * (done by the IPI_STOP handler).  The acquire load pairs with the
	 * handler's release store.
	 */
	i = 0;
	while ((atomic_load_acq_int(&stopped_cpus) & map) != map) {
		/* spin */
		i++;
#ifdef DIAGNOSTIC
		/* Diagnostic kernels give up after a bounded spin. */
		if (i == 100000) {
			printf("timeout stopping cpus\n");
			break;
		}
#endif
	}

	return 1;
}
258 |
|
259 |
#ifdef KDB_STOP_NMI
/*
 * NMI variant of stop_cpus(): stops the CPUs in 'map' with a non-maskable
 * interrupt so that even CPUs spinning with interrupts disabled are halted
 * (used by the debugger).  Semantics otherwise match stop_cpus().
 *
 * Returns 0 if SMP is not yet started, 1 otherwise.
 */
int
stop_cpus_nmi(cpumask_t map)
{
	int i;

	if (!smp_started)
		return 0;

	/* Fixed trace label: previously logged as "stop_cpus", hiding
	 * NMI-stops in KTR traces. */
	CTR1(KTR_SMP, "stop_cpus_nmi(%x)", map);

	/* send the stop IPI (as an NMI) to all CPUs in map */
	ipi_nmi_selected(map);

	/* Spin until every CPU in map has acknowledged by stopping. */
	i = 0;
	while ((atomic_load_acq_int(&stopped_cpus) & map) != map) {
		/* spin */
		i++;
#ifdef DIAGNOSTIC
		if (i == 100000) {
			printf("timeout stopping cpus\n");
			break;
		}
#endif
	}

	return 1;
}
#endif /* KDB_STOP_NMI */
288 |
|
289 |
/* |
290 |
* Called by a CPU to restart stopped CPUs. |
291 |
* |
292 |
* Usually (but not necessarily) called with 'stopped_cpus' as its arg. |
293 |
* |
294 |
* - Signals all CPUs in map to restart. |
295 |
* - Waits for each to restart. |
296 |
* |
297 |
* Returns: |
298 |
* -1: error |
299 |
* 0: NA |
300 |
* 1: ok |
301 |
*/ |
302 |
int |
303 |
restart_cpus(cpumask_t map) |
304 |
{ |
305 |
|
306 |
if (!smp_started) |
307 |
return 0; |
308 |
|
309 |
CTR1(KTR_SMP, "restart_cpus(%x)", map); |
310 |
|
311 |
/* signal other cpus to restart */ |
312 |
atomic_store_rel_int(&started_cpus, map); |
313 |
|
314 |
/* wait for each to clear its bit */ |
315 |
while ((atomic_load_acq_int(&stopped_cpus) & map) != 0) |
316 |
; /* nothing */ |
317 |
|
318 |
return 1; |
319 |
} |
320 |
|
321 |
/*
 * All-CPU rendezvous.  CPUs are signalled, all execute the setup function
 * (if specified), rendezvous, execute the action function (if specified),
 * rendezvous again, execute the teardown function (if specified), and then
 * resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */
void
smp_rendezvous_action(void)
{

	/* setup function */
	if (smp_rv_setup_func != NULL)
		smp_rv_setup_func(smp_rv_func_arg);
	/*
	 * spin on entry rendezvous: every CPU increments waiters[0] and
	 * spins until all mp_ncpus CPUs have arrived, so no CPU runs the
	 * action before every CPU has finished setup.
	 */
	atomic_add_int(&smp_rv_waiters[0], 1);
	while (atomic_load_acq_int(&smp_rv_waiters[0]) < mp_ncpus)
		;	/* nothing */
	/* action function */
	if (smp_rv_action_func != NULL)
		smp_rv_action_func(smp_rv_func_arg);
	/* spin on exit rendezvous: same barrier, gating the teardown phase */
	atomic_add_int(&smp_rv_waiters[1], 1);
	while (atomic_load_acq_int(&smp_rv_waiters[1]) < mp_ncpus)
		;	/* nothing */
	/* teardown function */
	if (smp_rv_teardown_func != NULL)
		smp_rv_teardown_func(smp_rv_func_arg);
}
352 |
|
353 |
/*
 * Initiate an all-CPU rendezvous: publish the three phase functions and
 * their argument, IPI every other CPU into smp_rendezvous_action(), and
 * participate ourselves.  smp_ipi_mtx serializes initiators (and excludes
 * TLB-shootdown busywaits — see the comment at its definition).
 *
 * Before SMP is started there is nobody to rendezvous with, so the three
 * functions are simply called locally in order.
 */
void
smp_rendezvous(void (* setup_func)(void *),
	       void (* action_func)(void *),
	       void (* teardown_func)(void *),
	       void *arg)
{

	if (!smp_started) {
		if (setup_func != NULL)
			setup_func(arg);
		if (action_func != NULL)
			action_func(arg);
		if (teardown_func != NULL)
			teardown_func(arg);
		return;
	}

	/* obtain rendezvous lock */
	mtx_lock_spin(&smp_ipi_mtx);

	/*
	 * set static function pointers; the waiter counts must be reset
	 * before the IPI below makes any other CPU start the barriers.
	 */
	smp_rv_setup_func = setup_func;
	smp_rv_action_func = action_func;
	smp_rv_teardown_func = teardown_func;
	smp_rv_func_arg = arg;
	smp_rv_waiters[0] = 0;
	smp_rv_waiters[1] = 0;

	/* signal other processors, which will enter the IPI with interrupts off */
	ipi_all_but_self(IPI_RENDEZVOUS);

	/* call executor function */
	smp_rendezvous_action();

	/* release lock */
	mtx_unlock_spin(&smp_ipi_mtx);
}
390 |
#else /* !SMP */ |
391 |
|
392 |
/*
 * Provide dummy SMP support for UP kernels.  Modules that need to use SMP
 * APIs will still work using this dummy support.
 */
static void
mp_setvariables_for_up(void *dummy)
{
	/* One CPU: its ID is both the max ID and the only bit in all_cpus. */
	mp_ncpus = 1;
	mp_maxid = PCPU_GET(cpuid);
	all_cpus = PCPU_GET(cpumask);
	KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero"));
}
SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST,
    mp_setvariables_for_up, NULL)
406 |
|
407 |
void |
408 |
smp_rendezvous(void (* setup_func)(void *), |
409 |
void (* action_func)(void *), |
410 |
void (* teardown_func)(void *), |
411 |
void *arg) |
412 |
{ |
413 |
|
414 |
if (setup_func != NULL) |
415 |
setup_func(arg); |
416 |
if (action_func != NULL) |
417 |
action_func(arg); |
418 |
if (teardown_func != NULL) |
419 |
teardown_func(arg); |
420 |
} |
421 |
#endif /* SMP */ |