5042 stop using deprecated atomic functions
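The change replaces illumos's old private x86 compare-and-swap wrappers with the stable interfaces from <sys/atomic.h>. The mapping, applied mechanically throughout the diff below, is one-to-one:

    /* deprecated */                /* replacement */
    casptr(p, cmp, new)       ->    atomic_cas_ptr(p, cmp, new)
    cas32(p, cmp, new)        ->    atomic_cas_32(p, cmp, new)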
--- old/usr/src/uts/i86pc/os/x_call.c
+++ new/usr/src/uts/i86pc/os/x_call.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright (c) 2010, Intel Corporation.
27 27 * All rights reserved.
28 28 */
29 29
30 30 #include <sys/types.h>
31 31 #include <sys/param.h>
32 32 #include <sys/t_lock.h>
33 33 #include <sys/thread.h>
34 34 #include <sys/cpuvar.h>
35 35 #include <sys/x_call.h>
36 36 #include <sys/xc_levels.h>
37 37 #include <sys/cpu.h>
38 38 #include <sys/psw.h>
39 39 #include <sys/sunddi.h>
40 40 #include <sys/debug.h>
41 41 #include <sys/systm.h>
42 42 #include <sys/archsystm.h>
43 43 #include <sys/machsystm.h>
44 44 #include <sys/mutex_impl.h>
45 45 #include <sys/stack.h>
46 46 #include <sys/promif.h>
47 47 #include <sys/x86_archext.h>
48 48
49 49 /*
50 50 * Implementation for cross-processor calls via interprocessor interrupts
51 51 *
52 52 * This implementation uses a message passing architecture to allow multiple
53 53 * concurrent cross calls to be in flight at any given time. We use the cmpxchg
54 - * instruction, aka casptr(), to implement simple efficient work queues for
55 - * message passing between CPUs with almost no need for regular locking.
56 - * See xc_extract() and xc_insert() below.
54 + * instruction, aka atomic_cas_ptr(), to implement simple efficient work
55 + * queues for message passing between CPUs with almost no need for regular
56 + * locking. See xc_extract() and xc_insert() below.
57 57 *
58 58 * The general idea is that initiating a cross call means putting a message
59 59 * on the target CPU's (or CPUs') work queue. Any synchronization is handled by passing
60 60 * the message back and forth between initiator and target(s).
61 61 *
62 62 * Every CPU has xc_work_cnt, which indicates it has messages to process.
63 63 * This value is incremented as message traffic is initiated and decremented
64 64 * with every message that finishes all processing.
65 65 *
66 66 * The code needs no mfence or other membar_*() calls. The uses of
67 - * casptr(), cas32() and atomic_dec_32() for the message passing are
68 - * implemented with LOCK prefix instructions which are equivalent to mfence.
67 + * atomic_cas_ptr(), atomic_cas_32() and atomic_dec_32() for the message
68 + * passing are implemented with LOCK prefix instructions which are
69 + * equivalent to mfence.
69 70 *
70 71 * One interesting aspect of this implementation is that it allows 2 or more
71 72 * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
72 73 * The cross call processing by the CPUs will happen in any order with only
73 74 * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
74 75 * from cross calls before all slaves have invoked the function.
75 76 *
76 77 * The reason for this asynchronous approach is to allow for fast global
77 78 * TLB shootdowns. If all CPUs, say N, try to do a global TLB invalidation
78 79 * on a different virtual address at the same time, the old code requires
79 80 * N squared IPIs. With this method, depending on timing, it can happen
80 81 * with just N IPIs.
81 82 */
82 83
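For readers who haven't met this pattern before, here is a minimal user-level sketch of the same CAS-managed queue discipline, written with C11 atomics instead of the kernel's atomic_cas_ptr(); all names are illustrative, not from this file:

    #include <stdatomic.h>
    #include <stddef.h>

    struct node {
            struct node *next;
    };

    /* Insert is safe against any mix of concurrent inserters and extractors. */
    static void
    insert(struct node *_Atomic *queue, struct node *n)
    {
            struct node *old = atomic_load(queue);

            do {
                    n->next = old;
            } while (!atomic_compare_exchange_weak(queue, &old, n));
    }

    /*
     * Extract is safe only with a single concurrent extractor,
     * exactly as with xc_extract() below.
     */
    static struct node *
    extract(struct node *_Atomic *queue)
    {
            struct node *old = atomic_load(queue);

            do {
                    if (old == NULL)
                            return (NULL);
            } while (!atomic_compare_exchange_weak(queue, &old, old->next));
            old->next = NULL;
            return (old);
    }

The weak compare-exchange may fail spuriously, hence the retry loops; the kernel code spells the same loops with atomic_cas_ptr(), whose LOCK cmpxchg also supplies the full barrier the block comment above relies on.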
83 84 /*
84 85 * The default is to not enable collection of IPI counts, since the
85 86 * updating of shared cachelines could cause excess bus traffic.
86 87 */
87 88 uint_t xc_collect_enable = 0;
88 89 uint64_t xc_total_cnt = 0; /* total #IPIs sent for cross calls */
89 90 uint64_t xc_multi_cnt = 0; /* # times we piggy backed on another IPI */
90 91
91 92 /*
92 93 * Values for message states. Here are the normal transitions. A transition
93 94 * of "->" happens in the slave cpu and "=>" happens in the master cpu as
94 95 * the messages are passed back and forth.
95 96 *
96 97 * FREE => ASYNC -> DONE => FREE
97 98 * FREE => CALL -> DONE => FREE
98 99 * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
99 100 *
100 101 * The interesting one above is ASYNC. You might ask, why not go directly
101 102 * to FREE, instead of DONE. If it did that, it might be possible to exhaust
102 103 * the master's xc_free list if a master can generate ASYNC messages faster
103 104 * than the slave can process them. That could be addressed with more complicated
104 105 * handling. However, since nothing important uses ASYNC, I've not bothered.
105 106 */
106 107 #define XC_MSG_FREE (0) /* msg in xc_free queue */
107 108 #define XC_MSG_ASYNC (1) /* msg in slave xc_msgbox */
108 109 #define XC_MSG_CALL (2) /* msg in slave xc_msgbox */
109 110 #define XC_MSG_SYNC (3) /* msg in slave xc_msgbox */
110 111 #define XC_MSG_WAITING (4) /* msg in master xc_msgbox or xc_waiters */
111 112 #define XC_MSG_RELEASED (5) /* msg in slave xc_msgbox */
112 113 #define XC_MSG_DONE (6) /* msg in master xc_msgbox */
113 114
114 115 /*
115 116 * We allow for one high priority message at a time to happen in the system.
116 117 * This is used for panic, kmdb, etc., so no locking is done.
117 118 */
118 119 static volatile cpuset_t xc_priority_set_store;
119 120 static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
120 121 static xc_data_t xc_priority_data;
121 122
122 123 /*
123 124 * Wrappers to avoid C compiler warnings due to volatile. The atomic bit
124 125 * operations don't accept volatile bit vectors - which is a bit silly.
125 126 */
126 127 #define XC_BT_SET(vector, b) BT_ATOMIC_SET((ulong_t *)(vector), (b))
127 128 #define XC_BT_CLEAR(vector, b) BT_ATOMIC_CLEAR((ulong_t *)(vector), (b))
128 129
129 130 /*
130 131 * Decrement a CPU's work count
131 132 */
132 133 static void
133 134 xc_decrement(struct machcpu *mcpu)
134 135 {
135 136 atomic_dec_32(&mcpu->xc_work_cnt);
136 137 }
137 138
138 139 /*
139 140 * Increment a CPU's work count and return the old value
140 141 */
141 142 static int
142 143 xc_increment(struct machcpu *mcpu)
143 144 {
144 145 int old;
145 146 do {
146 147 old = mcpu->xc_work_cnt;
147 - } while (cas32((uint32_t *)&mcpu->xc_work_cnt, old, old + 1) != old);
148 + } while (atomic_cas_32(&mcpu->xc_work_cnt, old, old + 1) != old);
148 149 return (old);
149 150 }
150 151
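An editorial aside: this open-coded loop is simply an atomic fetch-and-increment, which the same <sys/atomic.h> family offers directly. A sketch of a possible equivalent, assuming xc_work_cnt stays a uint32_t:

    static int
    xc_increment(struct machcpu *mcpu)
    {
            /* atomic_inc_32_nv() returns the new value; old = new - 1 */
            return ((int)(atomic_inc_32_nv(&mcpu->xc_work_cnt) - 1));
    }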
151 152 /*
152 153 * Put a message into a queue. The insertion is atomic no matter
153 154 * how many different inserts/extracts to the same queue happen.
154 155 */
155 156 static void
156 157 xc_insert(void *queue, xc_msg_t *msg)
157 158 {
158 159 xc_msg_t *old_head;
159 160
160 161 /*
161 162 * FREE messages should only ever be inserted into
162 163 * the xc_master CPU's xc_free queue.
163 164 */
164 165 ASSERT(msg->xc_command != XC_MSG_FREE ||
165 166 cpu[msg->xc_master] == NULL || /* possible only during init */
166 167 queue == &cpu[msg->xc_master]->cpu_m.xc_free);
167 168
168 169 do {
169 170 old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
170 171 msg->xc_next = old_head;
171 - } while (casptr(queue, old_head, msg) != old_head);
172 + } while (atomic_cas_ptr(queue, old_head, msg) != old_head);
172 173 }
173 174
174 175 /*
175 176 * Extract a message from a queue. The extraction is atomic only
176 177 * when just one thread does extractions from the queue.
177 178 * If the queue is empty, NULL is returned.
178 179 */
179 180 static xc_msg_t *
180 181 xc_extract(xc_msg_t **queue)
181 182 {
182 183 xc_msg_t *old_head;
183 184
184 185 do {
185 186 old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
186 187 if (old_head == NULL)
187 188 return (old_head);
188 - } while (casptr(queue, old_head, old_head->xc_next) != old_head);
189 + } while (atomic_cas_ptr(queue, old_head, old_head->xc_next) !=
190 + old_head);
189 191 old_head->xc_next = NULL;
190 192 return (old_head);
191 193 }
192 194
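A note on why extraction is single-consumer: between loading old_head and the compare-and-swap, a second extractor could remove old_head and reinsert it with a different xc_next; the CAS would still succeed and splice stale state into the queue (the classic ABA problem). As far as I can tell, every queue here is drained only by its owning CPU, so the restriction is honored.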
193 195 /*
194 196 * Initialize the machcpu fields used for cross calls
195 197 */
196 198 static uint_t xc_initialized = 0;
197 199
198 200 void
199 201 xc_init_cpu(struct cpu *cpup)
200 202 {
201 203 xc_msg_t *msg;
202 204 int c;
203 205
204 206 /*
205 207 * Allocate message buffers for the new CPU.
206 208 */
207 209 for (c = 0; c < max_ncpus; ++c) {
208 210 if (plat_dr_support_cpu()) {
209 211 /*
210 212 * Allocate a message buffer for every CPU possible
211 213 * in system, including our own, and add them to our xc
212 214 * message queue.
213 215 */
214 216 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
215 217 msg->xc_command = XC_MSG_FREE;
216 218 msg->xc_master = cpup->cpu_id;
217 219 xc_insert(&cpup->cpu_m.xc_free, msg);
218 220 } else if (cpu[c] != NULL && cpu[c] != cpup) {
219 221 /*
220 222 * Add a new message buffer to each existing CPU's free
221 223 * list, as well as one to my own list for each of them.
222 224 * Note: cpu0 is statically inserted into cpu[] array,
223 225 * so we need to check that cpu[c] isn't cpup itself to avoid
224 226 * allocating extra message buffers for cpu0.
225 227 */
226 228 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
227 229 msg->xc_command = XC_MSG_FREE;
228 230 msg->xc_master = c;
229 231 xc_insert(&cpu[c]->cpu_m.xc_free, msg);
230 232
231 233 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
232 234 msg->xc_command = XC_MSG_FREE;
233 235 msg->xc_master = cpup->cpu_id;
234 236 xc_insert(&cpup->cpu_m.xc_free, msg);
235 237 }
236 238 }
237 239
238 240 if (!plat_dr_support_cpu()) {
239 241 /*
240 242 * Add one for self messages if CPU hotplug is disabled.
241 243 */
242 244 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
243 245 msg->xc_command = XC_MSG_FREE;
244 246 msg->xc_master = cpup->cpu_id;
245 247 xc_insert(&cpup->cpu_m.xc_free, msg);
246 248 }
247 249
248 250 if (!xc_initialized)
249 251 xc_initialized = 1;
250 252 }
251 253
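By my reading, the non-hotplug accounting works out as follows: when a CPU comes up it contributes one buffer to each existing CPU's free list, one to its own list per existing CPU, and one self message, so once all n CPUs are up each xc_free list holds n messages, exactly enough for that CPU, as master, to post one message to every CPU including itself.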
252 254 void
253 255 xc_fini_cpu(struct cpu *cpup)
254 256 {
255 257 xc_msg_t *msg;
256 258
257 259 ASSERT((cpup->cpu_flags & CPU_READY) == 0);
258 260 ASSERT(cpup->cpu_m.xc_msgbox == NULL);
259 261 ASSERT(cpup->cpu_m.xc_work_cnt == 0);
260 262
261 263 while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) {
262 264 kmem_free(msg, sizeof (*msg));
263 265 }
264 266 }
265 267
266 268 #define XC_FLUSH_MAX_WAITS 1000
267 269
268 270 /* Flush inflight message buffers. */
269 271 int
270 272 xc_flush_cpu(struct cpu *cpup)
271 273 {
272 274 int i;
273 275
274 276 ASSERT((cpup->cpu_flags & CPU_READY) == 0);
275 277
276 278 /*
277 279 * Pause all working CPUs, which ensures that there's no CPU in
278 280 * function xc_common().
279 281 * This is used to work around a race condition window in xc_common()
280 282 * between checking the CPU_READY flag and increasing the work item count.
281 283 */
282 284 pause_cpus(cpup);
283 285 start_cpus();
284 286
285 287 for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
286 288 if (cpup->cpu_m.xc_work_cnt == 0) {
287 289 break;
288 290 }
289 291 DELAY(1);
290 292 }
291 293 for (; i < XC_FLUSH_MAX_WAITS; i++) {
292 294 if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
293 295 break;
294 296 }
295 297 DELAY(1);
296 298 }
297 299
298 300 return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
299 301 }
300 302
301 303 /*
302 304 * X-call message processing routine. Note that this is used by both
303 305 * senders and recipients of messages.
304 306 *
305 307 * We're protected against changing CPUs by either being in a high-priority
306 308 * interrupt, having preemption disabled or by having a raised SPL.
307 309 */
308 310 /*ARGSUSED*/
309 311 uint_t
310 312 xc_serv(caddr_t arg1, caddr_t arg2)
311 313 {
312 314 struct machcpu *mcpup = &(CPU->cpu_m);
313 315 xc_msg_t *msg;
314 316 xc_data_t *data;
315 317 xc_msg_t *xc_waiters = NULL;
316 318 uint32_t num_waiting = 0;
317 319 xc_func_t func;
318 320 xc_arg_t a1;
319 321 xc_arg_t a2;
320 322 xc_arg_t a3;
321 323 uint_t rc = DDI_INTR_UNCLAIMED;
322 324
323 325 while (mcpup->xc_work_cnt != 0) {
324 326 rc = DDI_INTR_CLAIMED;
325 327
326 328 /*
327 329 * We may have to wait for a message to arrive.
328 330 */
329 331 for (msg = NULL; msg == NULL;
330 332 msg = xc_extract(&mcpup->xc_msgbox)) {
331 333
332 334 /*
333 335 * Always check for and handle a priority message.
334 336 */
335 337 if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
336 338 func = xc_priority_data.xc_func;
337 339 a1 = xc_priority_data.xc_a1;
338 340 a2 = xc_priority_data.xc_a2;
339 341 a3 = xc_priority_data.xc_a3;
340 342 XC_BT_CLEAR(xc_priority_set, CPU->cpu_id);
341 343 xc_decrement(mcpup);
342 344 func(a1, a2, a3);
343 345 if (mcpup->xc_work_cnt == 0)
344 346 return (rc);
345 347 }
346 348
347 349 /*
348 350 * wait for a message to arrive
349 351 */
350 352 SMT_PAUSE();
351 353 }
352 354
353 355
354 356 /*
355 357 * process the message
356 358 */
357 359 switch (msg->xc_command) {
358 360
359 361 /*
360 362 * ASYNC gives back the message immediately, then we do the
361 363 * function and return with no more waiting.
362 364 */
363 365 case XC_MSG_ASYNC:
364 366 data = &cpu[msg->xc_master]->cpu_m.xc_data;
365 367 func = data->xc_func;
366 368 a1 = data->xc_a1;
367 369 a2 = data->xc_a2;
368 370 a3 = data->xc_a3;
369 371 msg->xc_command = XC_MSG_DONE;
370 372 xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
371 373 if (func != NULL)
372 374 (void) (*func)(a1, a2, a3);
373 375 xc_decrement(mcpup);
374 376 break;
375 377
376 378 /*
377 379 * SYNC messages do the call, then send it back to the master
378 380 * in WAITING mode
379 381 */
380 382 case XC_MSG_SYNC:
381 383 data = &cpu[msg->xc_master]->cpu_m.xc_data;
382 384 if (data->xc_func != NULL)
383 385 (void) (*data->xc_func)(data->xc_a1,
384 386 data->xc_a2, data->xc_a3);
385 387 msg->xc_command = XC_MSG_WAITING;
386 388 xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
387 389 break;
388 390
389 391 /*
390 392 * WAITING messages are collected by the master until all
391 393 * have arrived. Once all arrive, we release them back to
392 394 * the slaves
393 395 */
394 396 case XC_MSG_WAITING:
395 397 xc_insert(&xc_waiters, msg);
396 398 if (++num_waiting < mcpup->xc_wait_cnt)
397 399 break;
398 400 while ((msg = xc_extract(&xc_waiters)) != NULL) {
399 401 msg->xc_command = XC_MSG_RELEASED;
400 402 xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
401 403 msg);
402 404 --num_waiting;
403 405 }
404 406 if (num_waiting != 0)
405 407 panic("wrong number waiting");
406 408 mcpup->xc_wait_cnt = 0;
407 409 break;
408 410
409 411 /*
410 412 * CALL messages do the function and then, like RELEASED,
411 413 * send the message back to the master as DONE.
412 414 */
413 415 case XC_MSG_CALL:
414 416 data = &cpu[msg->xc_master]->cpu_m.xc_data;
415 417 if (data->xc_func != NULL)
416 418 (void) (*data->xc_func)(data->xc_a1,
417 419 data->xc_a2, data->xc_a3);
418 420 /*FALLTHROUGH*/
419 421 case XC_MSG_RELEASED:
420 422 msg->xc_command = XC_MSG_DONE;
421 423 xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
422 424 xc_decrement(mcpup);
423 425 break;
424 426
425 427 /*
426 428 * DONE means a slave has completely finished up.
427 429 * Once we collect all the DONE messages, we'll exit
428 430 * processing too.
429 431 */
430 432 case XC_MSG_DONE:
431 433 msg->xc_command = XC_MSG_FREE;
432 434 xc_insert(&mcpup->xc_free, msg);
433 435 xc_decrement(mcpup);
434 436 break;
435 437
436 438 case XC_MSG_FREE:
437 439 panic("free message 0x%p in msgbox", (void *)msg);
438 440 break;
439 441
440 442 default:
441 443 panic("bad message 0x%p in msgbox", (void *)msg);
442 444 break;
443 445 }
444 446 }
445 447 return (rc);
446 448 }
447 449
448 450 /*
449 451 * Initiate cross call processing.
450 452 */
451 453 static void
452 454 xc_common(
453 455 xc_func_t func,
454 456 xc_arg_t arg1,
455 457 xc_arg_t arg2,
456 458 xc_arg_t arg3,
457 459 ulong_t *set,
458 460 uint_t command)
459 461 {
460 462 int c;
461 463 struct cpu *cpup;
462 464 xc_msg_t *msg;
463 465 xc_data_t *data;
464 466 int cnt;
465 467 int save_spl;
466 468
467 469 if (!xc_initialized) {
468 470 if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
469 471 func != NULL)
470 472 (void) (*func)(arg1, arg2, arg3);
471 473 return;
472 474 }
473 475
474 476 save_spl = splr(ipltospl(XC_HI_PIL));
475 477
476 478 /*
477 479 * fill in cross call data
478 480 */
479 481 data = &CPU->cpu_m.xc_data;
480 482 data->xc_func = func;
481 483 data->xc_a1 = arg1;
482 484 data->xc_a2 = arg2;
483 485 data->xc_a3 = arg3;
484 486
485 487 /*
486 488 * Post messages to all CPUs involved that are CPU_READY
487 489 */
488 490 CPU->cpu_m.xc_wait_cnt = 0;
489 491 for (c = 0; c < max_ncpus; ++c) {
490 492 if (!BT_TEST(set, c))
491 493 continue;
492 494 cpup = cpu[c];
493 495 if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
494 496 continue;
495 497
496 498 /*
497 499 * Fill out a new message.
498 500 */
499 501 msg = xc_extract(&CPU->cpu_m.xc_free);
500 502 if (msg == NULL)
501 503 panic("Ran out of free xc_msg_t's");
502 504 msg->xc_command = command;
503 505 if (msg->xc_master != CPU->cpu_id)
504 506 panic("msg %p has wrong xc_master", (void *)msg);
505 507 msg->xc_slave = c;
506 508
507 509 /*
508 510 * Increment my work count for all messages that I'll
509 511 * transition from DONE to FREE.
510 512 * Also remember how many XC_MSG_WAITINGs to look for
511 513 */
512 514 (void) xc_increment(&CPU->cpu_m);
513 515 if (command == XC_MSG_SYNC)
514 516 ++CPU->cpu_m.xc_wait_cnt;
515 517
516 518 /*
517 519 * Increment the target CPU work count then insert the message
518 520 * in the target msgbox. If I post the first bit of work
519 521 * for the target to do, send an IPI to the target CPU.
520 522 */
521 523 cnt = xc_increment(&cpup->cpu_m);
522 524 xc_insert(&cpup->cpu_m.xc_msgbox, msg);
523 525 if (cpup != CPU) {
524 526 if (cnt == 0) {
525 527 CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
526 528 send_dirint(c, XC_HI_PIL);
527 529 if (xc_collect_enable)
528 530 ++xc_total_cnt;
529 531 } else if (xc_collect_enable) {
530 532 ++xc_multi_cnt;
531 533 }
532 534 }
533 535 }
534 536
535 537 /*
536 538 * Now drop into the message handler until all work is done
537 539 */
538 540 (void) xc_serv(NULL, NULL);
539 541 splx(save_spl);
540 542 }
541 543
542 544 /*
543 545 * Push out a priority cross call.
544 546 */
545 547 static void
546 548 xc_priority_common(
547 549 xc_func_t func,
548 550 xc_arg_t arg1,
549 551 xc_arg_t arg2,
550 552 xc_arg_t arg3,
551 553 ulong_t *set)
552 554 {
553 555 int i;
554 556 int c;
555 557 struct cpu *cpup;
556 558
557 559 /*
558 560 * Wait briefly for any previous xc_priority to have finished.
559 561 */
560 562 for (c = 0; c < max_ncpus; ++c) {
561 563 cpup = cpu[c];
562 564 if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
563 565 continue;
564 566
565 567 /*
566 568 * The value of 40000 here is from old kernel code. It
567 569 * really should be changed to some time-based value, since
568 570 * under a hypervisor, there's no guarantee a remote CPU
569 571 * is even scheduled.
570 572 */
571 573 for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
572 574 SMT_PAUSE();
573 575
574 576 /*
575 577 * Some CPU did not respond to a previous priority request. It's
576 578 * probably deadlocked with interrupts blocked or some such
577 579 * problem. We'll just erase the previous request - which was
578 580 * most likely a kmdb_enter that has already expired - and plow
579 581 * ahead.
580 582 */
581 583 if (BT_TEST(xc_priority_set, c)) {
582 584 XC_BT_CLEAR(xc_priority_set, c);
583 585 if (cpup->cpu_m.xc_work_cnt > 0)
584 586 xc_decrement(&cpup->cpu_m);
585 587 }
586 588 }
587 589
588 590 /*
589 591 * fill in cross call data
590 592 */
591 593 xc_priority_data.xc_func = func;
592 594 xc_priority_data.xc_a1 = arg1;
593 595 xc_priority_data.xc_a2 = arg2;
594 596 xc_priority_data.xc_a3 = arg3;
595 597
596 598 /*
597 599 * Post messages to all CPUs involved that are CPU_READY
598 600 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
599 601 */
600 602 for (c = 0; c < max_ncpus; ++c) {
601 603 if (!BT_TEST(set, c))
602 604 continue;
603 605 cpup = cpu[c];
604 606 if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
605 607 cpup == CPU)
606 608 continue;
607 609 (void) xc_increment(&cpup->cpu_m);
608 610 XC_BT_SET(xc_priority_set, c);
609 611 send_dirint(c, XC_HI_PIL);
610 612 for (i = 0; i < 10; ++i) {
611 - (void) casptr(&cpup->cpu_m.xc_msgbox,
613 + (void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
612 614 cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
613 615 }
614 616 }
615 617 }
616 618
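The dummy atomic_cas_ptr() loop at the end of xc_priority_common() looks odd but is deliberate: per the comment above it, the point is to "bang on" the target's xc_msgbox, so the LOCK-prefixed write traffic on that cache line should also rouse a CPU idling in i86_mwait() in case the IPI alone does not get through.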
617 619 /*
618 620 * Do cross call to all other CPUs with absolutely no waiting or handshaking.
619 621 * This should only be used for extraordinary operations, like panic(), which
620 622 * need to work, in some fashion, in a not completely functional system.
621 623 * All other uses that want minimal waiting should use xc_call_nowait().
622 624 */
623 625 void
624 626 xc_priority(
625 627 xc_arg_t arg1,
626 628 xc_arg_t arg2,
627 629 xc_arg_t arg3,
628 630 ulong_t *set,
629 631 xc_func_t func)
630 632 {
631 633 extern int IGNORE_KERNEL_PREEMPTION;
632 634 int save_spl = splr(ipltospl(XC_HI_PIL));
633 635 int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
634 636
635 637 IGNORE_KERNEL_PREEMPTION = 1;
636 638 xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
637 639 IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
638 640 splx(save_spl);
639 641 }
640 642
641 643 /*
642 644 * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
643 645 */
644 646 void
645 647 kdi_xc_others(int this_cpu, void (*func)(void))
646 648 {
647 649 extern int IGNORE_KERNEL_PREEMPTION;
648 650 int save_kernel_preemption;
649 651 cpuset_t set;
650 652
651 653 if (!xc_initialized)
652 654 return;
653 655
654 656 save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
655 657 IGNORE_KERNEL_PREEMPTION = 1;
656 658 CPUSET_ALL_BUT(set, this_cpu);
657 659 xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
658 660 IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
659 661 }
660 662
661 663
662 664
663 665 /*
664 666 * Invoke function on specified processors. Remotes may continue after
665 667 * service with no waiting. xc_call_nowait() may return immediately too.
666 668 */
667 669 void
668 670 xc_call_nowait(
669 671 xc_arg_t arg1,
670 672 xc_arg_t arg2,
671 673 xc_arg_t arg3,
672 674 ulong_t *set,
673 675 xc_func_t func)
674 676 {
675 677 xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
676 678 }
677 679
678 680 /*
679 681 * Invoke function on specified processors. Remotes may continue after
680 682 * service with no waiting. xc_call() returns only after remotes have finished.
681 683 */
682 684 void
683 685 xc_call(
684 686 xc_arg_t arg1,
685 687 xc_arg_t arg2,
686 688 xc_arg_t arg3,
687 689 ulong_t *set,
688 690 xc_func_t func)
689 691 {
690 692 xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
691 693 }
692 694
693 695 /*
694 696 * Invoke function on specified processors. Remotes wait until all have
695 697 * finished. xc_sync() also waits until all remotes have finished.
696 698 */
697 699 void
698 700 xc_sync(
699 701 xc_arg_t arg1,
700 702 xc_arg_t arg2,
701 703 xc_arg_t arg3,
702 704 ulong_t *set,
703 705 xc_func_t func)
704 706 {
705 707 xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
706 708 }
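To close, a hedged sketch of how a caller drives these three entry points; my_handler, example() and the zero arguments are illustrative, not from this file:

    static int
    my_handler(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
    {
            /* runs at XC_HI_PIL on each selected CPU */
            return (0);
    }

    static void
    example(void)
    {
            cpuset_t set;

            CPUSET_ALL(set);                /* target every CPU */

            /* fire and forget; remotes run the handler eventually */
            xc_call_nowait(0, 0, 0, CPUSET2BV(set), my_handler);

            /* return only after every remote has run the handler */
            xc_call(0, 0, 0, CPUSET2BV(set), my_handler);

            /* additionally, remotes wait until all CPUs have run it */
            xc_sync(0, 0, 0, CPUSET2BV(set), my_handler);
    }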