5042 stop using deprecated atomic functions
--- old/usr/src/uts/common/os/mutex.c
+++ new/usr/src/uts/common/os/mutex.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Big Theory Statement for mutual exclusion locking primitives.
28 28 *
29 29 * A mutex serializes multiple threads so that only one thread
30 30 * (the "owner" of the mutex) is active at a time. See mutex(9F)
31 31 * for a full description of the interfaces and programming model.
32 32 * The rest of this comment describes the implementation.
33 33 *
34 34 * Mutexes come in two flavors: adaptive and spin. mutex_init(9F)
35 35 * determines the type based solely on the iblock cookie (PIL) argument.
36 36 * PIL > LOCK_LEVEL implies a spin lock; everything else is adaptive.
37 37 *
38 38 * Spin mutexes block interrupts and spin until the lock becomes available.
39 39 * A thread may not sleep, or call any function that might sleep, while
40 40 * holding a spin mutex. With few exceptions, spin mutexes should only
41 41 * be used to synchronize with interrupt handlers.
42 42 *
43 43 * Adaptive mutexes (the default type) spin if the owner is running on
44 44 * another CPU and block otherwise. This policy is based on the assumption
45 45 * that mutex hold times are typically short enough that the time spent
46 46 * spinning is less than the time it takes to block. If you need mutual
47 47 * exclusion semantics with long hold times, consider an rwlock(9F) as
48 48 * RW_WRITER. Better still, reconsider the algorithm: if it requires
49 49 * mutual exclusion for long periods of time, it's probably not scalable.
50 50 *
51 51 * Adaptive mutexes are overwhelmingly more common than spin mutexes,
52 52 * so mutex_enter() assumes that the lock is adaptive. We get away
53 53 * with this by structuring mutexes so that an attempt to acquire a
54 54 * spin mutex as adaptive always fails. When mutex_enter() fails
55 55 * it punts to mutex_vector_enter(), which does all the hard stuff.
56 56 *
57 57 * mutex_vector_enter() first checks the type. If it's a spin mutex,
58 58 * we just call lock_set_spl() and return. If it's an adaptive mutex,
59 59 * we check to see what the owner is doing. If the owner is running,
60 60 * we spin until the lock becomes available; if not, we mark the lock
61 61 * as having waiters and block.
62 62 *
63 63 * Blocking on a mutex is a surprisingly delicate dance because, for speed,
64 64 * mutex_exit() doesn't use an atomic instruction. Thus we have to work
65 65 * a little harder in the (rarely-executed) blocking path to make sure
66 66 * we don't block on a mutex that's just been released -- otherwise we
67 67 * might never be woken up.
68 68 *
69 69 * The logic for synchronizing mutex_vector_enter() with mutex_exit()
70 70 * in the face of preemption and relaxed memory ordering is as follows:
71 71 *
72 72 * (1) Preemption in the middle of mutex_exit() must cause mutex_exit()
73 73 * to restart. Each platform must enforce this by checking the
74 74 * interrupted PC in the interrupt handler (or on return from trap --
75 75 * whichever is more convenient for the platform). If the PC
76 76 * lies within the critical region of mutex_exit(), the interrupt
77 77 * handler must reset the PC back to the beginning of mutex_exit().
78 78 * The critical region consists of all instructions up to, but not
79 79 * including, the store that clears the lock (which, of course,
80 80 * must never be executed twice.)
81 81 *
82 82 * This ensures that the owner will always check for waiters after
83 83 * resuming from a previous preemption.
84 84 *
85 85 * (2) A thread resuming in mutex_exit() does (at least) the following:
86 86 *
87 87 * when resuming: set CPU_THREAD = owner
88 88 * membar #StoreLoad
89 89 *
90 90 * in mutex_exit: check waiters bit; do wakeup if set
91 91 * membar #LoadStore|#StoreStore
92 92 * clear owner
93 93 * (at this point, other threads may or may not grab
94 94 * the lock, and we may or may not reacquire it)
95 95 *
96 96 * when blocking: membar #StoreStore (due to disp_lock_enter())
97 97 * set CPU_THREAD = (possibly) someone else
98 98 *
99 99 * (3) A thread blocking in mutex_vector_enter() does the following:
100 100 *
101 101 * set waiters bit
102 102 * membar #StoreLoad (via membar_enter())
103 103 * check CPU_THREAD for owner's t_cpu
104 104 * continue if owner running
105 105 * membar #LoadLoad (via membar_consumer())
106 106 * check owner and waiters bit; abort if either changed
107 107 * block
108 108 *
109 109 * Thus the global memory orderings for (2) and (3) are as follows:
110 110 *
111 111 * (2M) mutex_exit() memory order:
112 112 *
113 113 * STORE CPU_THREAD = owner
114 114 * LOAD waiters bit
115 115 * STORE owner = NULL
116 116 * STORE CPU_THREAD = (possibly) someone else
117 117 *
118 118 * (3M) mutex_vector_enter() memory order:
119 119 *
120 120 * STORE waiters bit = 1
121 121 * LOAD CPU_THREAD for each CPU
122 122 * LOAD owner and waiters bit
123 123 *
124 124 * It has been verified by exhaustive simulation that all possible global
125 125 * memory orderings of (2M) interleaved with (3M) result in correct
126 126 * behavior. Moreover, these ordering constraints are minimal: changing
127 127 * the ordering of anything in (2M) or (3M) breaks the algorithm, creating
128 128 * windows for missed wakeups. Note: the possibility that other threads
129 129 * may grab the lock after the owner drops it can be factored out of the
130 130 * memory ordering analysis because mutex_vector_enter() won't block
131 131 * if the lock isn't still owned by the same thread.
132 132 *
133 133 * The only requirements of code outside the mutex implementation are
134 134 * (1) mutex_exit() preemption fixup in interrupt handlers or trap return,
135 135 * (2) a membar #StoreLoad after setting CPU_THREAD in resume(),
136 136 * (3) mutex_owner_running() preemption fixup in interrupt handlers
137 137 * or trap returns.
138 138 * Note: idle threads cannot grab adaptive locks (since they cannot block),
139 139 * so the membar may be safely omitted when resuming an idle thread.
140 140 *
141 141 * When a mutex has waiters, mutex_vector_exit() has several options:
142 142 *
143 143 * (1) Choose a waiter and make that thread the owner before waking it;
144 144 * this is known as "direct handoff" of ownership.
145 145 *
146 146 * (2) Drop the lock and wake one waiter.
147 147 *
148 148 * (3) Drop the lock, clear the waiters bit, and wake all waiters.
149 149 *
150 150 * In many ways (1) is the cleanest solution, but if a lock is moderately
151 151 * contended it defeats the adaptive spin logic. If we make some other
152 152 * thread the owner, but he's not ONPROC yet, then all other threads on
153 153 * other cpus that try to get the lock will conclude that the owner is
154 154 * blocked, so they'll block too. And so on -- it escalates quickly,
155 155 * with every thread taking the blocking path rather than the spin path.
156 156 * Thus, direct handoff is *not* a good idea for adaptive mutexes.
157 157 *
158 158 * Option (2) is the next most natural-seeming option, but it has several
159 159 * annoying properties. If there's more than one waiter, we must preserve
160 160 * the waiters bit on an unheld lock. On cas-capable platforms, where
161 161 * the waiters bit is part of the lock word, this means that both 0x0
162 162 * and 0x1 represent unheld locks, so we have to cas against *both*.
163 163 * Priority inheritance also gets more complicated, because a lock can
164 164 * have waiters but no owner to whom priority can be willed. So while
165 165 * it is possible to make option (2) work, it's surprisingly vile.
166 166 *
167 167 * Option (3), the least-intuitive at first glance, is what we actually do.
168 168 * It has the advantage that because you always wake all waiters, you
169 169 * never have to preserve the waiters bit. Waking all waiters seems like
170 170 * begging for a thundering herd problem, but consider: under option (2),
171 171 * every thread that grabs and drops the lock will wake one waiter -- so
172 172 * if the lock is fairly active, all waiters will be awakened very quickly
173 173 * anyway. Moreover, this is how adaptive locks are *supposed* to work.
174 174 * The blocking case is rare; the more common case (by 3-4 orders of
175 175 * magnitude) is that one or more threads spin waiting to get the lock.
176 176 * Only direct handoff can prevent the thundering herd problem, but as
177 177 * mentioned earlier, that would tend to defeat the adaptive spin logic.
178 178 * In practice, option (3) works well because the blocking case is rare.
179 179 */
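
To make the adaptive/spin distinction described above concrete, here is a minimal sketch of how a driver might initialize one mutex of each flavor with mutex_init(9F). The function name, the static locks, and the use of interrupt number 0 are invented for illustration; only mutex_init(9F) and ddi_get_iblock_cookie(9F) are real interfaces.

	#include <sys/types.h>
	#include <sys/ksynch.h>
	#include <sys/ddi.h>
	#include <sys/sunddi.h>

	static kmutex_t soft_lock;	/* ordinary per-instance state */
	static kmutex_t hard_lock;	/* shared with an interrupt handler */

	static int
	example_lock_init(dev_info_t *dip)
	{
		ddi_iblock_cookie_t ibc;

		/* NULL cookie (PIL <= LOCK_LEVEL): an adaptive mutex. */
		mutex_init(&soft_lock, NULL, MUTEX_DRIVER, NULL);

		/*
		 * Cookie of interrupt 0: if that interrupt is high-level
		 * (PIL > LOCK_LEVEL), this lock becomes a spin mutex;
		 * otherwise it is still adaptive.
		 */
		if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
			return (DDI_FAILURE);
		mutex_init(&hard_lock, NULL, MUTEX_DRIVER, (void *)ibc);

		return (DDI_SUCCESS);
	}
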
180 180
181 181 /*
182 182 * delayed lock retry with exponential delay for spin locks
183 183 *
184 184 * It is noted above that for both the spin locks and the adaptive locks,
185 185 * spinning is the dominant mode of operation. So long as there is only
186 186 * one thread waiting on a lock, the naive spin loop works very well in
187 187 * cache-based architectures. The lock data structure is pulled into the
188 188 * cache of the processor with the waiting/spinning thread and no further
189 189 * memory traffic is generated until the lock is released. Unfortunately,
190 190 * once two or more threads are waiting on a lock, the naive spin has
191 191 * the property of generating maximum memory traffic from each spinning
192 192 * thread as the spinning threads contend for the lock data structure.
193 193 *
194 194 * By executing a delay loop before retrying a lock, a waiting thread
195 195 * can reduce its memory traffic by a large factor, depending on the
196 196 * size of the delay loop. A large delay loop greatly reduces the memory
197 197 * traffic, but has the drawback of having a period of time when
198 198 * no thread is attempting to gain the lock even though several threads
199 199 * might be waiting. A small delay loop has the drawback of not
200 200 * much reduction in memory traffic, but reduces the potential idle time.
201 201 * The theory of the exponential delay code is to start with a short
202 202 * delay loop and double the waiting time on each iteration, up to
203 203 * a preselected maximum.
204 204 */
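
The backoff/delay pair implemented below follows that theory. As a rough, self-contained illustration of the same capped-doubling idea (the constants, the lock encoding, and the busy-wait loop here are made up and are not the tunables defined further down):

	#include <sys/types.h>
	#include <sys/atomic.h>

	#define	EX_BACKOFF_BASE	2	/* initial delay, in loop iterations */
	#define	EX_BACKOFF_CAP	1024	/* upper bound on the delay */

	static void
	example_spin_acquire(volatile uint_t *lockp)
	{
		uint_t backoff = EX_BACKOFF_BASE;
		volatile uint_t i;

		/* 0 means unheld, 1 means held; retry until the cas wins. */
		while (atomic_cas_uint(lockp, 0, 1) != 0) {
			/* Stay out of the coherence traffic for a while... */
			for (i = 0; i < backoff; i++)
				continue;
			/* ...then double the wait for the next retry, up to the cap. */
			if ((backoff <<= 1) > EX_BACKOFF_CAP)
				backoff = EX_BACKOFF_CAP;
		}
	}
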
205 205
206 206 #include <sys/param.h>
207 207 #include <sys/time.h>
208 208 #include <sys/cpuvar.h>
209 209 #include <sys/thread.h>
210 210 #include <sys/debug.h>
211 211 #include <sys/cmn_err.h>
212 212 #include <sys/sobject.h>
213 213 #include <sys/turnstile.h>
214 214 #include <sys/systm.h>
215 215 #include <sys/mutex_impl.h>
216 216 #include <sys/spl.h>
217 217 #include <sys/lockstat.h>
218 218 #include <sys/atomic.h>
219 219 #include <sys/cpu.h>
220 220 #include <sys/stack.h>
221 221 #include <sys/archsystm.h>
222 222 #include <sys/machsystm.h>
223 223 #include <sys/x_call.h>
224 224
225 225 /*
226 226 * The sobj_ops vector exports a set of functions needed when a thread
227 227 * is asleep on a synchronization object of this type.
228 228 */
229 229 static sobj_ops_t mutex_sobj_ops = {
230 230 SOBJ_MUTEX, mutex_owner, turnstile_stay_asleep, turnstile_change_pri
231 231 };
232 232
233 233 /*
234 234 * If the system panics on a mutex, save the address of the offending
235 235 * mutex in panic_mutex_addr, and save the contents in panic_mutex.
236 236 */
237 237 static mutex_impl_t panic_mutex;
238 238 static mutex_impl_t *panic_mutex_addr;
239 239
240 240 static void
241 241 mutex_panic(char *msg, mutex_impl_t *lp)
242 242 {
243 243 if (panicstr)
244 244 return;
245 245
246 - if (casptr(&panic_mutex_addr, NULL, lp) == NULL)
246 + if (atomic_cas_ptr(&panic_mutex_addr, NULL, lp) == NULL)
247 247 panic_mutex = *lp;
248 248
249 249 panic("%s, lp=%p owner=%p thread=%p",
250 250 msg, (void *)lp, (void *)MUTEX_OWNER(&panic_mutex),
251 251 (void *)curthread);
252 252 }
253 253
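
The single functional change in this file is the one visible above: the deprecated short-form casptr() becomes atomic_cas_ptr() from <sys/atomic.h> (bug 5042). A minimal sketch of the same publish-once compare-and-swap idiom, with invented names, assuming only atomic_cas_ptr()'s documented behavior of returning the old value:

	#include <sys/atomic.h>

	static void *first_seen;	/* the first pointer anyone published, or NULL */

	/*
	 * Store 'p' into first_seen only if first_seen is still NULL.
	 * atomic_cas_ptr() returns the prior value, so the cas succeeded
	 * exactly when that prior value was NULL.
	 */
	static int
	publish_first(void *p)
	{
		return (atomic_cas_ptr(&first_seen, NULL, p) == NULL);
	}
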
254 254 /* "tunables" for per-platform backoff constants. */
255 255 uint_t mutex_backoff_cap = 0;
256 256 ushort_t mutex_backoff_base = MUTEX_BACKOFF_BASE;
257 257 ushort_t mutex_cap_factor = MUTEX_CAP_FACTOR;
258 258 uchar_t mutex_backoff_shift = MUTEX_BACKOFF_SHIFT;
259 259
260 260 void
261 261 mutex_sync(void)
262 262 {
263 263 MUTEX_SYNC();
264 264 }
265 265
266 266 /* calculate the backoff interval */
267 267 uint_t
268 268 default_lock_backoff(uint_t backoff)
269 269 {
270 270 uint_t cap; /* backoff cap calculated */
271 271
272 272 if (backoff == 0) {
273 273 backoff = mutex_backoff_base;
274 274 /* first call just sets the base */
275 275 return (backoff);
276 276 }
277 277
278 278 /* set cap */
279 279 if (mutex_backoff_cap == 0) {
280 280 /*
281 281 * For a contended lock, in the worst case a load + cas may
282 282 * be queued at the controller for each contending CPU.
283 283 * Therefore, to avoid queueing, the accesses for all CPUS must
284 284 * be spread out in time over an interval of (ncpu *
285 285 * cap-factor). Maximum backoff is set to this value, and
286 286 * actual backoff is a random number from 0 to the current max.
287 287 */
288 288 cap = ncpus_online * mutex_cap_factor;
289 289 } else {
290 290 cap = mutex_backoff_cap;
291 291 }
292 292
293 293 /* calculate new backoff value */
294 294 backoff <<= mutex_backoff_shift; /* increase backoff */
295 295 if (backoff > cap) {
296 296 if (cap < mutex_backoff_base)
297 297 backoff = mutex_backoff_base;
298 298 else
299 299 backoff = cap;
300 300 }
301 301
302 302 return (backoff);
303 303 }
304 304
305 305 /*
306 306 * default delay function for mutexes.
307 307 */
308 308 void
309 309 default_lock_delay(uint_t backoff)
310 310 {
311 311 ulong_t rnd; /* random factor */
312 312 uint_t cur_backoff; /* calculated backoff */
313 313 uint_t backctr;
314 314
315 315 /*
316 316 * Modify backoff by a random amount to avoid lockstep, and to
317 317 * make it probable that some thread gets a small backoff, and
318 318 * re-checks quickly
319 319 */
320 320 rnd = (((long)curthread >> PTR24_LSB) ^ (long)MUTEX_GETTICK());
321 321 cur_backoff = (uint_t)(rnd % (backoff - mutex_backoff_base + 1)) +
322 322 mutex_backoff_base;
323 323
324 324 /*
325 325 * Delay before trying
326 326 * to touch the mutex data structure.
327 327 */
328 328 for (backctr = cur_backoff; backctr; backctr--) {
329 329 MUTEX_DELAY();
330 330 };
331 331 }
332 332
333 333 uint_t (*mutex_lock_backoff)(uint_t) = default_lock_backoff;
334 334 void (*mutex_lock_delay)(uint_t) = default_lock_delay;
335 335 void (*mutex_delay)(void) = mutex_delay_default;
336 336
337 337 /*
338 338 * mutex_vector_enter() is called from the assembly mutex_enter() routine
339 339 * if the lock is held or is not of type MUTEX_ADAPTIVE.
340 340 */
341 341 void
342 342 mutex_vector_enter(mutex_impl_t *lp)
343 343 {
344 344 kthread_id_t owner;
345 345 kthread_id_t lastowner = MUTEX_NO_OWNER; /* track owner changes */
346 346 hrtime_t sleep_time = 0; /* how long we slept */
347 347 hrtime_t spin_time = 0; /* how long we spun */
348 348 cpu_t *cpup;
349 349 turnstile_t *ts;
350 350 volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
351 351 uint_t backoff = 0; /* current backoff */
352 352 int changecnt = 0; /* count of owner changes */
353 353
354 354 ASSERT_STACK_ALIGNED();
355 355
356 356 if (MUTEX_TYPE_SPIN(lp)) {
357 357 lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
358 358 &lp->m_spin.m_oldspl);
359 359 return;
360 360 }
361 361
362 362 if (!MUTEX_TYPE_ADAPTIVE(lp)) {
363 363 mutex_panic("mutex_enter: bad mutex", lp);
364 364 return;
365 365 }
366 366
367 367 /*
368 368 * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
369 369 * We can migrate after loading CPU but before checking CPU_ON_INTR,
370 370 * so we must verify by disabling preemption and loading CPU again.
371 371 */
372 372 cpup = CPU;
373 373 if (CPU_ON_INTR(cpup) && !panicstr) {
374 374 kpreempt_disable();
375 375 if (CPU_ON_INTR(CPU))
376 376 mutex_panic("mutex_enter: adaptive at high PIL", lp);
377 377 kpreempt_enable();
378 378 }
379 379
380 380 CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);
381 381
382 382 spin_time = LOCKSTAT_START_TIME(LS_MUTEX_ENTER_SPIN);
383 383
384 384 backoff = mutex_lock_backoff(0); /* set base backoff */
385 385 for (;;) {
386 386 mutex_lock_delay(backoff); /* backoff delay */
387 387
388 388 if (panicstr)
389 389 return;
390 390
391 391 if ((owner = MUTEX_OWNER(vlp)) == NULL) {
392 392 if (mutex_adaptive_tryenter(lp)) {
393 393 break;
394 394 }
395 395 /* increase backoff only on failed attempt. */
396 396 backoff = mutex_lock_backoff(backoff);
397 397 changecnt++;
398 398 continue;
399 399 } else if (lastowner != owner) {
400 400 lastowner = owner;
401 401 backoff = mutex_lock_backoff(backoff);
402 402 changecnt++;
403 403 }
404 404
405 405 if (changecnt >= ncpus_online) {
406 406 backoff = mutex_lock_backoff(0);
407 407 changecnt = 0;
408 408 }
409 409
410 410 if (owner == curthread)
411 411 mutex_panic("recursive mutex_enter", lp);
412 412
413 413 /*
414 414 * If lock is held but owner is not yet set, spin.
415 415 * (Only relevant for platforms that don't have cas.)
416 416 */
417 417 if (owner == MUTEX_NO_OWNER)
418 418 continue;
419 419
420 420 if (mutex_owner_running(lp) != NULL) {
421 421 continue;
422 422 }
423 423
424 424 /*
425 425 * The owner appears not to be running, so block.
426 426 * See the Big Theory Statement for memory ordering issues.
427 427 */
428 428 ts = turnstile_lookup(lp);
429 429 MUTEX_SET_WAITERS(lp);
430 430 membar_enter();
431 431
432 432 /*
433 433 * Recheck whether owner is running after waiters bit hits
434 434 * global visibility (above). If owner is running, spin.
435 435 */
436 436 if (mutex_owner_running(lp) != NULL) {
437 437 turnstile_exit(lp);
438 438 continue;
439 439 }
440 440 membar_consumer();
441 441
442 442 /*
443 443 * If owner and waiters bit are unchanged, block.
444 444 */
445 445 if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
446 446 sleep_time -= gethrtime();
447 447 (void) turnstile_block(ts, TS_WRITER_Q, lp,
448 448 &mutex_sobj_ops, NULL, NULL);
449 449 sleep_time += gethrtime();
450 450 /* reset backoff after turnstile */
451 451 backoff = mutex_lock_backoff(0);
452 452 } else {
453 453 turnstile_exit(lp);
454 454 }
455 455 }
456 456
457 457 ASSERT(MUTEX_OWNER(lp) == curthread);
458 458
459 459 if (sleep_time != 0) {
460 460 /*
461 461 * Note, sleep time is the sum of all the sleeping we
462 462 * did.
463 463 */
464 464 LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
465 465 }
466 466
467 467 /* record spin time, don't count sleep time */
468 468 if (spin_time != 0) {
469 469 LOCKSTAT_RECORD_TIME(LS_MUTEX_ENTER_SPIN, lp,
470 470 spin_time + sleep_time);
471 471 }
472 472
473 473 LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
474 474 }
475 475
476 476 /*
477 477 * mutex_vector_tryenter() is called from the assembly mutex_tryenter()
478 478 * routine if the lock is held or is not of type MUTEX_ADAPTIVE.
479 479 */
480 480 int
481 481 mutex_vector_tryenter(mutex_impl_t *lp)
482 482 {
483 483 int s;
484 484
485 485 if (MUTEX_TYPE_ADAPTIVE(lp))
486 486 return (0); /* we already tried in assembly */
487 487
488 488 if (!MUTEX_TYPE_SPIN(lp)) {
489 489 mutex_panic("mutex_tryenter: bad mutex", lp);
490 490 return (0);
491 491 }
492 492
493 493 s = splr(lp->m_spin.m_minspl);
494 494 if (lock_try(&lp->m_spin.m_spinlock)) {
495 495 lp->m_spin.m_oldspl = (ushort_t)s;
496 496 return (1);
497 497 }
498 498 splx(s);
499 499 return (0);
500 500 }
501 501
502 502 /*
503 503 * mutex_vector_exit() is called from mutex_exit() if the lock is not
504 504 * adaptive, has waiters, or is not owned by the current thread (panic).
505 505 */
506 506 void
507 507 mutex_vector_exit(mutex_impl_t *lp)
508 508 {
509 509 turnstile_t *ts;
510 510
511 511 if (MUTEX_TYPE_SPIN(lp)) {
512 512 lock_clear_splx(&lp->m_spin.m_spinlock, lp->m_spin.m_oldspl);
513 513 return;
514 514 }
515 515
516 516 if (MUTEX_OWNER(lp) != curthread) {
517 517 mutex_panic("mutex_exit: not owner", lp);
518 518 return;
519 519 }
520 520
521 521 ts = turnstile_lookup(lp);
522 522 MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
523 523 if (ts == NULL)
524 524 turnstile_exit(lp);
525 525 else
526 526 turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
527 527 LOCKSTAT_RECORD0(LS_MUTEX_EXIT_RELEASE, lp);
528 528 }
529 529
530 530 int
531 531 mutex_owned(const kmutex_t *mp)
532 532 {
533 533 const mutex_impl_t *lp = (const mutex_impl_t *)mp;
534 534
535 535 if (panicstr || quiesce_active)
536 536 return (1);
537 537
538 538 if (MUTEX_TYPE_ADAPTIVE(lp))
539 539 return (MUTEX_OWNER(lp) == curthread);
540 540 return (LOCK_HELD(&lp->m_spin.m_spinlock));
541 541 }
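
mutex_owned() exists mainly to back assertions such as the hypothetical helper below (MUTEX_HELD() is the usual wrapper from <sys/mutex.h>); the early return of 1 above keeps such ASSERTs from firing while the system is panicking or quiescing.

	#include <sys/ksynch.h>
	#include <sys/debug.h>

	static kmutex_t foo_lock;	/* protects foo_count */
	static int foo_count;

	/* Callers must already hold foo_lock. */
	static void
	foo_bump(void)
	{
		ASSERT(MUTEX_HELD(&foo_lock));
		foo_count++;
	}
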
542 542
543 543 kthread_t *
544 544 mutex_owner(const kmutex_t *mp)
545 545 {
546 546 const mutex_impl_t *lp = (const mutex_impl_t *)mp;
547 547 kthread_id_t t;
548 548
549 549 if (MUTEX_TYPE_ADAPTIVE(lp) && (t = MUTEX_OWNER(lp)) != MUTEX_NO_OWNER)
550 550 return (t);
551 551 return (NULL);
552 552 }
553 553
554 554 /*
555 555 * The iblock cookie 'ibc' is the spl level associated with the lock;
556 556 * this alone determines whether the lock will be ADAPTIVE or SPIN.
557 557 *
558 558 * Adaptive mutexes created in zeroed memory do not need to call
559 559 * mutex_init() as their allocation in this fashion guarantees
560 560 * their initialization.
561 561 * eg adaptive mutexes created as static within the BSS or allocated
562 562 * by kmem_zalloc().
563 563 */
564 564 /* ARGSUSED */
565 565 void
566 566 mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc)
567 567 {
568 568 mutex_impl_t *lp = (mutex_impl_t *)mp;
569 569
570 570 ASSERT(ibc < (void *)KERNELBASE); /* see 1215173 */
571 571
572 572 if ((intptr_t)ibc > ipltospl(LOCK_LEVEL) && ibc < (void *)KERNELBASE) {
573 573 ASSERT(type != MUTEX_ADAPTIVE && type != MUTEX_DEFAULT);
574 574 MUTEX_SET_TYPE(lp, MUTEX_SPIN);
575 575 LOCK_INIT_CLEAR(&lp->m_spin.m_spinlock);
576 576 LOCK_INIT_HELD(&lp->m_spin.m_dummylock);
577 577 lp->m_spin.m_minspl = (int)(intptr_t)ibc;
578 578 } else {
579 579 #ifdef MUTEX_ALIGN
580 580 static int misalign_cnt = 0;
581 581
582 582 if (((uintptr_t)lp & (uintptr_t)(MUTEX_ALIGN - 1)) &&
583 583 (misalign_cnt < MUTEX_ALIGN_WARNINGS)) {
584 584 /*
585 585 * The mutex is not aligned and may cross a cache line.
586 586 * This is not supported and may cause a panic.
587 587 * Show a warning that the mutex is not aligned
588 588 * and attempt to identify the origin.
589 589 * Unaligned mutexes are not (supposed to be)
590 590 * possible on SPARC.
591 591 */
592 592 char *funcname;
593 593 ulong_t offset = 0;
594 594
595 595 funcname = modgetsymname((uintptr_t)caller(), &offset);
596 596 cmn_err(CE_WARN, "mutex_init: %p is not %d byte "
597 597 "aligned; caller %s+%lx in module %s. "
598 598 "This is unsupported and may cause a panic. "
599 599 "Please report this to the kernel module supplier.",
600 600 (void *)lp, MUTEX_ALIGN,
601 601 funcname ? funcname : "unknown", offset,
602 602 mod_containing_pc(caller()));
603 603 misalign_cnt++;
604 604 if (misalign_cnt >= MUTEX_ALIGN_WARNINGS) {
605 605 cmn_err(CE_WARN, "mutex_init: further unaligned"
606 606 " mutex warnings will be suppressed.");
607 607 }
608 608 }
609 609 #endif /* MUTEX_ALIGN */
610 610 ASSERT(type != MUTEX_SPIN);
611 611
612 612 MUTEX_SET_TYPE(lp, MUTEX_ADAPTIVE);
613 613 MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
614 614 }
615 615 }
616 616
617 617 void
618 618 mutex_destroy(kmutex_t *mp)
619 619 {
620 620 mutex_impl_t *lp = (mutex_impl_t *)mp;
621 621
622 622 if (lp->m_owner == 0 && !MUTEX_HAS_WAITERS(lp)) {
623 623 MUTEX_DESTROY(lp);
624 624 } else if (MUTEX_TYPE_SPIN(lp)) {
625 625 LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
626 626 MUTEX_DESTROY(lp);
627 627 } else if (MUTEX_TYPE_ADAPTIVE(lp)) {
628 628 LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
629 629 if (MUTEX_OWNER(lp) != curthread)
630 630 mutex_panic("mutex_destroy: not owner", lp);
631 631 if (MUTEX_HAS_WAITERS(lp)) {
632 632 turnstile_t *ts = turnstile_lookup(lp);
633 633 turnstile_exit(lp);
634 634 if (ts != NULL)
635 635 mutex_panic("mutex_destroy: has waiters", lp);
636 636 }
637 637 MUTEX_DESTROY(lp);
638 638 } else {
639 639 mutex_panic("mutex_destroy: bad mutex", lp);
640 640 }
641 641 }
642 642
643 643 /*
644 644 * Simple C support for the cases where spin locks miss on the first try.
645 645 */
646 646 void
647 647 lock_set_spin(lock_t *lp)
648 648 {
649 649 int loop_count = 0;
650 650 uint_t backoff = 0; /* current backoff */
651 651 hrtime_t spin_time = 0; /* how long we spun */
652 652
653 653 if (panicstr)
654 654 return;
655 655
656 656 if (ncpus == 1)
657 657 panic("lock_set: %p lock held and only one CPU", (void *)lp);
658 658
659 659 spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPIN);
660 660
661 661 while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
662 662 if (panicstr)
663 663 return;
664 664 loop_count++;
665 665
666 666 if (ncpus_online == loop_count) {
667 667 backoff = mutex_lock_backoff(0);
668 668 loop_count = 0;
669 669 } else {
670 670 backoff = mutex_lock_backoff(backoff);
671 671 }
672 672 mutex_lock_delay(backoff);
673 673 }
674 674
675 675 LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPIN, lp, spin_time);
676 676
677 677 LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp);
678 678 }
679 679
680 680 void
681 681 lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
682 682 {
683 683 int loop_count = 0;
684 684 uint_t backoff = 0; /* current backoff */
685 685 hrtime_t spin_time = 0; /* how long we spun */
686 686
687 687 if (panicstr)
688 688 return;
689 689
690 690 if (ncpus == 1)
691 691 panic("lock_set_spl: %p lock held and only one CPU",
692 692 (void *)lp);
693 693
694 694 ASSERT(new_pil > LOCK_LEVEL);
695 695
696 696 spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPL_SPIN);
697 697
698 698 do {
699 699 splx(old_pil);
700 700 while (LOCK_HELD(lp)) {
701 701 loop_count++;
702 702
703 703 if (panicstr) {
704 704 *old_pil_addr = (ushort_t)splr(new_pil);
705 705 return;
706 706 }
707 707 if (ncpus_online == loop_count) {
708 708 backoff = mutex_lock_backoff(0);
709 709 loop_count = 0;
710 710 } else {
711 711 backoff = mutex_lock_backoff(backoff);
712 712 }
713 713 mutex_lock_delay(backoff);
714 714 }
715 715 old_pil = splr(new_pil);
716 716 } while (!lock_spin_try(lp));
717 717
718 718 *old_pil_addr = (ushort_t)old_pil;
719 719
720 720 LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPL_SPIN, lp, spin_time);
721 721
722 722 LOCKSTAT_RECORD0(LS_LOCK_SET_SPL_ACQUIRE, lp);
723 723 }
(467 lines elided)