5042 stop using deprecated atomic functions
--- old/usr/src/uts/common/os/mutex.c
+++ new/usr/src/uts/common/os/mutex.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Big Theory Statement for mutual exclusion locking primitives.
28 28 *
29 29 * A mutex serializes multiple threads so that only one thread
30 30 * (the "owner" of the mutex) is active at a time. See mutex(9F)
31 31 * for a full description of the interfaces and programming model.
32 32 * The rest of this comment describes the implementation.
33 33 *
34 34 * Mutexes come in two flavors: adaptive and spin. mutex_init(9F)
35 35 * determines the type based solely on the iblock cookie (PIL) argument.
36 36 * PIL > LOCK_LEVEL implies a spin lock; everything else is adaptive.
37 37 *
38 38 * Spin mutexes block interrupts and spin until the lock becomes available.
39 39 * A thread may not sleep, or call any function that might sleep, while
40 40 * holding a spin mutex. With few exceptions, spin mutexes should only
41 41 * be used to synchronize with interrupt handlers.
42 42 *
43 43 * Adaptive mutexes (the default type) spin if the owner is running on
44 44 * another CPU and block otherwise. This policy is based on the assumption
45 45 * that mutex hold times are typically short enough that the time spent
46 46 * spinning is less than the time it takes to block. If you need mutual
47 47 * exclusion semantics with long hold times, consider an rwlock(9F) as
48 48 * RW_WRITER. Better still, reconsider the algorithm: if it requires
49 49 * mutual exclusion for long periods of time, it's probably not scalable.
50 50 *
51 51 * Adaptive mutexes are overwhelmingly more common than spin mutexes,
52 52 * so mutex_enter() assumes that the lock is adaptive. We get away
53 53 * with this by structuring mutexes so that an attempt to acquire a
54 54 * spin mutex as adaptive always fails. When mutex_enter() fails
55 55 * it punts to mutex_vector_enter(), which does all the hard stuff.
56 56 *
57 57 * mutex_vector_enter() first checks the type. If it's a spin mutex,
58 58 * we just call lock_set_spl() and return. If it's an adaptive mutex,
59 59 * we check to see what the owner is doing. If the owner is running,
60 60 * we spin until the lock becomes available; if not, we mark the lock
61 61 * as having waiters and block.
62 62 *
63 63 * Blocking on a mutex is a surprisingly delicate dance because, for speed,
64 64 * mutex_exit() doesn't use an atomic instruction. Thus we have to work
65 65 * a little harder in the (rarely-executed) blocking path to make sure
66 66 * we don't block on a mutex that's just been released -- otherwise we
67 67 * might never be woken up.
68 68 *
69 69 * The logic for synchronizing mutex_vector_enter() with mutex_exit()
70 70 * in the face of preemption and relaxed memory ordering is as follows:
71 71 *
72 72 * (1) Preemption in the middle of mutex_exit() must cause mutex_exit()
73 73 * to restart. Each platform must enforce this by checking the
74 74 * interrupted PC in the interrupt handler (or on return from trap --
75 75 * whichever is more convenient for the platform). If the PC
76 76 * lies within the critical region of mutex_exit(), the interrupt
77 77 * handler must reset the PC back to the beginning of mutex_exit().
78 78 * The critical region consists of all instructions up to, but not
79 79 * including, the store that clears the lock (which, of course,
80 80 * must never be executed twice.)
81 81 *
82 82 * This ensures that the owner will always check for waiters after
83 83 * resuming from a previous preemption.
84 84 *
85 85 * (2) A thread resuming in mutex_exit() does (at least) the following:
86 86 *
87 87 * when resuming: set CPU_THREAD = owner
88 88 * membar #StoreLoad
89 89 *
90 90 * in mutex_exit: check waiters bit; do wakeup if set
91 91 * membar #LoadStore|#StoreStore
92 92 * clear owner
93 93 * (at this point, other threads may or may not grab
94 94 * the lock, and we may or may not reacquire it)
95 95 *
96 96 * when blocking: membar #StoreStore (due to disp_lock_enter())
97 97 * set CPU_THREAD = (possibly) someone else
98 98 *
99 99 * (3) A thread blocking in mutex_vector_enter() does the following:
100 100 *
101 101 * set waiters bit
102 102 * membar #StoreLoad (via membar_enter())
103 103 * check CPU_THREAD for owner's t_cpu
104 104 * continue if owner running
105 105 * membar #LoadLoad (via membar_consumer())
106 106 * check owner and waiters bit; abort if either changed
107 107 * block
108 108 *
109 109 * Thus the global memory orderings for (2) and (3) are as follows:
110 110 *
111 111 * (2M) mutex_exit() memory order:
112 112 *
113 113 * STORE CPU_THREAD = owner
114 114 * LOAD waiters bit
115 115 * STORE owner = NULL
116 116 * STORE CPU_THREAD = (possibly) someone else
117 117 *
118 118 * (3M) mutex_vector_enter() memory order:
119 119 *
120 120 * STORE waiters bit = 1
121 121 * LOAD CPU_THREAD for each CPU
122 122 * LOAD owner and waiters bit
123 123 *
124 124 * It has been verified by exhaustive simulation that all possible global
125 125 * memory orderings of (2M) interleaved with (3M) result in correct
126 126 * behavior. Moreover, these ordering constraints are minimal: changing
127 127 * the ordering of anything in (2M) or (3M) breaks the algorithm, creating
128 128 * windows for missed wakeups. Note: the possibility that other threads
129 129 * may grab the lock after the owner drops it can be factored out of the
130 130 * memory ordering analysis because mutex_vector_enter() won't block
131 131 * if the lock isn't still owned by the same thread.
132 132 *
133 133 * The only requirements of code outside the mutex implementation are
134 134 * (1) mutex_exit() preemption fixup in interrupt handlers or trap return,
135 135 * (2) a membar #StoreLoad after setting CPU_THREAD in resume(),
136 136 * (3) mutex_owner_running() preemption fixup in interrupt handlers
137 137 * or trap returns.
138 138 * Note: idle threads cannot grab adaptive locks (since they cannot block),
139 139 * so the membar may be safely omitted when resuming an idle thread.
140 140 *
141 141 * When a mutex has waiters, mutex_vector_exit() has several options:
142 142 *
143 143 * (1) Choose a waiter and make that thread the owner before waking it;
144 144 * this is known as "direct handoff" of ownership.
145 145 *
146 146 * (2) Drop the lock and wake one waiter.
147 147 *
148 148 * (3) Drop the lock, clear the waiters bit, and wake all waiters.
149 149 *
150 150 * In many ways (1) is the cleanest solution, but if a lock is moderately
151 151 * contended it defeats the adaptive spin logic. If we make some other
152 152 * thread the owner, but he's not ONPROC yet, then all other threads on
153 153 * other cpus that try to get the lock will conclude that the owner is
154 154 * blocked, so they'll block too. And so on -- it escalates quickly,
155 155 * with every thread taking the blocking path rather than the spin path.
156 156 * Thus, direct handoff is *not* a good idea for adaptive mutexes.
157 157 *
158 158 * Option (2) is the next most natural-seeming option, but it has several
159 159 * annoying properties. If there's more than one waiter, we must preserve
160 160 * the waiters bit on an unheld lock. On cas-capable platforms, where
161 161 * the waiters bit is part of the lock word, this means that both 0x0
162 162 * and 0x1 represent unheld locks, so we have to cas against *both*.
163 163 * Priority inheritance also gets more complicated, because a lock can
164 164 * have waiters but no owner to whom priority can be willed. So while
165 165 * it is possible to make option (2) work, it's surprisingly vile.
166 166 *
167 167 * Option (3), the least-intuitive at first glance, is what we actually do.
168 168 * It has the advantage that because you always wake all waiters, you
169 169 * never have to preserve the waiters bit. Waking all waiters seems like
170 170 * begging for a thundering herd problem, but consider: under option (2),
171 171 * every thread that grabs and drops the lock will wake one waiter -- so
172 172 * if the lock is fairly active, all waiters will be awakened very quickly
173 173 * anyway. Moreover, this is how adaptive locks are *supposed* to work.
174 174 * The blocking case is rare; the more common case (by 3-4 orders of
175 175 * magnitude) is that one or more threads spin waiting to get the lock.
176 176 * Only direct handoff can prevent the thundering herd problem, but as
177 177 * mentioned earlier, that would tend to defeat the adaptive spin logic.
178 178 * In practice, option (3) works well because the blocking case is rare.
179 179 */
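
To make the adaptive/spin distinction described above concrete, here is a minimal sketch of how a driver might initialize one mutex of each flavor with mutex_init(9F). The function name, the static locks, and the use of interrupt number 0 are invented for illustration; only mutex_init(9F) and ddi_get_iblock_cookie(9F) are real interfaces.

	#include <sys/types.h>
	#include <sys/ksynch.h>
	#include <sys/ddi.h>
	#include <sys/sunddi.h>

	static kmutex_t soft_lock;	/* ordinary per-instance state */
	static kmutex_t hard_lock;	/* shared with an interrupt handler */

	static int
	example_lock_init(dev_info_t *dip)
	{
		ddi_iblock_cookie_t ibc;

		/* NULL cookie (PIL <= LOCK_LEVEL): an adaptive mutex. */
		mutex_init(&soft_lock, NULL, MUTEX_DRIVER, NULL);

		/*
		 * Cookie of interrupt 0: if that interrupt is high-level
		 * (PIL > LOCK_LEVEL), this lock becomes a spin mutex;
		 * otherwise it is still adaptive.
		 */
		if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
			return (DDI_FAILURE);
		mutex_init(&hard_lock, NULL, MUTEX_DRIVER, (void *)ibc);

		return (DDI_SUCCESS);
	}
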
180 180
181 181 /*
182 182 * delayed lock retry with exponential delay for spin locks
183 183 *
184 184 * It is noted above that for both the spin locks and the adaptive locks,
185 185 * spinning is the dominant mode of operation. So long as there is only
186 186 * one thread waiting on a lock, the naive spin loop works very well in
187 187 * cache-based architectures. The lock data structure is pulled into the
188 188 * cache of the processor with the waiting/spinning thread and no further
189 189 * memory traffic is generated until the lock is released. Unfortunately,
190 190 * once two or more threads are waiting on a lock, the naive spin has
191 191 * the property of generating maximum memory traffic from each spinning
192 192 * thread as the spinning threads contend for the lock data structure.
193 193 *
194 194 * By executing a delay loop before retrying a lock, a waiting thread
195 195 * can reduce its memory traffic by a large factor, depending on the
196 196 * size of the delay loop. A large delay loop greatly reduces the memory
197 197 * traffic, but has the drawback of having a period of time when
198 198 * no thread is attempting to gain the lock even though several threads
199 199 * might be waiting. A small delay loop has the drawback of not
200 200 * much reduction in memory traffic, but reduces the potential idle time.
201 201 * The theory of the exponential delay code is to start with a short
202 202 * delay loop and double the waiting time on each iteration, up to
203 203 * a preselected maximum.
204 204 */
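
The backoff/delay pair implemented below follows that theory. As a rough, self-contained illustration of the same capped-doubling idea (the constants, the lock encoding, and the busy-wait loop here are made up and are not the tunables defined further down):

	#include <sys/types.h>
	#include <sys/atomic.h>

	#define	EX_BACKOFF_BASE	2	/* initial delay, in loop iterations */
	#define	EX_BACKOFF_CAP	1024	/* upper bound on the delay */

	static void
	example_spin_acquire(volatile uint_t *lockp)
	{
		uint_t backoff = EX_BACKOFF_BASE;
		volatile uint_t i;

		/* 0 means unheld, 1 means held; retry until the cas wins. */
		while (atomic_cas_uint(lockp, 0, 1) != 0) {
			/* Stay out of the coherence traffic for a while... */
			for (i = 0; i < backoff; i++)
				continue;
			/* ...then double the wait for the next retry, up to the cap. */
			if ((backoff <<= 1) > EX_BACKOFF_CAP)
				backoff = EX_BACKOFF_CAP;
		}
	}
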
205 205
206 206 #include <sys/param.h>
207 207 #include <sys/time.h>
208 208 #include <sys/cpuvar.h>
209 209 #include <sys/thread.h>
210 210 #include <sys/debug.h>
211 211 #include <sys/cmn_err.h>
212 212 #include <sys/sobject.h>
213 213 #include <sys/turnstile.h>
214 214 #include <sys/systm.h>
215 215 #include <sys/mutex_impl.h>
216 216 #include <sys/spl.h>
217 217 #include <sys/lockstat.h>
218 218 #include <sys/atomic.h>
219 219 #include <sys/cpu.h>
220 220 #include <sys/stack.h>
221 221 #include <sys/archsystm.h>
222 222 #include <sys/machsystm.h>
223 223 #include <sys/x_call.h>
224 224
225 225 /*
226 226 * The sobj_ops vector exports a set of functions needed when a thread
227 227 * is asleep on a synchronization object of this type.
228 228 */
229 229 static sobj_ops_t mutex_sobj_ops = {
230 230 SOBJ_MUTEX, mutex_owner, turnstile_stay_asleep, turnstile_change_pri
231 231 };
232 232
233 233 /*
234 234 * If the system panics on a mutex, save the address of the offending
235 235 * mutex in panic_mutex_addr, and save the contents in panic_mutex.
236 236 */
237 237 static mutex_impl_t panic_mutex;
238 238 static mutex_impl_t *panic_mutex_addr;
239 239
240 240 static void
241 241 mutex_panic(char *msg, mutex_impl_t *lp)
242 242 {
243 243 if (panicstr)
244 244 return;
245 245
246 - if (casptr(&panic_mutex_addr, NULL, lp) == NULL)
246 + if (atomic_cas_ptr(&panic_mutex_addr, NULL, lp) == NULL)
247 247 panic_mutex = *lp;
248 248
249 249 panic("%s, lp=%p owner=%p thread=%p",
250 250 msg, (void *)lp, (void *)MUTEX_OWNER(&panic_mutex),
251 251 (void *)curthread);
252 252 }
253 253
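
The single functional change in this file is the one visible above: the deprecated short-form casptr() becomes atomic_cas_ptr() from <sys/atomic.h> (bug 5042). A minimal sketch of the same publish-once compare-and-swap idiom, with invented names, assuming only atomic_cas_ptr()'s documented behavior of returning the old value:

	#include <sys/atomic.h>

	static void *first_seen;	/* the first pointer anyone published, or NULL */

	/*
	 * Store 'p' into first_seen only if first_seen is still NULL.
	 * atomic_cas_ptr() returns the prior value, so the cas succeeded
	 * exactly when that prior value was NULL.
	 */
	static int
	publish_first(void *p)
	{
		return (atomic_cas_ptr(&first_seen, NULL, p) == NULL);
	}
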
254 254 /* "tunables" for per-platform backoff constants. */
255 255 uint_t mutex_backoff_cap = 0;
256 256 ushort_t mutex_backoff_base = MUTEX_BACKOFF_BASE;
257 257 ushort_t mutex_cap_factor = MUTEX_CAP_FACTOR;
258 258 uchar_t mutex_backoff_shift = MUTEX_BACKOFF_SHIFT;
259 259
260 260 void
261 261 mutex_sync(void)
262 262 {
263 263 MUTEX_SYNC();
264 264 }
265 265
266 266 /* calculate the backoff interval */
267 267 uint_t
268 268 default_lock_backoff(uint_t backoff)
269 269 {
270 270 uint_t cap; /* backoff cap calculated */
271 271
272 272 if (backoff == 0) {
273 273 backoff = mutex_backoff_base;
274 274 /* first call just sets the base */
275 275 return (backoff);
276 276 }
277 277
278 278 /* set cap */
279 279 if (mutex_backoff_cap == 0) {
280 280 /*
281 281 * For a contended lock, in the worst case a load + cas may
282 282 * be queued at the controller for each contending CPU.
283 283 * Therefore, to avoid queueing, the accesses for all CPUS must
284 284 * be spread out in time over an interval of (ncpu *
285 285 * cap-factor). Maximum backoff is set to this value, and
286 286 * actual backoff is a random number from 0 to the current max.
287 287 */
288 288 cap = ncpus_online * mutex_cap_factor;
289 289 } else {
290 290 cap = mutex_backoff_cap;
291 291 }
292 292
293 293 /* calculate new backoff value */
294 294 backoff <<= mutex_backoff_shift; /* increase backoff */
295 295 if (backoff > cap) {
296 296 if (cap < mutex_backoff_base)
297 297 backoff = mutex_backoff_base;
298 298 else
299 299 backoff = cap;
300 300 }
301 301
302 302 return (backoff);
303 303 }
304 304
305 305 /*
306 306 * default delay function for mutexes.
307 307 */
308 308 void
309 309 default_lock_delay(uint_t backoff)
310 310 {
311 311 ulong_t rnd; /* random factor */
312 312 uint_t cur_backoff; /* calculated backoff */
313 313 uint_t backctr;
314 314
315 315 /*
316 316 * Modify backoff by a random amount to avoid lockstep, and to
317 317 * make it probable that some thread gets a small backoff, and
318 318 * re-checks quickly
319 319 */
320 320 rnd = (((long)curthread >> PTR24_LSB) ^ (long)MUTEX_GETTICK());
321 321 cur_backoff = (uint_t)(rnd % (backoff - mutex_backoff_base + 1)) +
322 322 mutex_backoff_base;
323 323
324 324 /*
325 325 * Delay before trying
326 326 * to touch the mutex data structure.
327 327 */
328 328 for (backctr = cur_backoff; backctr; backctr--) {
329 329 MUTEX_DELAY();
330 330 };
331 331 }
332 332
333 333 uint_t (*mutex_lock_backoff)(uint_t) = default_lock_backoff;
334 334 void (*mutex_lock_delay)(uint_t) = default_lock_delay;
335 335 void (*mutex_delay)(void) = mutex_delay_default;
336 336
337 337 /*
338 338 * mutex_vector_enter() is called from the assembly mutex_enter() routine
339 339 * if the lock is held or is not of type MUTEX_ADAPTIVE.
340 340 */
341 341 void
342 342 mutex_vector_enter(mutex_impl_t *lp)
343 343 {
344 344 kthread_id_t owner;
345 345 kthread_id_t lastowner = MUTEX_NO_OWNER; /* track owner changes */
346 346 hrtime_t sleep_time = 0; /* how long we slept */
347 347 hrtime_t spin_time = 0; /* how long we spun */
348 348 cpu_t *cpup;
349 349 turnstile_t *ts;
350 350 volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
351 351 uint_t backoff = 0; /* current backoff */
352 352 int changecnt = 0; /* count of owner changes */
353 353
354 354 ASSERT_STACK_ALIGNED();
355 355
356 356 if (MUTEX_TYPE_SPIN(lp)) {
357 357 lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
358 358 &lp->m_spin.m_oldspl);
359 359 return;
360 360 }
361 361
362 362 if (!MUTEX_TYPE_ADAPTIVE(lp)) {
363 363 mutex_panic("mutex_enter: bad mutex", lp);
364 364 return;
365 365 }
366 366
367 367 /*
368 368 * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
369 369 * We can migrate after loading CPU but before checking CPU_ON_INTR,
370 370 * so we must verify by disabling preemption and loading CPU again.
371 371 */
372 372 cpup = CPU;
373 373 if (CPU_ON_INTR(cpup) && !panicstr) {
374 374 kpreempt_disable();
375 375 if (CPU_ON_INTR(CPU))
376 376 mutex_panic("mutex_enter: adaptive at high PIL", lp);
377 377 kpreempt_enable();
378 378 }
379 379
380 380 CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);
381 381
382 382 spin_time = LOCKSTAT_START_TIME(LS_MUTEX_ENTER_SPIN);
383 383
384 384 backoff = mutex_lock_backoff(0); /* set base backoff */
385 385 for (;;) {
386 386 mutex_lock_delay(backoff); /* backoff delay */
387 387
388 388 if (panicstr)
389 389 return;
390 390
391 391 if ((owner = MUTEX_OWNER(vlp)) == NULL) {
392 392 if (mutex_adaptive_tryenter(lp)) {
393 393 break;
394 394 }
395 395 /* increase backoff only on failed attempt. */
396 396 backoff = mutex_lock_backoff(backoff);
397 397 changecnt++;
398 398 continue;
399 399 } else if (lastowner != owner) {
400 400 lastowner = owner;
401 401 backoff = mutex_lock_backoff(backoff);
402 402 changecnt++;
403 403 }
404 404
405 405 if (changecnt >= ncpus_online) {
406 406 backoff = mutex_lock_backoff(0);
407 407 changecnt = 0;
408 408 }
409 409
410 410 if (owner == curthread)
411 411 mutex_panic("recursive mutex_enter", lp);
412 412
413 413 /*
414 414 * If lock is held but owner is not yet set, spin.
415 415 * (Only relevant for platforms that don't have cas.)
416 416 */
417 417 if (owner == MUTEX_NO_OWNER)
418 418 continue;
419 419
420 420 if (mutex_owner_running(lp) != NULL) {
421 421 continue;
422 422 }
423 423
424 424 /*
425 425 * The owner appears not to be running, so block.
426 426 * See the Big Theory Statement for memory ordering issues.
427 427 */
428 428 ts = turnstile_lookup(lp);
429 429 MUTEX_SET_WAITERS(lp);
430 430 membar_enter();
431 431
432 432 /*
433 433 * Recheck whether owner is running after waiters bit hits
434 434 * global visibility (above). If owner is running, spin.
435 435 */
436 436 if (mutex_owner_running(lp) != NULL) {
437 437 turnstile_exit(lp);
438 438 continue;
439 439 }
440 440 membar_consumer();
441 441
442 442 /*
443 443 * If owner and waiters bit are unchanged, block.
444 444 */
445 445 if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
446 446 sleep_time -= gethrtime();
447 447 (void) turnstile_block(ts, TS_WRITER_Q, lp,
448 448 &mutex_sobj_ops, NULL, NULL);
449 449 sleep_time += gethrtime();
450 450 /* reset backoff after turnstile */
451 451 backoff = mutex_lock_backoff(0);
452 452 } else {
453 453 turnstile_exit(lp);
454 454 }
455 455 }
456 456
457 457 ASSERT(MUTEX_OWNER(lp) == curthread);
458 458
459 459 if (sleep_time != 0) {
460 460 /*
461 461 * Note, sleep time is the sum of all the sleeping we
462 462 * did.
463 463 */
464 464 LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
465 465 }
466 466
467 467 /* record spin time, don't count sleep time */
468 468 if (spin_time != 0) {
469 469 LOCKSTAT_RECORD_TIME(LS_MUTEX_ENTER_SPIN, lp,
470 470 spin_time + sleep_time);
471 471 }
472 472
473 473 LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
474 474 }
475 475
476 476 /*
477 477 * mutex_vector_tryenter() is called from the assembly mutex_tryenter()
478 478 * routine if the lock is held or is not of type MUTEX_ADAPTIVE.
479 479 */
480 480 int
481 481 mutex_vector_tryenter(mutex_impl_t *lp)
482 482 {
483 483 int s;
484 484
485 485 if (MUTEX_TYPE_ADAPTIVE(lp))
486 486 return (0); /* we already tried in assembly */
487 487
488 488 if (!MUTEX_TYPE_SPIN(lp)) {
489 489 mutex_panic("mutex_tryenter: bad mutex", lp);
490 490 return (0);
491 491 }
492 492
493 493 s = splr(lp->m_spin.m_minspl);
494 494 if (lock_try(&lp->m_spin.m_spinlock)) {
495 495 lp->m_spin.m_oldspl = (ushort_t)s;
496 496 return (1);
497 497 }
498 498 splx(s);
499 499 return (0);
500 500 }
501 501
502 502 /*
503 503 * mutex_vector_exit() is called from mutex_exit() if the lock is not
504 504 * adaptive, has waiters, or is not owned by the current thread (panic).
505 505 */
506 506 void
507 507 mutex_vector_exit(mutex_impl_t *lp)
508 508 {
509 509 turnstile_t *ts;
510 510
511 511 if (MUTEX_TYPE_SPIN(lp)) {
512 512 lock_clear_splx(&lp->m_spin.m_spinlock, lp->m_spin.m_oldspl);
513 513 return;
514 514 }
515 515
516 516 if (MUTEX_OWNER(lp) != curthread) {
517 517 mutex_panic("mutex_exit: not owner", lp);
518 518 return;
519 519 }
520 520
521 521 ts = turnstile_lookup(lp);
522 522 MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
523 523 if (ts == NULL)
524 524 turnstile_exit(lp);
525 525 else
526 526 turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
527 527 LOCKSTAT_RECORD0(LS_MUTEX_EXIT_RELEASE, lp);
528 528 }
529 529
530 530 int
531 531 mutex_owned(const kmutex_t *mp)
532 532 {
533 533 const mutex_impl_t *lp = (const mutex_impl_t *)mp;
534 534
535 535 if (panicstr || quiesce_active)
536 536 return (1);
537 537
538 538 if (MUTEX_TYPE_ADAPTIVE(lp))
539 539 return (MUTEX_OWNER(lp) == curthread);
540 540 return (LOCK_HELD(&lp->m_spin.m_spinlock));
541 541 }
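
mutex_owned() exists mainly to back assertions such as the hypothetical helper below (MUTEX_HELD() is the usual wrapper from <sys/mutex.h>); the early return of 1 above keeps such ASSERTs from firing while the system is panicking or quiescing.

	#include <sys/ksynch.h>
	#include <sys/debug.h>

	static kmutex_t foo_lock;	/* protects foo_count */
	static int foo_count;

	/* Callers must already hold foo_lock. */
	static void
	foo_bump(void)
	{
		ASSERT(MUTEX_HELD(&foo_lock));
		foo_count++;
	}
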
542 542
543 543 kthread_t *
544 544 mutex_owner(const kmutex_t *mp)
545 545 {
546 546 const mutex_impl_t *lp = (const mutex_impl_t *)mp;
547 547 kthread_id_t t;
548 548
549 549 if (MUTEX_TYPE_ADAPTIVE(lp) && (t = MUTEX_OWNER(lp)) != MUTEX_NO_OWNER)
550 550 return (t);
551 551 return (NULL);
552 552 }
553 553
554 554 /*
555 555 * The iblock cookie 'ibc' is the spl level associated with the lock;
556 556 * this alone determines whether the lock will be ADAPTIVE or SPIN.
557 557 *
558 558 * Adaptive mutexes created in zeroed memory do not need to call
559 559 * mutex_init() as their allocation in this fashion guarantees
560 560 * their initialization.
561 561 * eg adaptive mutexes created as static within the BSS or allocated
562 562 * by kmem_zalloc().
563 563 */
564 564 /* ARGSUSED */
565 565 void
566 566 mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc)
567 567 {
568 568 mutex_impl_t *lp = (mutex_impl_t *)mp;
569 569
570 570 ASSERT(ibc < (void *)KERNELBASE); /* see 1215173 */
571 571
572 572 if ((intptr_t)ibc > ipltospl(LOCK_LEVEL) && ibc < (void *)KERNELBASE) {
573 573 ASSERT(type != MUTEX_ADAPTIVE && type != MUTEX_DEFAULT);
574 574 MUTEX_SET_TYPE(lp, MUTEX_SPIN);
575 575 LOCK_INIT_CLEAR(&lp->m_spin.m_spinlock);
576 576 LOCK_INIT_HELD(&lp->m_spin.m_dummylock);
577 577 lp->m_spin.m_minspl = (int)(intptr_t)ibc;
578 578 } else {
579 579 #ifdef MUTEX_ALIGN
580 580 static int misalign_cnt = 0;
581 581
582 582 if (((uintptr_t)lp & (uintptr_t)(MUTEX_ALIGN - 1)) &&
583 583 (misalign_cnt < MUTEX_ALIGN_WARNINGS)) {
584 584 /*
585 585 * The mutex is not aligned and may cross a cache line.
586 586 * This is not supported and may cause a panic.
587 587 * Show a warning that the mutex is not aligned
588 588 * and attempt to identify the origin.
589 589 * Unaligned mutexes are not (supposed to be)
590 590 * possible on SPARC.
591 591 */
592 592 char *funcname;
593 593 ulong_t offset = 0;
594 594
595 595 funcname = modgetsymname((uintptr_t)caller(), &offset);
596 596 cmn_err(CE_WARN, "mutex_init: %p is not %d byte "
597 597 "aligned; caller %s+%lx in module %s. "
598 598 "This is unsupported and may cause a panic. "
599 599 "Please report this to the kernel module supplier.",
600 600 (void *)lp, MUTEX_ALIGN,
601 601 funcname ? funcname : "unknown", offset,
602 602 mod_containing_pc(caller()));
603 603 misalign_cnt++;
604 604 if (misalign_cnt >= MUTEX_ALIGN_WARNINGS) {
605 605 cmn_err(CE_WARN, "mutex_init: further unaligned"
606 606 " mutex warnings will be suppressed.");
607 607 }
608 608 }
609 609 #endif /* MUTEX_ALIGN */
610 610 ASSERT(type != MUTEX_SPIN);
611 611
612 612 MUTEX_SET_TYPE(lp, MUTEX_ADAPTIVE);
613 613 MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
614 614 }
615 615 }
616 616
617 617 void
618 618 mutex_destroy(kmutex_t *mp)
619 619 {
620 620 mutex_impl_t *lp = (mutex_impl_t *)mp;
621 621
622 622 if (lp->m_owner == 0 && !MUTEX_HAS_WAITERS(lp)) {
623 623 MUTEX_DESTROY(lp);
624 624 } else if (MUTEX_TYPE_SPIN(lp)) {
625 625 LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
626 626 MUTEX_DESTROY(lp);
627 627 } else if (MUTEX_TYPE_ADAPTIVE(lp)) {
628 628 LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
629 629 if (MUTEX_OWNER(lp) != curthread)
630 630 mutex_panic("mutex_destroy: not owner", lp);
631 631 if (MUTEX_HAS_WAITERS(lp)) {
632 632 turnstile_t *ts = turnstile_lookup(lp);
633 633 turnstile_exit(lp);
634 634 if (ts != NULL)
635 635 mutex_panic("mutex_destroy: has waiters", lp);
636 636 }
637 637 MUTEX_DESTROY(lp);
638 638 } else {
639 639 mutex_panic("mutex_destroy: bad mutex", lp);
640 640 }
641 641 }
642 642
643 643 /*
644 644 * Simple C support for the cases where spin locks miss on the first try.
645 645 */
646 646 void
647 647 lock_set_spin(lock_t *lp)
648 648 {
649 649 int loop_count = 0;
650 650 uint_t backoff = 0; /* current backoff */
651 651 hrtime_t spin_time = 0; /* how long we spun */
652 652
653 653 if (panicstr)
654 654 return;
655 655
656 656 if (ncpus == 1)
657 657 panic("lock_set: %p lock held and only one CPU", (void *)lp);
658 658
659 659 spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPIN);
660 660
661 661 while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
662 662 if (panicstr)
663 663 return;
664 664 loop_count++;
665 665
666 666 if (ncpus_online == loop_count) {
667 667 backoff = mutex_lock_backoff(0);
668 668 loop_count = 0;
669 669 } else {
670 670 backoff = mutex_lock_backoff(backoff);
671 671 }
672 672 mutex_lock_delay(backoff);
673 673 }
674 674
675 675 LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPIN, lp, spin_time);
676 676
677 677 LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp);
678 678 }
679 679
680 680 void
681 681 lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
682 682 {
683 683 int loop_count = 0;
684 684 uint_t backoff = 0; /* current backoff */
685 685 hrtime_t spin_time = 0; /* how long we spun */
686 686
687 687 if (panicstr)
688 688 return;
689 689
690 690 if (ncpus == 1)
691 691 panic("lock_set_spl: %p lock held and only one CPU",
692 692 (void *)lp);
693 693
694 694 ASSERT(new_pil > LOCK_LEVEL);
695 695
696 696 spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPL_SPIN);
697 697
698 698 do {
699 699 splx(old_pil);
700 700 while (LOCK_HELD(lp)) {
701 701 loop_count++;
702 702
703 703 if (panicstr) {
704 704 *old_pil_addr = (ushort_t)splr(new_pil);
705 705 return;
706 706 }
707 707 if (ncpus_online == loop_count) {
708 708 backoff = mutex_lock_backoff(0);
709 709 loop_count = 0;
710 710 } else {
711 711 backoff = mutex_lock_backoff(backoff);
712 712 }
713 713 mutex_lock_delay(backoff);
714 714 }
715 715 old_pil = splr(new_pil);
716 716 } while (!lock_spin_try(lp));
717 717
718 718 *old_pil_addr = (ushort_t)old_pil;
719 719
720 720 LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPL_SPIN, lp, spin_time);
721 721
722 722 LOCKSTAT_RECORD0(LS_LOCK_SET_SPL_ACQUIRE, lp);
723 723 }
(467 lines elided)