323 ASSERT(MUTEX_HELD(&cpu_lock));
324
325 disp_mem = kmem_zalloc(NCPU *
326 sizeof (struct disp_queue_info), KM_SLEEP);
327
328 /*
329 * This routine must allocate all of the memory before stopping
330 * the cpus because it must not sleep in kmem_alloc while the
331 * CPUs are stopped. Locks they hold will not be freed until they
332 * are restarted.
333 */
334 i = 0;
335 cpup = cpu_list;
336 do {
337 disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp);
338 i++;
339 cpup = cpup->cpu_next;
340 } while (cpup != cpu_list);
341 num = i;
342
343 pause_cpus(NULL);
344 for (i = 0; i < num; i++)
345 disp_dq_assign(&disp_mem[i], numpris);
346 start_cpus();
347
348 /*
349 * I must free all of the memory after starting the cpus because
350 * I cannot risk sleeping in kmem_free while the cpus are stopped.
351 */
352 for (i = 0; i < num; i++)
353 disp_dq_free(&disp_mem[i]);
354
355 kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info));
356 }
357
358 static void
359 disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp)
360 {
361 dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP);
362 dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) *
363 sizeof (long), KM_SLEEP);
1152 * setbackdq() keeps runqs balanced such that the difference in length
1153 * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF.
1154 * For threads with priorities below RUNQ_MATCH_PRI, the runq lengths
1155 * must match. When the per-thread TS_RUNQMATCH flag is set, setbackdq() will
1156 * try to keep runqs perfectly balanced regardless of the thread priority.
1157 */
1158 #define RUNQ_MATCH_PRI 16 /* pri below which queue lengths must match */
1159 #define RUNQ_MAX_DIFF 2 /* maximum runq length difference */
1160 #define RUNQ_LEN(cp, pri) ((cp)->cpu_disp->disp_q[pri].dq_sruncnt)
1161
1162 /*
1163 * Macro that evaluates to true if it is likely that the thread has cache
1164 * warmth. This is based on the amount of time that has elapsed since the
1165 * thread last ran. If that amount of time is less than "rechoose_interval"
1166 * ticks, then we decide that the thread has enough cache warmth to warrant
1167 * some affinity for t->t_cpu.
1168 */
1169 #define THREAD_HAS_CACHE_WARMTH(thread) \
1170 ((thread == curthread) || \
1171 ((ddi_get_lbolt() - thread->t_disp_time) <= rechoose_interval))
1172 /*
1173 * Put the specified thread on the back of the dispatcher
1174 * queue corresponding to its current priority.
1175 *
1176 * Called with the thread in transition, onproc or stopped state
1177 * and locked (transition implies locked) and at high spl.
1178 * Returns with the thread in TS_RUN state and still locked.
1179 */
1180 void
1181 setbackdq(kthread_t *tp)
1182 {
1183 dispq_t *dq;
1184 disp_t *dp;
1185 cpu_t *cp;
1186 pri_t tpri;
1187 int bound;
1188 boolean_t self;
1189
1190 ASSERT(THREAD_LOCK_HELD(tp));
1191 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
1192 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */
1193
1194 /*
1195 * If thread is "swapped" or on the swap queue don't
1196 * queue it, but wake sched.
1197 */
1198 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
1199 disp_swapped_setrun(tp);
1200 return;
1201 }
1202
1203 self = (tp == curthread);
1204
1205 if (tp->t_bound_cpu || tp->t_weakbound_cpu)
1206 bound = 1;
1207 else
1208 bound = 0;
1209
1210 tpri = DISP_PRIO(tp);
1211 if (ncpus == 1)
1212 cp = tp->t_cpu;
1213 else if (!bound) {
1214 if (tpri >= kpqpri) {
1215 setkpdq(tp, SETKP_BACK);
1216 return;
1217 }
1218
1219 /*
1220 * We'll generally let this thread continue to run where
1221 * it last ran...but will consider migration if:
1222 * - The thread probably doesn't have much cache warmth.
1223 * - The CPU where it last ran is the target of an offline
1224 * request.
1225 * - The thread last ran outside its home lgroup.
1226 */
1227 if ((!THREAD_HAS_CACHE_WARMTH(tp)) ||
1228 (tp->t_cpu == cpu_inmotion)) {
1229 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL);
1230 } else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
1231 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1232 self ? tp->t_cpu : NULL);
1233 } else {
1234 cp = tp->t_cpu;
1235 }
1236
1237 if (tp->t_cpupart == cp->cpu_part) {
1238 int qlen;
1239
1240 /*
1241 * Perform any CMT load balancing
1242 */
1243 cp = cmt_balance(tp, cp);
1244
1245 /*
1246 * Balance across the run queues
1247 */
1248 qlen = RUNQ_LEN(cp, tpri);
1249 if (tpri >= RUNQ_MATCH_PRI &&
1250 !(tp->t_schedflag & TS_RUNQMATCH))
1251 qlen -= RUNQ_MAX_DIFF;
1252 if (qlen > 0) {
1253 cpu_t *newcp;
1254
1255 if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) {
1256 newcp = cp->cpu_next_part;
1257 } else if ((newcp = cp->cpu_next_lpl) == cp) {
1258 newcp = cp->cpu_next_part;
1259 }
1260
1261 if (RUNQ_LEN(newcp, tpri) < qlen) {
1262 DTRACE_PROBE3(runq__balance,
1263 kthread_t *, tp,
1264 cpu_t *, cp, cpu_t *, newcp);
1265 cp = newcp;
1266 }
1267 }
1268 } else {
1269 /*
1270 * Migrate to a cpu in the new partition.
1271 */
1272 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
1273 tp->t_lpl, tp->t_pri, NULL);
1274 }
1275 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1276 } else {
1277 /*
1278 * It is possible that t_weakbound_cpu != t_bound_cpu (for
1279 * a short time until weak binding that existed when the
1280 * strong binding was established has dropped) so we must
1281 * favour weak binding over strong.
1282 */
1283 cp = tp->t_weakbound_cpu ?
1284 tp->t_weakbound_cpu : tp->t_bound_cpu;
1285 }
1286 /*
1287 * A thread that is ONPROC may be temporarily placed on the run queue
1288 * but then chosen to run again by disp. If the thread we're placing on
1289 * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1290 * replacement process is actually scheduled in swtch(). In this
1291 * situation, curthread is the only thread that could be in the ONPROC
1292 * state.
1293 */
1294 if ((!self) && (tp->t_waitrq == 0)) {
1295 hrtime_t curtime;
1296
1297 curtime = gethrtime_unscaled();
1298 (void) cpu_update_pct(tp, curtime);
1299 tp->t_waitrq = curtime;
1300 } else {
1301 (void) cpu_update_pct(tp, gethrtime_unscaled());
1302 }
1303
1304 dp = cp->cpu_disp;
1305 disp_lock_enter_high(&dp->disp_lock);
1306
1307 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0);
1308 TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p",
1309 tpri, cp, tp);
1310
1311 #ifndef NPROBE
1312 /* Kernel probe */
1313 if (tnf_tracing_active)
1314 tnf_thread_queue(tp, cp, tpri);
1315 #endif /* NPROBE */
1316
1317 ASSERT(tpri >= 0 && tpri < dp->disp_npri);
1318
1319 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */
1320 tp->t_disp_queue = dp;
1321 tp->t_link = NULL;
1322
1323 dq = &dp->disp_q[tpri];
1324 dp->disp_nrunnable++;
1325 if (!bound)
1326 dp->disp_steal = 0;
1327 membar_enter();
1328
1329 if (dq->dq_sruncnt++ != 0) {
1330 ASSERT(dq->dq_first != NULL);
1331 dq->dq_last->t_link = tp;
1332 dq->dq_last = tp;
1333 } else {
1334 ASSERT(dq->dq_first == NULL);
1335 ASSERT(dq->dq_last == NULL);
1336 dq->dq_first = dq->dq_last = tp;
1337 BT_SET(dp->disp_qactmap, tpri);
1338 if (tpri > dp->disp_maxrunpri) {
1339 dp->disp_maxrunpri = tpri;
1340 membar_enter();
1341 cpu_resched(cp, tpri);
1342 }
1343 }
1344
1345 if (!bound && tpri > dp->disp_max_unbound_pri) {
1346 if (self && dp->disp_max_unbound_pri == -1 && cp == CPU) {
1347 /*
1348 * If there are no other unbound threads on the
1349 * run queue, don't allow other CPUs to steal
1350 * this thread while we are in the middle of a
1351 * context switch. We may just switch to it
1352 * again right away. CPU_DISP_DONTSTEAL is cleared
1353 * in swtch and swtch_to.
1354 */
1355 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
1356 }
1357 dp->disp_max_unbound_pri = tpri;
1358 }
1359 (*disp_enq_thread)(cp, bound);
1360 }
1361
1362 /*
1363 * Put the specified thread on the front of the dispatcher
1364 * queue corresponding to its current priority.
1365 *
1366 * Called with the thread in transition, onproc or stopped state
1367 * and locked (transition implies locked) and at high spl.
1368 * Returns with the thread in TS_RUN state and still locked.
1369 */
1370 void
1371 setfrontdq(kthread_t *tp)
1372 {
1373 disp_t *dp;
1374 dispq_t *dq;
1375 cpu_t *cp;
1376 pri_t tpri;
1377 int bound;
1378
1379 ASSERT(THREAD_LOCK_HELD(tp));
1380 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
1381 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */
1382
1383 /*
1384 * If thread is "swapped" or on the swap queue don't
1385 * queue it, but wake sched.
1386 */
1387 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
1388 disp_swapped_setrun(tp);
1389 return;
1390 }
1391
1392 if (tp->t_bound_cpu || tp->t_weakbound_cpu)
1393 bound = 1;
1394 else
1395 bound = 0;
1396
1397 tpri = DISP_PRIO(tp);
1398 if (ncpus == 1)
1399 cp = tp->t_cpu;
1400 else if (!bound) {
1401 if (tpri >= kpqpri) {
1402 setkpdq(tp, SETKP_FRONT);
1403 return;
1404 }
1405 cp = tp->t_cpu;
1406 if (tp->t_cpupart == cp->cpu_part) {
1407 /*
1408 * We'll generally let this thread continue to run
1409 * where it last ran, but will consider migration if:
1410 * - The thread last ran outside its home lgroup.
1411 * - The CPU where it last ran is the target of an
1412 * offline request (a thread_nomigrate() on the in
1413 * motion CPU relies on this when forcing a preempt).
1414 * - The thread isn't the highest priority thread where
1415 * it last ran, and it is considered not likely to
1416 * have significant cache warmth.
1417 */
1418 if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) ||
1419 (cp == cpu_inmotion)) {
1420 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1421 (tp == curthread) ? cp : NULL);
1422 } else if ((tpri < cp->cpu_disp->disp_maxrunpri) &&
1423 (!THREAD_HAS_CACHE_WARMTH(tp))) {
1424 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1425 NULL);
1426 }
1427 } else {
1428 /*
1429 * Migrate to a cpu in the new partition.
1430 */
1431 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
1432 tp->t_lpl, tp->t_pri, NULL);
1433 }
1434 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1435 } else {
1436 /*
1437 * It is possible that t_weakbound_cpu != t_bound_cpu (for
1438 * a short time until weak binding that existed when the
1439 * strong binding was established has dropped) so we must
1440 * favour weak binding over strong.
1441 */
1442 cp = tp->t_weakbound_cpu ?
1443 tp->t_weakbound_cpu : tp->t_bound_cpu;
1444 }
1445
1446 /*
1447 * A thread that is ONPROC may be temporarily placed on the run queue
1448 * but then chosen to run again by disp. If the thread we're placing on
1449 * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1450 * replacement process is actually scheduled in swtch(). In this
1451 * situation, curthread is the only thread that could be in the ONPROC
1452 * state.
1453 */
1454 if ((tp != curthread) && (tp->t_waitrq == 0)) {
1455 hrtime_t curtime;
1456
1457 curtime = gethrtime_unscaled();
1458 (void) cpu_update_pct(tp, curtime);
1459 tp->t_waitrq = curtime;
1460 } else {
1461 (void) cpu_update_pct(tp, gethrtime_unscaled());
1462 }
1463
1464 dp = cp->cpu_disp;
1465 disp_lock_enter_high(&dp->disp_lock);
1466
1467 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);
1468 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1);
1469
1470 #ifndef NPROBE
1471 /* Kernel probe */
1472 if (tnf_tracing_active)
1473 tnf_thread_queue(tp, cp, tpri);
1474 #endif /* NPROBE */
1475
1476 ASSERT(tpri >= 0 && tpri < dp->disp_npri);
1477
1478 THREAD_RUN(tp, &dp->disp_lock); /* set TS_RUN state and lock */
1479 tp->t_disp_queue = dp;
1480
1481 dq = &dp->disp_q[tpri];
1482 dp->disp_nrunnable++;
1483 if (!bound)
1484 dp->disp_steal = 0;
1485 membar_enter();
1486
1487 if (dq->dq_sruncnt++ != 0) {
1488 ASSERT(dq->dq_last != NULL);
1489 tp->t_link = dq->dq_first;
1490 dq->dq_first = tp;
1491 } else {
1492 ASSERT(dq->dq_last == NULL);
1493 ASSERT(dq->dq_first == NULL);
1494 tp->t_link = NULL;
1495 dq->dq_first = dq->dq_last = tp;
1496 BT_SET(dp->disp_qactmap, tpri);
1497 if (tpri > dp->disp_maxrunpri) {
1498 dp->disp_maxrunpri = tpri;
1499 membar_enter();
1500 cpu_resched(cp, tpri);
1501 }
1502 }
1503
1504 if (!bound && tpri > dp->disp_max_unbound_pri) {
1505 if (tp == curthread && dp->disp_max_unbound_pri == -1 &&
1506 cp == CPU) {
1507 /*
1508 * If there are no other unbound threads on the
1509 * run queue, don't allow other CPUs to steal
1510 * this thread while we are in the middle of a
1511 * context switch. We may just switch to it
1512 * again right away. CPU_DISP_DONTSTEAL is cleared
1513 * in swtch and swtch_to.
1514 */
1515 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
1516 }
1517 dp->disp_max_unbound_pri = tpri;
1518 }
1519 (*disp_enq_thread)(cp, bound);
1520 }
1521
1522 /*
1523 * Put a high-priority unbound thread on the kp queue
1524 */
1525 static void
1526 setkpdq(kthread_t *tp, int borf)
1527 {
1528 dispq_t *dq;
1529 disp_t *dp;
1530 cpu_t *cp;
1531 pri_t tpri;
1532
1533 tpri = DISP_PRIO(tp);
1534
1535 dp = &tp->t_cpupart->cp_kp_queue;
1536 disp_lock_enter_high(&dp->disp_lock);
1537
1538 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);
1539
|
323 ASSERT(MUTEX_HELD(&cpu_lock));
324
325 disp_mem = kmem_zalloc(NCPU *
326 sizeof (struct disp_queue_info), KM_SLEEP);
327
328 /*
329 * This routine must allocate all of the memory before stopping
330 * the cpus because it must not sleep in kmem_alloc while the
331 * CPUs are stopped. Locks they hold will not be freed until they
332 * are restarted.
333 */
334 i = 0;
335 cpup = cpu_list;
336 do {
337 disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp);
338 i++;
339 cpup = cpup->cpu_next;
340 } while (cpup != cpu_list);
341 num = i;
342
343 pause_cpus(NULL, NULL);
344 for (i = 0; i < num; i++)
345 disp_dq_assign(&disp_mem[i], numpris);
346 start_cpus();
347
348 /*
349 * I must free all of the memory after starting the cpus because
350 * I cannot risk sleeping in kmem_free while the cpus are stopped.
351 */
352 for (i = 0; i < num; i++)
353 disp_dq_free(&disp_mem[i]);
354
355 kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info));
356 }
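
/*
 * Editorial sketch (not part of the original source): the general shape of
 * the resize protocol used above, with hypothetical helper names.  Every
 * sleeping allocation happens before pause_cpus() and every free after
 * start_cpus(), so this thread never blocks in the allocator while the
 * other CPUs are held paused:
 *
 *	new = kmem_zalloc(sz, KM_SLEEP);  -- may sleep; CPUs still running
 *	pause_cpus(NULL, NULL);		  -- stop the other CPUs
 *	old = install_new(new);		  -- hypothetical: pointer swaps only
 *	start_cpus();			  -- let them run again
 *	kmem_free(old, sz);		  -- may sleep; safe now
 */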
357
358 static void
359 disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp)
360 {
361 dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP);
362 dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) *
363 sizeof (long), KM_SLEEP);
1152 * setbackdq() keeps runqs balanced such that the difference in length
1153 * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF.
1154 * For threads with priorities below RUNQ_MATCH_PRI, the runq lengths
1155 * must match. When the per-thread TS_RUNQMATCH flag is set, setbackdq() will
1156 * try to keep runqs perfectly balanced regardless of the thread priority.
1157 */
1158 #define RUNQ_MATCH_PRI 16 /* pri below which queue lengths must match */
1159 #define RUNQ_MAX_DIFF 2 /* maximum runq length difference */
1160 #define RUNQ_LEN(cp, pri) ((cp)->cpu_disp->disp_q[pri].dq_sruncnt)
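
/*
 * Editorial worked example (not part of the original source), using the
 * definitions above: suppose an unbound priority-30 thread (30 >=
 * RUNQ_MATCH_PRI, TS_RUNQMATCH clear) is enqueued on a CPU whose runq at
 * that priority already holds 3 threads.  Then qlen = 3 - RUNQ_MAX_DIFF = 1.
 * If the candidate CPU's queue holds 1 thread, 1 < 1 is false and the
 * thread stays put; if that queue is empty, 0 < 1 holds and setbackdq()
 * migrates the thread.  Below RUNQ_MATCH_PRI (or with TS_RUNQMATCH set)
 * the RUNQ_MAX_DIFF slack is not subtracted, so the thread moves whenever
 * the candidate queue is strictly shorter, keeping the lengths matched.
 */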
1161
1162 /*
1163 * Macro that evaluates to true if it is likely that the thread has cache
1164 * warmth. This is based on the amount of time that has elapsed since the
1165 * thread last ran. If that amount of time is less than "rechoose_interval"
1166 * ticks, then we decide that the thread has enough cache warmth to warrant
1167 * some affinity for t->t_cpu.
1168 */
1169 #define THREAD_HAS_CACHE_WARMTH(thread) \
1170 ((thread == curthread) || \
1171 ((ddi_get_lbolt() - thread->t_disp_time) <= rechoose_interval))
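
/*
 * Editorial example (not part of the original source), assuming the
 * traditional rechoose_interval default of 3 ticks: a thread whose
 * t_disp_time is 2 ticks in the past is treated as cache-warm and keeps
 * affinity for t->t_cpu, one that last ran 10 ticks ago is treated as
 * cold and may be migrated via disp_lowpri_cpu(), and curthread is always
 * treated as warm because it is running on a CPU right now.
 */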
1172
1173 /*
1174 * Put the specified thread on the front/back of the dispatcher queue
1175 * corresponding to its current priority.
1176 *
1177 * Called with the thread in transition, onproc or stopped state and locked
1178 * (transition implies locked) and at high spl. Returns with the thread in
1179 * TS_RUN state and still locked.
1180 */
1181 static void
1182 setfrontbackdq(kthread_t *tp, boolean_t front)
1183 {
1184 dispq_t *dq;
1185 disp_t *dp;
1186 cpu_t *cp;
1187 pri_t tpri;
1188 boolean_t bound;
1189 boolean_t self;
1190
1191 ASSERT(THREAD_LOCK_HELD(tp));
1192 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
1193 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */
1194
1195 /*
1196 * If thread is "swapped" or on the swap queue don't
1197 * queue it, but wake sched.
1198 */
1199 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
1200 disp_swapped_setrun(tp);
1201 return;
1202 }
1203
1204 self = (tp == curthread);
1205 bound = (tp->t_bound_cpu || tp->t_weakbound_cpu);
1206
1207 tpri = DISP_PRIO(tp);
1208 if (ncpus == 1)
1209 cp = tp->t_cpu;
1210 else if (!bound) {
1211 if (tpri >= kpqpri) {
1212 setkpdq(tp, front ? SETKP_FRONT : SETKP_BACK);
1213 return;
1214 }
1215
1216 cp = tp->t_cpu;
1217
1218 if (!front) {
1219 /*
1220 * We'll generally let this thread continue to run where
1221 * it last ran...but will consider migration if:
1222 * - The thread probably doesn't have much cache warmth.
1223 * - The CPU where it last ran is the target of an offline
1224 * request.
1225 * - The thread last ran outside its home lgroup.
1226 */
1227 if ((!THREAD_HAS_CACHE_WARMTH(tp)) || (cp == cpu_inmotion)) {
1228 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL);
1229 } else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
1230 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1231 self ? tp->t_cpu : NULL);
1232 }
1233
1234 }
1235
1236 if (tp->t_cpupart == cp->cpu_part) {
1237 if (front) {
1238 /*
1239 * We'll generally let this thread continue to run
1240 * where it last ran, but will consider migration if:
1241 * - The thread last ran outside its home lgroup.
1242 * - The CPU where it last ran is the target of an
1243 * offline request (a thread_nomigrate() on the in
1244 * motion CPU relies on this when forcing a preempt).
1245 * - The thread isn't the highest priority thread where
1246 * it last ran, and it is considered not likely to
1247 * have significant cache warmth.
1248 */
1249 if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) ||
1250 (cp == cpu_inmotion)) {
1251 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1252 self ? cp : NULL);
1253 } else if ((tpri < cp->cpu_disp->disp_maxrunpri) &&
1254 (!THREAD_HAS_CACHE_WARMTH(tp))) {
1255 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1256 NULL);
1257 }
1258 } else {
1259 int qlen;
1260
1261 /*
1262 * Perform any CMT load balancing
1263 */
1264 cp = cmt_balance(tp, cp);
1265
1266 /*
1267 * Balance across the run queues
1268 */
1269 qlen = RUNQ_LEN(cp, tpri);
1270 if (tpri >= RUNQ_MATCH_PRI &&
1271 !(tp->t_schedflag & TS_RUNQMATCH))
1272 qlen -= RUNQ_MAX_DIFF;
1273 if (qlen > 0) {
1274 cpu_t *newcp;
1275
1276 if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) {
1277 newcp = cp->cpu_next_part;
1278 } else if ((newcp = cp->cpu_next_lpl) == cp) {
1279 newcp = cp->cpu_next_part;
1280 }
1281
1282 if (RUNQ_LEN(newcp, tpri) < qlen) {
1283 DTRACE_PROBE3(runq__balance,
1284 kthread_t *, tp,
1285 cpu_t *, cp, cpu_t *, newcp);
1286 cp = newcp;
1287 }
1288 }
1289 }
1290 } else {
1291 /*
1292 * Migrate to a cpu in the new partition.
1293 */
1294 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
1295 tp->t_lpl, tp->t_pri, NULL);
1296 }
1297
1298 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1299 } else {
1300 /*
1301 * It is possible that t_weakbound_cpu != t_bound_cpu (for
1302 * a short time until weak binding that existed when the
1303 * strong binding was established has dropped) so we must
1304 * favour weak binding over strong.
1305 */
1306 cp = tp->t_weakbound_cpu ?
1307 tp->t_weakbound_cpu : tp->t_bound_cpu;
1308 }
1309
1310 /*
1311 * A thread that is ONPROC may be temporarily placed on the run queue
1312 * but then chosen to run again by disp. If the thread we're placing on
1313 * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1314 * replacement process is actually scheduled in swtch(). In this
1315 * situation, curthread is the only thread that could be in the ONPROC
1316 * state.
1317 */
1318 if ((!self) && (tp->t_waitrq == 0)) {
1319 hrtime_t curtime;
1320
1321 curtime = gethrtime_unscaled();
1322 (void) cpu_update_pct(tp, curtime);
1323 tp->t_waitrq = curtime;
1324 } else {
1325 (void) cpu_update_pct(tp, gethrtime_unscaled());
1326 }
1327
1328 dp = cp->cpu_disp;
1329 disp_lock_enter_high(&dp->disp_lock);
1330
1331 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, front);
1332 if (front) {
1333 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri,
1334 tp);
1335 } else {
1336 TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p",
1337 tpri, cp, tp);
1338 }
1339
1340 #ifndef NPROBE
1341 /* Kernel probe */
1342 if (tnf_tracing_active)
1343 tnf_thread_queue(tp, cp, tpri);
1344 #endif /* NPROBE */
1345
1346 ASSERT(tpri >= 0 && tpri < dp->disp_npri);
1347
1348 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */
1349 tp->t_disp_queue = dp;
1350 tp->t_link = NULL;
1351
1352 dq = &dp->disp_q[tpri];
1353 dp->disp_nrunnable++;
1354 if (!bound)
1355 dp->disp_steal = 0;
1356 membar_enter();
1357
1358 if (dq->dq_sruncnt++ != 0) {
1359 if (front) {
1360 ASSERT(dq->dq_last != NULL);
1361 tp->t_link = dq->dq_first;
1362 dq->dq_first = tp;
1363 } else {
1364 ASSERT(dq->dq_first != NULL);
1365 dq->dq_last->t_link = tp;
1366 dq->dq_last = tp;
1367 }
1368 } else {
1369 ASSERT(dq->dq_first == NULL);
1370 ASSERT(dq->dq_last == NULL);
1371 dq->dq_first = dq->dq_last = tp;
1372 BT_SET(dp->disp_qactmap, tpri);
1373 if (tpri > dp->disp_maxrunpri) {
1374 dp->disp_maxrunpri = tpri;
1375 membar_enter();
1376 cpu_resched(cp, tpri);
1377 }
1378 }
1379
1380 if (!bound && tpri > dp->disp_max_unbound_pri) {
1381 if (self && dp->disp_max_unbound_pri == -1 && cp == CPU) {
1382 /*
1383 * If there are no other unbound threads on the
1384 * run queue, don't allow other CPUs to steal
1385 * this thread while we are in the middle of a
1386 * context switch. We may just switch to it
1387 * again right away. CPU_DISP_DONTSTEAL is cleared
1388 * in swtch and swtch_to.
1389 */
1390 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
1391 }
1392 dp->disp_max_unbound_pri = tpri;
1393 }
1394
1395 (*disp_enq_thread)(cp, bound);
1396 }
1397
1398 /*
1399 * Put the specified thread on the back of the dispatcher
1400 * queue corresponding to its current priority.
1401 *
1402 * Called with the thread in transition, onproc or stopped state
1403 * and locked (transition implies locked) and at high spl.
1404 * Returns with the thread in TS_RUN state and still locked.
1405 */
1406 void
1407 setbackdq(kthread_t *tp)
1408 {
1409 setfrontbackdq(tp, B_FALSE);
1410 }
1411
1412 /*
1413 * Put the specified thread on the front of the dispatcher
1414 * queue corresponding to its current priority.
1415 *
1416 * Called with the thread in transition, onproc or stopped state
1417 * and locked (transition implies locked) and at high spl.
1418 * Returns with the thread in TS_RUN state and still locked.
1419 */
1420 void
1421 setfrontdq(kthread_t *tp)
1422 {
1423 setfrontbackdq(tp, B_TRUE);
1424 }
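
/*
 * Editorial sketch (not part of the original source): the calling
 * convention shared by setfrontdq() and setbackdq(), as a hypothetical
 * caller such as a scheduling class setrun routine might use it.  The
 * thread must be locked, off any run queue, and typically in transition
 * state; both functions return with it in TS_RUN and still locked.
 *
 *	thread_lock(tp);	-- raise spl, acquire tp's dispatcher lock
 *	THREAD_TRANSITION(tp);	-- mark tp as in transition between states
 *	tp->t_pri = newpri;	-- hypothetical priority update by the class
 *	setbackdq(tp);		-- or setfrontdq(tp) to favor the thread
 *	thread_unlock(tp);	-- drop the lock; tp is now runnable
 */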
1425
1426 /*
1427 * Put a high-priority unbound thread on the kp queue
1428 */
1429 static void
1430 setkpdq(kthread_t *tp, int borf)
1431 {
1432 dispq_t *dq;
1433 disp_t *dp;
1434 cpu_t *cp;
1435 pri_t tpri;
1436
1437 tpri = DISP_PRIO(tp);
1438
1439 dp = &tp->t_cpupart->cp_kp_queue;
1440 disp_lock_enter_high(&dp->disp_lock);
1441
1442 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);
1443
|