323 ASSERT(MUTEX_HELD(&cpu_lock));
324
325 disp_mem = kmem_zalloc(NCPU *
326 sizeof (struct disp_queue_info), KM_SLEEP);
327
328 /*
329 * This routine must allocate all of the memory before stopping
330 * the cpus because it must not sleep in kmem_alloc while the
331 * CPUs are stopped. Locks they hold will not be freed until they
332 * are restarted.
333 */
334 i = 0;
335 cpup = cpu_list;
336 do {
337 disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp);
338 i++;
339 cpup = cpup->cpu_next;
340 } while (cpup != cpu_list);
341 num = i;
342
343 pause_cpus(NULL);
344 for (i = 0; i < num; i++)
345 disp_dq_assign(&disp_mem[i], numpris);
346 start_cpus();
347
348 /*
349 * I must free all of the memory after starting the cpus because
350 * I cannot risk sleeping in kmem_free while the cpus are stopped.
351 */
352 for (i = 0; i < num; i++)
353 disp_dq_free(&disp_mem[i]);
354
355 kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info));
356 }
357
358 static void
359 disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp)
360 {
361 dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP);
362 dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) *
363 sizeof (long), KM_SLEEP);
1152 * setbackdq() keeps runqs balanced such that the difference in length
1153 * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF.
1154 * For threads with priorities below RUNQ_MATCH_PRI, the runq lengths
1155 * must match. When the per-thread TS_RUNQMATCH flag is set, setbackdq() will
1156 * try to keep runqs perfectly balanced regardless of the thread priority.
1157 */
1158 #define RUNQ_MATCH_PRI 16 /* pri below which queue lengths must match */
1159 #define RUNQ_MAX_DIFF 2 /* maximum runq length difference */
1160 #define RUNQ_LEN(cp, pri) ((cp)->cpu_disp->disp_q[pri].dq_sruncnt)
1161
1162 /*
1163 * Macro that evaluates to true if it is likely that the thread has cache
1164 * warmth. This is based on the amount of time that has elapsed since the
1165 * thread last ran. If that amount of time is less than "rechoose_interval"
1166 * ticks, then we decide that the thread has enough cache warmth to warrant
1167 * some affinity for t->t_cpu.
1168 */
1169 #define THREAD_HAS_CACHE_WARMTH(thread) \
1170 ((thread == curthread) || \
1171 ((ddi_get_lbolt() - thread->t_disp_time) <= rechoose_interval))
1172 /*
1173 * Put the specified thread on the back of the dispatcher
1174 * queue corresponding to its current priority.
1175 *
1176 * Called with the thread in transition, onproc or stopped state
1177 * and locked (transition implies locked) and at high spl.
1178 * Returns with the thread in TS_RUN state and still locked.
1179 */
1180 void
1181 setbackdq(kthread_t *tp)
1182 {
1183 dispq_t *dq;
1184 disp_t *dp;
1185 cpu_t *cp;
1186 pri_t tpri;
1187 int bound;
1188 boolean_t self;
1189
1190 ASSERT(THREAD_LOCK_HELD(tp));
1191 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
1192 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */
1193
1194 /*
1195 * If thread is "swapped" or on the swap queue don't
1196 * queue it, but wake sched.
1197 */
1198 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
1199 disp_swapped_setrun(tp);
1200 return;
1201 }
1202
1203 self = (tp == curthread);
1204
1205 if (tp->t_bound_cpu || tp->t_weakbound_cpu)
1206 bound = 1;
1207 else
1208 bound = 0;
1209
1210 tpri = DISP_PRIO(tp);
1211 if (ncpus == 1)
1212 cp = tp->t_cpu;
1213 else if (!bound) {
1214 if (tpri >= kpqpri) {
1215 setkpdq(tp, SETKP_BACK);
1216 return;
1217 }
1218
1219 /*
1220 * We'll generally let this thread continue to run where
1221 * it last ran...but will consider migration if:
1222 * - The thread probably doesn't have much cache warmth.
1223 * - The CPU where it last ran is the target of an offline
1224 * request.
1225 * - The thread last ran outside its home lgroup.
1226 */
1227 if ((!THREAD_HAS_CACHE_WARMTH(tp)) ||
1228 (tp->t_cpu == cpu_inmotion)) {
1229 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL);
1230 } else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
1231 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1232 self ? tp->t_cpu : NULL);
1233 } else {
1234 cp = tp->t_cpu;
1235 }
1236
1237 if (tp->t_cpupart == cp->cpu_part) {
1238 int qlen;
1239
1240 /*
1241 * Perform any CMT load balancing
1242 */
1243 cp = cmt_balance(tp, cp);
1244
1245 /*
1246 * Balance across the run queues
1247 */
1248 qlen = RUNQ_LEN(cp, tpri);
1249 if (tpri >= RUNQ_MATCH_PRI &&
1250 !(tp->t_schedflag & TS_RUNQMATCH))
1251 qlen -= RUNQ_MAX_DIFF;
1252 if (qlen > 0) {
1253 cpu_t *newcp;
1254
1255 if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) {
1256 newcp = cp->cpu_next_part;
1257 } else if ((newcp = cp->cpu_next_lpl) == cp) {
1258 newcp = cp->cpu_next_part;
1259 }
1260
1261 if (RUNQ_LEN(newcp, tpri) < qlen) {
1262 DTRACE_PROBE3(runq__balance,
1263 kthread_t *, tp,
1264 cpu_t *, cp, cpu_t *, newcp);
1265 cp = newcp;
1266 }
1267 }
1268 } else {
1269 /*
1270 * Migrate to a cpu in the new partition.
1271 */
1272 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
1273 tp->t_lpl, tp->t_pri, NULL);
1274 }
1275 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1276 } else {
1277 /*
1278 * It is possible that t_weakbound_cpu != t_bound_cpu (for
1279 * a short time until weak binding that existed when the
1280 * strong binding was established has dropped) so we must
1281 * favour weak binding over strong.
1282 */
1283 cp = tp->t_weakbound_cpu ?
1284 tp->t_weakbound_cpu : tp->t_bound_cpu;
1285 }
1286 /*
1287 * A thread that is ONPROC may be temporarily placed on the run queue
1288 * but then chosen to run again by disp. If the thread we're placing on
1289 * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1290 * replacement process is actually scheduled in swtch(). In this
1291 * situation, curthread is the only thread that could be in the ONPROC
1292 * state.
1293 */
1294 if ((!self) && (tp->t_waitrq == 0)) {
1295 hrtime_t curtime;
1296
1297 curtime = gethrtime_unscaled();
1298 (void) cpu_update_pct(tp, curtime);
1299 tp->t_waitrq = curtime;
1300 } else {
1301 (void) cpu_update_pct(tp, gethrtime_unscaled());
1302 }
1303
1304 dp = cp->cpu_disp;
1305 disp_lock_enter_high(&dp->disp_lock);
1306
1307 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0);
1308 TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p",
1309 tpri, cp, tp);
1310
1311 #ifndef NPROBE
1312 /* Kernel probe */
1313 if (tnf_tracing_active)
1314 tnf_thread_queue(tp, cp, tpri);
1315 #endif /* NPROBE */
1316
1317 ASSERT(tpri >= 0 && tpri < dp->disp_npri);
1318
1319 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */
1320 tp->t_disp_queue = dp;
1321 tp->t_link = NULL;
1322
1323 dq = &dp->disp_q[tpri];
1324 dp->disp_nrunnable++;
1325 if (!bound)
1326 dp->disp_steal = 0;
1327 membar_enter();
1328
1329 if (dq->dq_sruncnt++ != 0) {
1330 ASSERT(dq->dq_first != NULL);
1331 dq->dq_last->t_link = tp;
1332 dq->dq_last = tp;
1333 } else {
1334 ASSERT(dq->dq_first == NULL);
1335 ASSERT(dq->dq_last == NULL);
1336 dq->dq_first = dq->dq_last = tp;
1337 BT_SET(dp->disp_qactmap, tpri);
1338 if (tpri > dp->disp_maxrunpri) {
1339 dp->disp_maxrunpri = tpri;
1340 membar_enter();
1341 cpu_resched(cp, tpri);
1342 }
1343 }
1344
1345 if (!bound && tpri > dp->disp_max_unbound_pri) {
1346 if (self && dp->disp_max_unbound_pri == -1 && cp == CPU) {
1347 /*
1348 * If there are no other unbound threads on the
1349 * run queue, don't allow other CPUs to steal
1350 * this thread while we are in the middle of a
1351 * context switch. We may just switch to it
1352 * again right away. CPU_DISP_DONTSTEAL is cleared
1353 * in swtch and swtch_to.
1354 */
1355 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
1356 }
1357 dp->disp_max_unbound_pri = tpri;
1358 }
1359 (*disp_enq_thread)(cp, bound);
1360 }
1361
1362 /*
1363 * Put the specified thread on the front of the dispatcher
1364 * queue corresponding to its current priority.
1365 *
1366 * Called with the thread in transition, onproc or stopped state
1367 * and locked (transition implies locked) and at high spl.
1368 * Returns with the thread in TS_RUN state and still locked.
1369 */
1370 void
1371 setfrontdq(kthread_t *tp)
1372 {
1373 disp_t *dp;
1374 dispq_t *dq;
1375 cpu_t *cp;
1376 pri_t tpri;
1377 int bound;
1378
1379 ASSERT(THREAD_LOCK_HELD(tp));
1380 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
1381 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */
1382
1383 /*
1384 * If thread is "swapped" or on the swap queue don't
1385 * queue it, but wake sched.
1386 */
1387 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
1388 disp_swapped_setrun(tp);
1389 return;
1390 }
1391
1392 if (tp->t_bound_cpu || tp->t_weakbound_cpu)
1393 bound = 1;
1394 else
1395 bound = 0;
1396
1397 tpri = DISP_PRIO(tp);
1398 if (ncpus == 1)
1399 cp = tp->t_cpu;
1400 else if (!bound) {
1401 if (tpri >= kpqpri) {
1402 setkpdq(tp, SETKP_FRONT);
1403 return;
1404 }
1405 cp = tp->t_cpu;
1406 if (tp->t_cpupart == cp->cpu_part) {
1407 /*
1408 * We'll generally let this thread continue to run
1409 * where it last ran, but will consider migration if:
1410 * - The thread last ran outside its home lgroup.
1411 * - The CPU where it last ran is the target of an
1412 * offline request (a thread_nomigrate() on the in
1413 * motion CPU relies on this when forcing a preempt).
1414 * - The thread isn't the highest priority thread where
1415 * it last ran, and it is considered not likely to
1416 * have significant cache warmth.
1417 */
1418 if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) ||
1419 (cp == cpu_inmotion)) {
1420 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1421 (tp == curthread) ? cp : NULL);
1422 } else if ((tpri < cp->cpu_disp->disp_maxrunpri) &&
1423 (!THREAD_HAS_CACHE_WARMTH(tp))) {
1424 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1425 NULL);
1426 }
1427 } else {
1428 /*
1429 * Migrate to a cpu in the new partition.
1430 */
1431 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
1432 tp->t_lpl, tp->t_pri, NULL);
1433 }
1434 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1435 } else {
1436 /*
1437 * It is possible that t_weakbound_cpu != t_bound_cpu (for
1438 * a short time until weak binding that existed when the
1439 * strong binding was established has dropped) so we must
1440 * favour weak binding over strong.
1441 */
1442 cp = tp->t_weakbound_cpu ?
1443 tp->t_weakbound_cpu : tp->t_bound_cpu;
1444 }
1445
1446 /*
1447 * A thread that is ONPROC may be temporarily placed on the run queue
1448 * but then chosen to run again by disp. If the thread we're placing on
1449 * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1450 * replacement process is actually scheduled in swtch(). In this
1451 * situation, curthread is the only thread that could be in the ONPROC
1452 * state.
1453 */
1454 if ((tp != curthread) && (tp->t_waitrq == 0)) {
1455 hrtime_t curtime;
1456
1457 curtime = gethrtime_unscaled();
1458 (void) cpu_update_pct(tp, curtime);
1459 tp->t_waitrq = curtime;
1460 } else {
1461 (void) cpu_update_pct(tp, gethrtime_unscaled());
1462 }
1463
1464 dp = cp->cpu_disp;
1465 disp_lock_enter_high(&dp->disp_lock);
1466
1467 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);
1468 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1);
1469
1470 #ifndef NPROBE
1471 /* Kernel probe */
1472 if (tnf_tracing_active)
1473 tnf_thread_queue(tp, cp, tpri);
1474 #endif /* NPROBE */
1475
1476 ASSERT(tpri >= 0 && tpri < dp->disp_npri);
1477
1478 THREAD_RUN(tp, &dp->disp_lock); /* set TS_RUN state and lock */
1479 tp->t_disp_queue = dp;
1480
1481 dq = &dp->disp_q[tpri];
1482 dp->disp_nrunnable++;
1483 if (!bound)
1484 dp->disp_steal = 0;
1485 membar_enter();
1486
1487 if (dq->dq_sruncnt++ != 0) {
1488 ASSERT(dq->dq_last != NULL);
1489 tp->t_link = dq->dq_first;
1490 dq->dq_first = tp;
1491 } else {
1492 ASSERT(dq->dq_last == NULL);
1493 ASSERT(dq->dq_first == NULL);
1494 tp->t_link = NULL;
1495 dq->dq_first = dq->dq_last = tp;
1496 BT_SET(dp->disp_qactmap, tpri);
1497 if (tpri > dp->disp_maxrunpri) {
1498 dp->disp_maxrunpri = tpri;
1499 membar_enter();
1500 cpu_resched(cp, tpri);
1501 }
1502 }
1503
1504 if (!bound && tpri > dp->disp_max_unbound_pri) {
1505 if (tp == curthread && dp->disp_max_unbound_pri == -1 &&
1506 cp == CPU) {
1507 /*
1508 * If there are no other unbound threads on the
1509 * run queue, don't allow other CPUs to steal
1510 * this thread while we are in the middle of a
1511 * context switch. We may just switch to it
1512 * again right away. CPU_DISP_DONTSTEAL is cleared
1513 * in swtch and swtch_to.
1514 */
1515 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
1516 }
1517 dp->disp_max_unbound_pri = tpri;
1518 }
1519 (*disp_enq_thread)(cp, bound);
1520 }
1521
1522 /*
1523 * Put a high-priority unbound thread on the kp queue
1524 */
1525 static void
1526 setkpdq(kthread_t *tp, int borf)
1527 {
1528 dispq_t *dq;
1529 disp_t *dp;
1530 cpu_t *cp;
1531 pri_t tpri;
1532
1533 tpri = DISP_PRIO(tp);
1534
1535 dp = &tp->t_cpupart->cp_kp_queue;
1536 disp_lock_enter_high(&dp->disp_lock);
1537
1538 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);
1539
|
323 ASSERT(MUTEX_HELD(&cpu_lock));
324
325 disp_mem = kmem_zalloc(NCPU *
326 sizeof (struct disp_queue_info), KM_SLEEP);
327
328 /*
329 * This routine must allocate all of the memory before stopping
330 * the cpus because it must not sleep in kmem_alloc while the
331 * CPUs are stopped. Locks they hold will not be freed until they
332 * are restarted.
333 */
334 i = 0;
335 cpup = cpu_list;
336 do {
337 disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp);
338 i++;
339 cpup = cpup->cpu_next;
340 } while (cpup != cpu_list);
341 num = i;
342
343 pause_cpus(NULL, NULL);
344 for (i = 0; i < num; i++)
345 disp_dq_assign(&disp_mem[i], numpris);
346 start_cpus();
347
348 /*
349 * I must free all of the memory after starting the cpus because
350 * I cannot risk sleeping in kmem_free while the cpus are stopped.
351 */
352 for (i = 0; i < num; i++)
353 disp_dq_free(&disp_mem[i]);
354
355 kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info));
356 }
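
/*
 * Editorial sketch (not part of the original source): the general shape of
 * the resize protocol used above, with hypothetical helper names.  Every
 * sleeping allocation happens before pause_cpus() and every free after
 * start_cpus(), so this thread never blocks in the allocator while the
 * other CPUs are held paused:
 *
 *	new = kmem_zalloc(sz, KM_SLEEP);  -- may sleep; CPUs still running
 *	pause_cpus(NULL, NULL);		  -- stop the other CPUs
 *	old = install_new(new);		  -- hypothetical: pointer swaps only
 *	start_cpus();			  -- let them run again
 *	kmem_free(old, sz);		  -- may sleep; safe now
 */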
357
358 static void
359 disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp)
360 {
361 dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP);
362 dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) *
363 sizeof (long), KM_SLEEP);
1152 * setbackdq() keeps runqs balanced such that the difference in length
1153 * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF.
1154 * For threads with priorities below RUNQ_MATCH_PRI, the runq lengths
1155 * must match. When the per-thread TS_RUNQMATCH flag is set, setbackdq() will
1156 * try to keep runqs perfectly balanced regardless of the thread priority.
1157 */
1158 #define RUNQ_MATCH_PRI 16 /* pri below which queue lengths must match */
1159 #define RUNQ_MAX_DIFF 2 /* maximum runq length difference */
1160 #define RUNQ_LEN(cp, pri) ((cp)->cpu_disp->disp_q[pri].dq_sruncnt)
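
/*
 * Editorial worked example (not part of the original source), using the
 * definitions above: suppose an unbound priority-30 thread (30 >=
 * RUNQ_MATCH_PRI, TS_RUNQMATCH clear) is enqueued on a CPU whose runq at
 * that priority already holds 3 threads.  Then qlen = 3 - RUNQ_MAX_DIFF = 1.
 * If the candidate CPU's queue holds 1 thread, 1 < 1 is false and the
 * thread stays put; if that queue is empty, 0 < 1 holds and setbackdq()
 * migrates the thread.  Below RUNQ_MATCH_PRI (or with TS_RUNQMATCH set)
 * the RUNQ_MAX_DIFF slack is not subtracted, so the thread moves whenever
 * the candidate queue is strictly shorter, keeping the lengths matched.
 */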
1161
1162 /*
1163 * Macro that evaluates to true if it is likely that the thread has cache
1164 * warmth. This is based on the amount of time that has elapsed since the
1165 * thread last ran. If that amount of time is less than "rechoose_interval"
1166 * ticks, then we decide that the thread has enough cache warmth to warrant
1167 * some affinity for t->t_cpu.
1168 */
1169 #define THREAD_HAS_CACHE_WARMTH(thread) \
1170 ((thread == curthread) || \
1171 ((ddi_get_lbolt() - thread->t_disp_time) <= rechoose_interval))
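
/*
 * Editorial example (not part of the original source), assuming the
 * traditional rechoose_interval default of 3 ticks: a thread whose
 * t_disp_time is 2 ticks in the past is treated as cache-warm and keeps
 * affinity for t->t_cpu, one that last ran 10 ticks ago is treated as
 * cold and may be migrated via disp_lowpri_cpu(), and curthread is always
 * treated as warm because it is running on a CPU right now.
 */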
1172
1173 /*
1174 * Put the specified thread on the front/back of the dispatcher queue
1175 * corresponding to its current priority.
1176 *
1177 * Called with the thread in transition, onproc or stopped state and locked
1178 * (transition implies locked) and at high spl. Returns with the thread in
1179 * TS_RUN state and still locked.
1180 */
1181 static void
1182 setfrontbackdq(kthread_t *tp, boolean_t front)
1183 {
1184 dispq_t *dq;
1185 disp_t *dp;
1186 cpu_t *cp;
1187 pri_t tpri;
1188 boolean_t bound;
1189 boolean_t self;
1190
1191 ASSERT(THREAD_LOCK_HELD(tp));
1192 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
1193 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */
1194
1195 /*
1196 * If thread is "swapped" or on the swap queue don't
1197 * queue it, but wake sched.
1198 */
1199 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
1200 disp_swapped_setrun(tp);
1201 return;
1202 }
1203
1204 self = (tp == curthread);
1205 bound = (tp->t_bound_cpu || tp->t_weakbound_cpu);
1206
1207 tpri = DISP_PRIO(tp);
1208 if (ncpus == 1)
1209 cp = tp->t_cpu;
1210 else if (!bound) {
1211 if (tpri >= kpqpri) {
1212 setkpdq(tp, front ? SETKP_FRONT : SETKP_BACK);
1213 return;
1214 }
1215
1216 cp = tp->t_cpu;
1217
1218 if (!front) {
1219 /*
1220 * We'll generally let this thread continue to run where
1221 * it last ran...but will consider migration if:
1222 * - The thread probably doesn't have much cache warmth.
1223 * - The CPU where it last ran is the target of an offline
1224 * request.
1225 * - The thread last ran outside its home lgroup.
1226 */
1227 if ((!THREAD_HAS_CACHE_WARMTH(tp)) || (cp == cpu_inmotion)) {
1228 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL);
1229 } else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
1230 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1231 self ? tp->t_cpu : NULL);
1232 }
1233
1234 }
1235
1236 if (tp->t_cpupart == cp->cpu_part) {
1237 if (front) {
1238 /*
1239 * We'll generally let this thread continue to run
1240 * where it last ran, but will consider migration if:
1241 * - The thread last ran outside its home lgroup.
1242 * - The CPU where it last ran is the target of an
1243 * offline request (a thread_nomigrate() on the in
1244 * motion CPU relies on this when forcing a preempt).
1245 * - The thread isn't the highest priority thread where
1246 * it last ran, and it is considered not likely to
1247 * have significant cache warmth.
1248 */
1249 if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) ||
1250 (cp == cpu_inmotion)) {
1251 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1252 self ? cp : NULL);
1253 } else if ((tpri < cp->cpu_disp->disp_maxrunpri) &&
1254 (!THREAD_HAS_CACHE_WARMTH(tp))) {
1255 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1256 NULL);
1257 }
1258 } else {
1259 int qlen;
1260
1261 /*
1262 * Perform any CMT load balancing
1263 */
1264 cp = cmt_balance(tp, cp);
1265
1266 /*
1267 * Balance across the run queues
1268 */
1269 qlen = RUNQ_LEN(cp, tpri);
1270 if (tpri >= RUNQ_MATCH_PRI &&
1271 !(tp->t_schedflag & TS_RUNQMATCH))
1272 qlen -= RUNQ_MAX_DIFF;
1273 if (qlen > 0) {
1274 cpu_t *newcp;
1275
1276 if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) {
1277 newcp = cp->cpu_next_part;
1278 } else if ((newcp = cp->cpu_next_lpl) == cp) {
1279 newcp = cp->cpu_next_part;
1280 }
1281
1282 if (RUNQ_LEN(newcp, tpri) < qlen) {
1283 DTRACE_PROBE3(runq__balance,
1284 kthread_t *, tp,
1285 cpu_t *, cp, cpu_t *, newcp);
1286 cp = newcp;
1287 }
1288 }
1289 }
1290 } else {
1291 /*
1292 * Migrate to a cpu in the new partition.
1293 */
1294 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
1295 tp->t_lpl, tp->t_pri, NULL);
1296 }
1297
1298 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1299 } else {
1300 /*
1301 * It is possible that t_weakbound_cpu != t_bound_cpu (for
1302 * a short time until weak binding that existed when the
1303 * strong binding was established has dropped) so we must
1304 * favour weak binding over strong.
1305 */
1306 cp = tp->t_weakbound_cpu ?
1307 tp->t_weakbound_cpu : tp->t_bound_cpu;
1308 }
1309
1310 /*
1311 * A thread that is ONPROC may be temporarily placed on the run queue
1312 * but then chosen to run again by disp. If the thread we're placing on
1313 * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1314 * replacement process is actually scheduled in swtch(). In this
1315 * situation, curthread is the only thread that could be in the ONPROC
1316 * state.
1317 */
1318 if ((!self) && (tp->t_waitrq == 0)) {
1319 hrtime_t curtime;
1320
1321 curtime = gethrtime_unscaled();
1322 (void) cpu_update_pct(tp, curtime);
1323 tp->t_waitrq = curtime;
1324 } else {
1325 (void) cpu_update_pct(tp, gethrtime_unscaled());
1326 }
1327
1328 dp = cp->cpu_disp;
1329 disp_lock_enter_high(&dp->disp_lock);
1330
1331 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, front);
1332 if (front) {
1333 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri,
1334 tp);
1335 } else {
1336 TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p",
1337 tpri, cp, tp);
1338 }
1339
1340 #ifndef NPROBE
1341 /* Kernel probe */
1342 if (tnf_tracing_active)
1343 tnf_thread_queue(tp, cp, tpri);
1344 #endif /* NPROBE */
1345
1346 ASSERT(tpri >= 0 && tpri < dp->disp_npri);
1347
1348 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */
1349 tp->t_disp_queue = dp;
1350 tp->t_link = NULL;
1351
1352 dq = &dp->disp_q[tpri];
1353 dp->disp_nrunnable++;
1354 if (!bound)
1355 dp->disp_steal = 0;
1356 membar_enter();
1357
1358 if (dq->dq_sruncnt++ != 0) {
1359 if (front) {
1360 ASSERT(dq->dq_last != NULL);
1361 tp->t_link = dq->dq_first;
1362 dq->dq_first = tp;
1363 } else {
1364 ASSERT(dq->dq_first != NULL);
1365 dq->dq_last->t_link = tp;
1366 dq->dq_last = tp;
1367 }
1368 } else {
1369 ASSERT(dq->dq_first == NULL);
1370 ASSERT(dq->dq_last == NULL);
1371 dq->dq_first = dq->dq_last = tp;
1372 BT_SET(dp->disp_qactmap, tpri);
1373 if (tpri > dp->disp_maxrunpri) {
1374 dp->disp_maxrunpri = tpri;
1375 membar_enter();
1376 cpu_resched(cp, tpri);
1377 }
1378 }
1379
1380 if (!bound && tpri > dp->disp_max_unbound_pri) {
1381 if (self && dp->disp_max_unbound_pri == -1 && cp == CPU) {
1382 /*
1383 * If there are no other unbound threads on the
1384 * run queue, don't allow other CPUs to steal
1385 * this thread while we are in the middle of a
1386 * context switch. We may just switch to it
1387 * again right away. CPU_DISP_DONTSTEAL is cleared
1388 * in swtch and swtch_to.
1389 */
1390 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
1391 }
1392 dp->disp_max_unbound_pri = tpri;
1393 }
1394
1395 (*disp_enq_thread)(cp, bound);
1396 }
1397
1398 /*
1399 * Put the specified thread on the back of the dispatcher
1400 * queue corresponding to its current priority.
1401 *
1402 * Called with the thread in transition, onproc or stopped state
1403 * and locked (transition implies locked) and at high spl.
1404 * Returns with the thread in TS_RUN state and still locked.
1405 */
1406 void
1407 setbackdq(kthread_t *tp)
1408 {
1409 setfrontbackdq(tp, B_FALSE);
1410 }
1411
1412 /*
1413 * Put the specified thread on the front of the dispatcher
1414 * queue corresponding to its current priority.
1415 *
1416 * Called with the thread in transition, onproc or stopped state
1417 * and locked (transition implies locked) and at high spl.
1418 * Returns with the thread in TS_RUN state and still locked.
1419 */
1420 void
1421 setfrontdq(kthread_t *tp)
1422 {
1423 setfrontbackdq(tp, B_TRUE);
1424 }
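
/*
 * Editorial sketch (not part of the original source): the calling
 * convention shared by setfrontdq() and setbackdq(), as a hypothetical
 * caller such as a scheduling class setrun routine might use it.  The
 * thread must be locked, off any run queue, and typically in transition
 * state; both functions return with it in TS_RUN and still locked.
 *
 *	thread_lock(tp);	-- raise spl, acquire tp's dispatcher lock
 *	THREAD_TRANSITION(tp);	-- mark tp as in transition between states
 *	tp->t_pri = newpri;	-- hypothetical priority update by the class
 *	setbackdq(tp);		-- or setfrontdq(tp) to favor the thread
 *	thread_unlock(tp);	-- drop the lock; tp is now runnable
 */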
1425
1426 /*
1427 * Put a high-priority unbound thread on the kp queue
1428 */
1429 static void
1430 setkpdq(kthread_t *tp, int borf)
1431 {
1432 dispq_t *dq;
1433 disp_t *dp;
1434 cpu_t *cp;
1435 pri_t tpri;
1436
1437 tpri = DISP_PRIO(tp);
1438
1439 dp = &tp->t_cpupart->cp_kp_queue;
1440 disp_lock_enter_high(&dp->disp_lock);
1441
1442 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);
1443
|