		}
		BT_ATOMIC_SET_EXCL(eqp->eq_bitmap, i, rval);
		if (rval == 0) {
			eqp->eq_rotor = i;
			eep = &eqp->eq_elems[i];
			break;
		}
	}

	ASSERT(len <= eqp->eq_size);
	bcopy(data, eep->eqe_data, MIN(eqp->eq_size, len));

	if (len < eqp->eq_size)
		bzero((caddr_t)eep->eqe_data + len, eqp->eq_size - len);

	for (;;) {
		old = eqp->eq_pend;
		eep->eqe_prev = old;
		membar_producer();

		if (atomic_cas_ptr(&eqp->eq_pend, old, eep) == old)
			break;
	}

	atomic_add_64(&eqp->eq_kstat.eqk_dispatched.value.ui64, 1);

	if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
		ddi_trigger_softintr(eqp->eq_id);
}
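
/*
 * Illustrative sketch (not part of errorq itself): the producer-side insert
 * above is a classic lock-free LIFO ("Treiber stack") push.  The hypothetical
 * eqx_node_t/eqx_push() names below are ours; the sketch assumes only the
 * atomic_cas_ptr() and membar_producer() primitives already used in this
 * file.  A producer snapshots the current head, links the new element to it,
 * makes that link visible, and then publishes the element with a
 * compare-and-swap, retrying if another producer raced in between.
 */
typedef struct eqx_node {
	struct eqx_node *eqx_next;	/* link to previously pushed node */
} eqx_node_t;

static void
eqx_push(eqx_node_t **headp, eqx_node_t *np)
{
	eqx_node_t *old;

	for (;;) {
		old = *headp;		/* snapshot current head */
		np->eqx_next = old;	/* link new node to it */
		membar_producer();	/* link visible before publication */
		if (atomic_cas_ptr(headp, old, np) == old)
			break;		/* published; otherwise retry */
	}
}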

/*
 * Drain the specified error queue by calling eq_func() for each pending error.
 * This function must be called at or below LOCK_LEVEL or from panic context.
 * In order to synchronize with other attempts to drain the queue, we acquire
 * the adaptive eq_lock, blocking other consumers.  Once this lock is held,
 * we must use compare-and-swap to move the pending list to the processing
 * list and to return elements to the free pool in order to synchronize
 * with producers, who do not acquire any locks and only use atomic set/clear.
 *
 * An additional constraint on this function is that if the system panics
 * while this function is running, the panic code must be able to detect and
 * handle all intermediate states and correctly dequeue all errors.  The
 * errorq_panic() function below will be used for detecting and handling
 * these intermediate states.  The comments in errorq_drain() below explain
 * how we make sure each intermediate state is distinct and consistent.
 */
void
errorq_drain(errorq_t *eqp)
{
	errorq_elem_t *eep, *dep;

	ASSERT(eqp != NULL);
	mutex_enter(&eqp->eq_lock);

	/*
	 * If there are one or more pending errors, set eq_ptail to point to
	 * the first element on the pending list and then attempt to compare-
	 * and-swap NULL to the pending list.  We use membar_producer() to
	 * make sure that eq_ptail will be visible to errorq_panic() below
	 * before the pending list is NULLed out.  This section is labeled
	 * case (1) for errorq_panic(), below.  If eq_ptail is not yet set
	 * (1A), eq_pend has all the pending errors.  If atomic_cas_ptr()
	 * fails or has not been called yet (1B), eq_pend still has all the
	 * pending errors.  If atomic_cas_ptr() succeeds (1C), eq_ptail has
	 * all the pending errors.
	 */
	while ((eep = eqp->eq_pend) != NULL) {
		eqp->eq_ptail = eep;
		membar_producer();

		if (atomic_cas_ptr(&eqp->eq_pend, eep, NULL) == eep)
			break;
	}

	/*
	 * If no errors were pending, assert that eq_ptail is set to NULL,
	 * drop the consumer lock, and return without doing anything.
	 */
	if (eep == NULL) {
		ASSERT(eqp->eq_ptail == NULL);
		mutex_exit(&eqp->eq_lock);
		return;
	}

	/*
	 * Now iterate from eq_ptail (a.k.a. eep, the newest error) to the
	 * oldest error, setting the eqe_next pointer so that we can iterate
	 * over the errors from oldest to newest.  We use membar_producer()
	 * to make sure that these stores are visible before we set eq_phead.
	 * If we panic before, during, or just after this loop (case 2),
	 * errorq_panic() will simply redo this work, as described below.
	 */

	/* ... remainder of errorq_drain() elided ... */
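
/*
 * Illustrative sketch (not the elided errorq code): the fixup described
 * above, which errorq_panic_drain() below also performs, turns the
 * prev-linked LIFO captured from eq_pend into a next-linked list that can
 * be walked oldest-first.  Starting from the newest element, each step
 * stores a forward pointer into the element one hop older.  The
 * hypothetical eqx_oldest_first() helper uses the real eqe_prev/eqe_next
 * fields but is only a standalone analogue of that loop.
 */
static errorq_elem_t *
eqx_oldest_first(errorq_elem_t *newest)
{
	errorq_elem_t *eep = newest;

	for (eep->eqe_next = NULL; eep->eqe_prev != NULL;
	    eep = eep->eqe_prev)
		eep->eqe_prev->eqe_next = eep;	/* older node points here */

	return (eep);	/* oldest element; walk forward via eqe_next */
}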

/*
 * This function is designed to be called from panic context only, and
 * therefore does not need to acquire errorq_lock when iterating over
 * errorq_list.  This function must be called no more than once for each
 * 'what' value (if you change this, then review the manipulation of
 * 'dep' below).
 */
static uint64_t
errorq_panic_drain(uint_t what)
{
	errorq_elem_t *eep, *nep, *dep;
	errorq_t *eqp;
	uint64_t loggedtmp;
	uint64_t logged = 0;

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		if ((eqp->eq_flags & (ERRORQ_VITAL | ERRORQ_NVLIST)) != what)
			continue;	/* do not drain this queue on this pass */

		loggedtmp = eqp->eq_kstat.eqk_logged.value.ui64;

		/*
		 * In case (1B) above, eq_ptail may be set but the
		 * atomic_cas_ptr may not have been executed yet or may have
		 * failed.  Either way, we must log errors in chronological
		 * order.  So we search the pending list for the error
		 * pointed to by eq_ptail.  If it is found, we know that all
		 * subsequent errors are also still on the pending list, so
		 * just NULL out eq_ptail and let errorq_drain(), below,
		 * take care of the logging.
		 */
		for (eep = eqp->eq_pend; eep != NULL; eep = eep->eqe_prev) {
			if (eep == eqp->eq_ptail) {
				ASSERT(eqp->eq_phead == NULL);
				eqp->eq_ptail = NULL;
				break;
			}
		}

		/*
		 * In cases (1C) and (2) above, eq_ptail will be set to the
		 * newest error on the processing list but eq_phead will still
		 * be NULL.  We set the eqe_next pointers so we can iterate
		 * over the processing list from the oldest error to the
		 * newest.  We then set eq_phead to point to the oldest error
		 * and fall into the for-loop below.
		 */
		if (eqp->eq_phead == NULL && (eep = eqp->eq_ptail) != NULL) {
			for (eep->eqe_next = NULL; eep->eqe_prev != NULL;
			    eep = eep->eqe_prev)
				eep->eqe_prev->eqe_next = eep;

			eqp->eq_phead = eep;
			eqp->eq_ptail = NULL;
		}

		/*
		 * In cases (3) and (4) above (or after the case (1C/2)
		 * handling), eq_phead will be set to the oldest error on the
		 * processing list.  We log each error and return it to the
		 * free pool.
		 *
		 * Unlike errorq_drain(), we don't need to worry about
		 * updating eq_phead because errorq_panic() will be called at
		 * most once.  However, we must use atomic_cas_ptr to update
		 * the freelist in case errors are still being enqueued
		 * during panic.
		 */
		for (eep = eqp->eq_phead; eep != NULL; eep = nep) {
			eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
			eqp->eq_kstat.eqk_logged.value.ui64++;

			nep = eep->eqe_next;
			eep->eqe_next = NULL;
			/*
			 * On panic, we add the element to the dump list for
			 * each nvlist errorq, stored oldest to newest.  Then
			 * continue, so we don't free and subsequently
			 * overwrite any elements which we've put on the dump
			 * queue.
			 */
			if (eqp->eq_flags & ERRORQ_NVLIST) {
				if (eqp->eq_dump == NULL)
					dep = eqp->eq_dump = eep;
				else
					dep = dep->eqe_dump = eep;
				membar_producer();

	/* ... remainder of errorq_panic_drain() elided ... */
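
/*
 * Illustrative sketch (not part of errorq): the dump-list append above
 * maintains a singly-linked, oldest-to-newest list through a tail cursor
 * ('dep').  The first element becomes the head; each later element is hung
 * off the previous tail.  This is why errorq_panic_drain() must run at most
 * once per 'what' value: 'dep' is only meaningful within a single pass.
 * The hypothetical eqx_dnode_t/eqx_append() names below are ours and only
 * model the same pattern with a caller-owned tail cursor.
 */
typedef struct eqx_dnode {
	struct eqx_dnode *eqx_dump;	/* next-newer element on dump list */
} eqx_dnode_t;

static eqx_dnode_t *
eqx_append(eqx_dnode_t **headp, eqx_dnode_t *tail, eqx_dnode_t *np)
{
	np->eqx_dump = NULL;		/* new element is the new end */
	if (*headp == NULL)
		*headp = np;		/* first element becomes the head */
	else
		tail->eqx_dump = np;	/* hang new element off old tail */
	return (np);			/* caller's new tail cursor */
}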

/*
 * Commit an errorq element (eqep) for dispatching.  This function may be
 * called from any context subject to the Platform Considerations described
 * above.
 */
void
errorq_commit(errorq_t *eqp, errorq_elem_t *eqep, uint_t flag)
{
	errorq_elem_t *old;

	if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_add_64(&eqp->eq_kstat.eqk_commit_fail.value.ui64, 1);
		return;
	}

	for (;;) {
		old = eqp->eq_pend;
		eqep->eqe_prev = old;
		membar_producer();

		if (atomic_cas_ptr(&eqp->eq_pend, old, eqep) == old)
			break;
	}

	atomic_add_64(&eqp->eq_kstat.eqk_committed.value.ui64, 1);

	if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
		ddi_trigger_softintr(eqp->eq_id);
}
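
/*
 * Illustrative usage sketch (an assumed caller, not code from this file):
 * a producer on an ERRORQ_NVLIST queue reserves an element, builds the
 * nvlist in place via errorq_elem_nvl(), and then either commits the
 * element for dispatch or cancels the reservation via errorq_cancel()
 * (below).  The 'my_errorq' argument and the "example-ena" member name
 * are assumptions for the example.
 */
static void
eqx_report(errorq_t *my_errorq, uint64_t ena)
{
	errorq_elem_t *eqep;
	nvlist_t *nvl;

	/* Reserve a free element; NULL means the queue is exhausted. */
	if ((eqep = errorq_reserve(my_errorq)) == NULL)
		return;

	nvl = errorq_elem_nvl(my_errorq, eqep);

	if (nvlist_add_uint64(nvl, "example-ena", ena) != 0) {
		errorq_cancel(my_errorq, eqep);	/* give the element back */
		return;
	}

	/* Publish the element; a soft interrupt drains it asynchronously. */
	errorq_commit(my_errorq, eqep, ERRORQ_ASYNC);
}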

/*
 * Cancel an errorq element reservation by returning the specified element
 * to the free pool.  Duplicate or invalid frees are not supported.
 */
void
errorq_cancel(errorq_t *eqp, errorq_elem_t *eqep)
{
	if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE))
		return;

	BT_ATOMIC_CLEAR(eqp->eq_bitmap, eqep - eqp->eq_elems);