Merge branches 'urgent.2012.10.27a', 'doc.2012.11.16a', 'fixes.2012.11.13a', 'srcu.2012.10.27a', 'stall.2012.11.13a', 'tracing.2012.11.08a' and 'idle.2012.10.24a' into HEAD
urgent.2012.10.27a: Fix for RCU user-mode transition (already in -tip).

doc.2012.11.16a: Documentation updates, most notably codifying the
memory-barrier guarantees inherent to grace periods.

fixes.2012.11.13a: Miscellaneous fixes.

srcu.2012.10.27a: Allow statically allocated and initialized srcu_struct
structures (courtesy of Lai Jiangshan).

stall.2012.11.13a: Add more diagnostic information to RCU CPU stall
warnings, and also decrease the default stall-warning timeout from 60
seconds to 21 seconds.

hotplug.2012.11.08a: Minor updates to CPU hotplug handling.

tracing.2012.11.08a: Improved debugfs tracing, courtesy of Michael Wang.

idle.2012.10.24a: Updates to RCU idle/adaptive-idle handling, including
a boot parameter that maps normal grace periods to expedited.

Resolved conflict in kernel/rcutree.c due to side-by-side change.
commit aac1cda34b
@@ -186,7 +186,7 @@ Bibtex Entries
 @article{Kung80
 ,author="H. T. Kung and Q. Lehman"
-,title="Concurrent Maintenance of Binary Search Trees"
+,title="Concurrent Manipulation of Binary Search Trees"
 ,Year="1980"
 ,Month="September"
 ,journal="ACM Transactions on Database Systems"
@@ -271,15 +271,14 @@ over a rather long period of time, but improvements are always welcome!
	The same cautions apply to call_rcu_bh() and call_rcu_sched().
 
 9.	All RCU list-traversal primitives, which include
-	rcu_dereference(), list_for_each_entry_rcu(),
-	list_for_each_continue_rcu(), and list_for_each_safe_rcu(),
-	must be either within an RCU read-side critical section or
-	must be protected by appropriate update-side locks.  RCU
-	read-side critical sections are delimited by rcu_read_lock()
-	and rcu_read_unlock(), or by similar primitives such as
-	rcu_read_lock_bh() and rcu_read_unlock_bh(), in which case
-	the matching rcu_dereference() primitive must be used in order
-	to keep lockdep happy, in this case, rcu_dereference_bh().
+	rcu_dereference(), list_for_each_entry_rcu(), and
+	list_for_each_safe_rcu(), must be either within an RCU read-side
+	critical section or must be protected by appropriate update-side
+	locks.  RCU read-side critical sections are delimited by
+	rcu_read_lock() and rcu_read_unlock(), or by similar primitives
+	such as rcu_read_lock_bh() and rcu_read_unlock_bh(), in which
+	case the matching rcu_dereference() primitive must be used in
+	order to keep lockdep happy, in this case, rcu_dereference_bh().
 
	The reason that it is permissible to use RCU list-traversal
	primitives when the update-side lock is held is that doing so
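The read-side half of that rule, in code form (a sketch only; struct foo,
its ->list member, foo_head, and do_something() are illustrative names,
not part of this patch):

	struct foo *p;

	rcu_read_lock();
	list_for_each_entry_rcu(p, &foo_head, list) {
		/* p is guaranteed to stay around only until rcu_read_unlock(). */
		do_something(p->a);
	}
	rcu_read_unlock();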
@@ -205,7 +205,7 @@ RCU ("read-copy update") its name.  The RCU code is as follows:
		audit_copy_rule(&ne->rule, &e->rule);
		ne->rule.action = newaction;
		ne->rule.file_count = newfield_count;
-		list_replace_rcu(e, ne);
+		list_replace_rcu(&e->list, &ne->list);
		call_rcu(&e->rcu, audit_free_rule);
		return 0;
	}
@@ -20,7 +20,7 @@ release_referenced()			delete()
 {					{
     ...					    write_lock(&list_lock);
     atomic_dec(&el->rc, relfunc)	    ...
-    ...					    delete_element
+    ...					    remove_element
 }					    write_unlock(&list_lock);
					...
					if (atomic_dec_and_test(&el->rc))
@@ -52,7 +52,7 @@ release_referenced()			delete()
 {					{
     ...					    spin_lock(&list_lock);
     if (atomic_dec_and_test(&el->rc))	    ...
-	call_rcu(&el->head, el_free);	    delete_element
+	call_rcu(&el->head, el_free);	    remove_element
     ...					    spin_unlock(&list_lock);
 }					    ...
					if (atomic_dec_and_test(&el->rc))
@@ -64,3 +64,60 @@ Sometimes, a reference to the element needs to be obtained in the
 update (write) stream.  In such cases, atomic_inc_not_zero() might be
 overkill, since we hold the update-side spinlock.  One might instead
 use atomic_inc() in such cases.
+
+It is not always convenient to deal with "FAIL" in the
+search_and_reference() code path.  In such cases, the
+atomic_dec_and_test() may be moved from delete() to el_free()
+as follows:
+
+1.					2.
+add()					search_and_reference()
+{					{
+    alloc_object			    rcu_read_lock();
+    ...					    search_for_element
+    atomic_set(&el->rc, 1);		    atomic_inc(&el->rc);
+    spin_lock(&list_lock);		    ...
+
+    add_element				    rcu_read_unlock();
+    ...					}
+    spin_unlock(&list_lock);		4.
+}					delete()
+3.					{
+release_referenced()			    spin_lock(&list_lock);
+{					    ...
+    ...					    remove_element
+    if (atomic_dec_and_test(&el->rc))	    spin_unlock(&list_lock);
+        kfree(el);			    ...
+    ...					    call_rcu(&el->head, el_free);
+}					    ...
+5.					}
+void el_free(struct rcu_head *rhp)
+{
+    release_referenced();
+}
+
+The key point is that the initial reference added by add() is not removed
+until after a grace period has elapsed following removal.  This means that
+search_and_reference() cannot find this element, which means that the value
+of el->rc cannot increase.  Thus, once it reaches zero, there are no
+readers that can or ever will be able to reference the element.  The
+element can therefore safely be freed.  This in turn guarantees that if
+any reader finds the element, that reader may safely acquire a reference
+without checking the value of the reference counter.
+
+In cases where delete() can sleep, synchronize_rcu() can be called from
+delete(), so that el_free() can be subsumed into delete as follows:
+
+4.
+delete()
+{
+	spin_lock(&list_lock);
+	...
+	remove_element
+	spin_unlock(&list_lock);
+	...
+	synchronize_rcu();
+	if (atomic_dec_and_test(&el->rc))
+		kfree(el);
+	...
+}
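Rendered as more conventional C, that final pattern amounts to the
following (a sketch under assumed names; struct el's exact layout and
the list_lock spinlock are illustrative, not taken verbatim from the
document):

	struct el {
		struct list_head list;
		atomic_t rc;		/* add() initializes this to 1. */
		struct rcu_head head;
	};

	static void el_delete(struct el *p)
	{
		spin_lock(&list_lock);
		list_del_rcu(&p->list);			/* remove_element */
		spin_unlock(&list_lock);
		synchronize_rcu();			/* wait for all readers */
		if (atomic_dec_and_test(&p->rc))	/* drop add()'s reference */
			kfree(p);
	}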
@@ -10,51 +10,63 @@ for rcutree and next for rcutiny.
 
 CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats
 
-These implementations of RCU provides several debugfs files under the
+These implementations of RCU provide several debugfs directories under the
 top-level directory "rcu":
 
-rcu/rcudata:
+rcu/rcu_bh
+rcu/rcu_preempt
+rcu/rcu_sched
+
+Each directory contains files for the corresponding flavor of RCU.
+Note that rcu/rcu_preempt is only present for CONFIG_TREE_PREEMPT_RCU.
+For CONFIG_TREE_RCU, the RCU flavor maps onto the RCU-sched flavor,
+so that activity for both appears in rcu/rcu_sched.
+
+In addition, the following file appears in the top-level directory:
+rcu/rcutorture.  This file displays rcutorture test progress.  The output
+of "cat rcu/rcutorture" looks as follows:
+
+rcutorture test sequence: 0 (test in progress)
+rcutorture update version number: 615
+
+The first line shows the number of rcutorture tests that have completed
+since boot.  If a test is currently running, the "(test in progress)"
+string will appear as shown above.  The second line shows the number of
+update cycles that the current test has started, or zero if there is
+no test in progress.
+
+
+Within each flavor directory (rcu/rcu_bh, rcu/rcu_sched, and possibly
+also rcu/rcu_preempt) the following files will be present:
+
+rcudata:
	Displays fields in struct rcu_data.
-rcu/rcudata.csv:
-	Comma-separated values spreadsheet version of rcudata.
-rcu/rcugp:
+rcuexp:
+	Displays statistics for expedited grace periods.
+rcugp:
	Displays grace-period counters.
-rcu/rcuhier:
+rcuhier:
	Displays the struct rcu_node hierarchy.
-rcu/rcu_pending:
+rcu_pending:
	Displays counts of the reasons rcu_pending() decided that RCU had
	work to do.
-rcu/rcutorture:
-	Displays rcutorture test progress.
-rcu/rcuboost:
+rcuboost:
	Displays RCU boosting statistics.  Only present if
	CONFIG_RCU_BOOST=y.
 
-The output of "cat rcu/rcudata" looks as follows:
+The output of "cat rcu/rcu_preempt/rcudata" looks as follows:
 
-rcu_sched:
-  0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
-  1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
-  2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
-  3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
-  4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
-  5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
-  6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
-  7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
-rcu_bh:
-  0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
-  1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
-  2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
-  3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
-  4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
-  5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
-  6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
-  7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
+  0!c=30455 g=30456 pq=1 qp=1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716
+  1!c=30719 g=30720 pq=1 qp=0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982
+  2!c=30150 g=30151 pq=1 qp=1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458
+  3 c=31249 g=31250 pq=1 qp=0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622
+  4!c=29502 g=29503 pq=1 qp=1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521
+  5 c=31201 g=31202 pq=1 qp=1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698
+  6!c=30253 g=30254 pq=1 qp=1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353
+  7 c=31178 g=31178 pq=1 qp=0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969
 
-The first section lists the rcu_data structures for rcu_sched, the second
-for rcu_bh.  Note that CONFIG_TREE_PREEMPT_RCU kernels will have an
-additional section for rcu_preempt.  Each section has one line per CPU,
-or eight for this 8-CPU system.  The fields are as follows:
+This file has one line per CPU, or eight for this 8-CPU system.
+The fields are as follows:
 
 o	The number at the beginning of each line is the CPU number.
	CPUs numbers followed by an exclamation mark are offline,
@@ -64,11 +76,13 @@ o	The number at the beginning of each line is the CPU number.
	substantially larger than the number of actual CPUs.
 
 o	"c" is the count of grace periods that this CPU believes have
-	completed.  Offlined CPUs and CPUs in dynticks idle mode may
-	lag quite a ways behind, for example, CPU 6 under "rcu_sched"
-	above, which has been offline through not quite 40,000 RCU grace
-	periods.  It is not unusual to see CPUs lagging by thousands of
-	grace periods.
+	completed.  Offlined CPUs and CPUs in dynticks idle mode may lag
+	quite a ways behind, for example, CPU 4 under "rcu_sched" above,
+	which has been offline through 16 RCU grace periods.  It is not
+	unusual to see offline CPUs lagging by thousands of grace periods.
+	Note that although the grace-period number is an unsigned long,
+	it is printed out as a signed long to allow more human-friendly
+	representation near boot time.
 
 o	"g" is the count of grace periods that this CPU believes have
	started.  Again, offlined CPUs and CPUs in dynticks idle mode
@@ -84,30 +98,25 @@ o	"pq" indicates that this CPU has passed through a quiescent state
	CPU has not yet reported that fact, (2) some other CPU has not
	yet reported for this grace period, or (3) both.
 
-o	"pgp" indicates which grace period the last-observed quiescent
-	state for this CPU corresponds to.  This is important for handling
-	the race between CPU 0 reporting an extended dynticks-idle
-	quiescent state for CPU 1 and CPU 1 suddenly waking up and
-	reporting its own quiescent state.  If CPU 1 was the last CPU
-	for the current grace period, then the CPU that loses this race
-	will attempt to incorrectly mark CPU 1 as having checked in for
-	the next grace period!
-
 o	"qp" indicates that RCU still expects a quiescent state from
	this CPU.  Offlined CPUs and CPUs in dyntick idle mode might
	well have qp=1, which is OK: RCU is still ignoring them.
 
 o	"dt" is the current value of the dyntick counter that is incremented
-	when entering or leaving dynticks idle state, either by the
-	scheduler or by irq.  This number is even if the CPU is in
-	dyntick idle mode and odd otherwise.  The number after the first
-	"/" is the interrupt nesting depth when in dyntick-idle state,
-	or one greater than the interrupt-nesting depth otherwise.
-	The number after the second "/" is the NMI nesting depth.
+	when entering or leaving idle, either due to a context switch or
+	due to an interrupt.  This number is even if the CPU is in idle
+	from RCU's viewpoint and odd otherwise.  The number after the
+	first "/" is the interrupt nesting depth when in idle state,
+	or a large number added to the interrupt-nesting depth when
+	running a non-idle task.  Some architectures do not accurately
+	count interrupt nesting when running in non-idle kernel context,
+	which can result in interesting anomalies such as negative
+	interrupt-nesting levels.  The number after the second "/"
+	is the NMI nesting depth.
 
 o	"df" is the number of times that some other CPU has forced a
	quiescent state on behalf of this CPU due to this CPU being in
-	dynticks-idle state.
+	idle state.
 
 o	"of" is the number of times that some other CPU has forced a
	quiescent state on behalf of this CPU due to this CPU being
@@ -120,9 +129,13 @@ o	"of" is the number of times that some other CPU has forced a
	error, so it makes sense to err conservatively.
 
 o	"ql" is the number of RCU callbacks currently residing on
-	this CPU.  This is the total number of callbacks, regardless
-	of what state they are in (new, waiting for grace period to
-	start, waiting for grace period to end, ready to invoke).
+	this CPU.  The first number is the number of "lazy" callbacks
+	that are known to RCU to only be freeing memory, and the number
+	after the "/" is the total number of callbacks, lazy or not.
+	These counters count callbacks regardless of what phase of
+	grace-period processing that they are in (new, waiting for
+	grace period to start, waiting for grace period to end, ready
+	to invoke).
 
 o	"qs" gives an indication of the state of the callback queue
	with four characters:
@@ -150,6 +163,43 @@ o	"qs" gives an indication of the state of the callback queue
	If there are no callbacks in a given one of the above states,
	the corresponding character is replaced by ".".
 
+o	"b" is the batch limit for this CPU.  If more than this number
+	of RCU callbacks is ready to invoke, then the remainder will
+	be deferred.
+
+o	"ci" is the number of RCU callbacks that have been invoked for
+	this CPU.  Note that ci+nci+ql is the number of callbacks that have
+	been registered in absence of CPU-hotplug activity.
+
+o	"nci" is the number of RCU callbacks that have been offloaded from
+	this CPU.  This will always be zero unless the kernel was built
+	with CONFIG_RCU_NOCB_CPU=y and the "rcu_nocbs=" kernel boot
+	parameter was specified.
+
+o	"co" is the number of RCU callbacks that have been orphaned due to
+	this CPU going offline.  These orphaned callbacks have been moved
+	to an arbitrarily chosen online CPU.
+
+o	"ca" is the number of RCU callbacks that have been adopted by this
+	CPU due to other CPUs going offline.  Note that ci+co-ca+ql is
+	the number of RCU callbacks registered on this CPU.
+
+
+Kernels compiled with CONFIG_RCU_BOOST=y display the following from
+/debug/rcu/rcu_preempt/rcudata:
+
+  0!c=12865 g=12866 pq=1 qp=1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871
+  1 c=14407 g=14408 pq=1 qp=0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485
+  2 c=14407 g=14408 pq=1 qp=0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490
+  3 c=14407 g=14408 pq=1 qp=0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290
+  4 c=14405 g=14406 pq=1 qp=1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114
+  5!c=14168 g=14169 pq=1 qp=0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722
+  6 c=14404 g=14405 pq=1 qp=0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811
+  7 c=14407 g=14408 pq=1 qp=1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042
+
+This is similar to the output discussed above, but contains the following
+additional fields:
+
 o	"kt" is the per-CPU kernel-thread state.  The digit preceding
	the first slash is zero if there is no work pending and 1
	otherwise.  The character between the first pair of slashes is
@@ -184,35 +234,51 @@ o	"ktl" is the low-order 16 bits (in hexadecimal) of the count of
 
	This field is displayed only for CONFIG_RCU_BOOST kernels.
 
-o	"b" is the batch limit for this CPU.  If more than this number
-	of RCU callbacks is ready to invoke, then the remainder will
-	be deferred.
-
-o	"ci" is the number of RCU callbacks that have been invoked for
-	this CPU.  Note that ci+ql is the number of callbacks that have
-	been registered in absence of CPU-hotplug activity.
+The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
 
-o	"co" is the number of RCU callbacks that have been orphaned due to
-	this CPU going offline.  These orphaned callbacks have been moved
-	to an arbitrarily chosen online CPU.
+s=21872 d=21872 w=0 tf=0 wd1=0 wd2=0 n=0 sc=21872 dt=21872 dl=0 dx=21872
 
-o	"ca" is the number of RCU callbacks that have been adopted due to
-	other CPUs going offline.  Note that ci+co-ca+ql is the number of
-	RCU callbacks registered on this CPU.
+These fields are as follows:
 
-There is also an rcu/rcudata.csv file with the same information in
-comma-separated-variable spreadsheet format.
+o	"s" is the starting sequence number.
 
+o	"d" is the ending sequence number.  When the starting and ending
+	numbers differ, there is an expedited grace period in progress.
+
+o	"w" is the number of times that the sequence numbers have been
+	in danger of wrapping.
+
+o	"tf" is the number of times that contention has resulted in a
+	failure to begin an expedited grace period.
+
+o	"wd1" and "wd2" are the number of times that an attempt to
+	start an expedited grace period found that someone else had
+	completed an expedited grace period that satisfies the
+	attempted request.  "Our work is done."
+
+o	"n" is number of times that contention was so great that
+	the request was demoted from an expedited grace period to
+	a normal grace period.
+
+o	"sc" is the number of times that the attempt to start a
+	new expedited grace period succeeded.
+
+o	"dt" is the number of times that we attempted to update
+	the "d" counter.
+
+o	"dl" is the number of times that we failed to update the "d"
+	counter.
+
+o	"dx" is the number of times that we succeeded in updating
+	the "d" counter.
 
-The output of "cat rcu/rcugp" looks as follows:
+
+The output of "cat rcu/rcu_preempt/rcugp" looks as follows:
 
-rcu_sched: completed=33062  gpnum=33063
-rcu_bh: completed=464  gpnum=464
+completed=31249  gpnum=31250  age=1  max=18
 
-Again, this output is for both "rcu_sched" and "rcu_bh".  Note that
-kernels built with CONFIG_TREE_PREEMPT_RCU will have an additional
-"rcu_preempt" line.  The fields are taken from the rcu_state structure,
-and are as follows:
+These fields are taken from the rcu_state structure, and are as follows:
 
 o	"completed" is the number of grace periods that have completed.
	It is comparable to the "c" field from rcu/rcudata in that a
@@ -220,44 +286,42 @@ o	"completed" is the number of grace periods that have completed.
	that the corresponding RCU grace period has completed.
 
 o	"gpnum" is the number of grace periods that have started.  It is
-	comparable to the "g" field from rcu/rcudata in that a CPU
-	whose "g" field matches the value of "gpnum" is aware that the
-	corresponding RCU grace period has started.
+	similarly comparable to the "g" field from rcu/rcudata in that
+	a CPU whose "g" field matches the value of "gpnum" is aware that
+	the corresponding RCU grace period has started.
 
-	If these two fields are equal (as they are for "rcu_bh" above),
-	then there is no grace period in progress, in other words, RCU
-	is idle.  On the other hand, if the two fields differ (as they
-	do for "rcu_sched" above), then an RCU grace period is in progress.
+	If these two fields are equal, then there is no grace period
+	in progress, in other words, RCU is idle.  On the other hand,
+	if the two fields differ (as they are above), then an RCU grace
+	period is in progress.
+
+o	"age" is the number of jiffies that the current grace period
+	has extended for, or zero if there is no grace period currently
+	in effect.
 
-The output of "cat rcu/rcuhier" looks as follows, with very long lines:
+o	"max" is the age in jiffies of the longest-duration grace period
+	thus far.
 
-c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
-1/1 ..>. 0:127 ^0
-3/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3
-3/3f ..>. 0:5 ^0 2/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3
-rcu_bh:
-c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
-0/1 ..>. 0:127 ^0
-0/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3
-0/3f ..>. 0:5 ^0 0/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3
+The output of "cat rcu/rcu_preempt/rcuhier" looks as follows:
 
-This is once again split into "rcu_sched" and "rcu_bh" portions,
-and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional
-"rcu_preempt" section.  The fields are as follows:
+c=14407 g=14408 s=0 jfq=2 j=c863 nfqs=12040/nfqsng=0(12040) fqlh=1051 oqlen=0/0
+3/3 ..>. 0:7 ^0
+e/e ..>. 0:3 ^0 d/d ..>. 4:7 ^1
 
-o	"c" is exactly the same as "completed" under rcu/rcugp.
+The fields are as follows:
 
-o	"g" is exactly the same as "gpnum" under rcu/rcugp.
+o	"c" is exactly the same as "completed" under rcu/rcu_preempt/rcugp.
 
-o	"s" is the "signaled" state that drives force_quiescent_state()'s
+o	"g" is exactly the same as "gpnum" under rcu/rcu_preempt/rcugp.
+
+o	"s" is the current state of the force_quiescent_state()
	state machine.
 
 o	"jfq" is the number of jiffies remaining for this grace period
	before force_quiescent_state() is invoked to help push things
-	along.  Note that CPUs in dyntick-idle mode throughout the grace
-	period will not report on their own, but rather must be check by
-	some other CPU via force_quiescent_state().
+	along.  Note that CPUs in idle mode throughout the grace period
+	will not report on their own, but rather must be check by some
+	other CPU via force_quiescent_state().
 
 o	"j" is the low-order four hex digits of the jiffies counter.
	Yes, Paul did run into a number of problems that turned out to
|
||||
|
||||
o "nfqsng" is the number of useless calls to force_quiescent_state(),
|
||||
where there wasn't actually a grace period active. This can
|
||||
happen due to races. The number in parentheses is the difference
|
||||
no longer happen due to grace-period processing being pushed
|
||||
into a kthread. The number in parentheses is the difference
|
||||
between "nfqs" and "nfqsng", or the number of times that
|
||||
force_quiescent_state() actually did some real work.
|
||||
|
||||
@@ -276,28 +341,27 @@ o	"fqlh" is the number of calls to force_quiescent_state() that
	exited immediately (without even being counted in nfqs above)
	due to contention on ->fqslock.
 
-o	Each element of the form "1/1 0:127 ^0" represents one struct
-	rcu_node.  Each line represents one level of the hierarchy, from
-	root to leaves.  It is best to think of the rcu_data structures
-	as forming yet another level after the leaves.  Note that there
-	might be either one, two, or three levels of rcu_node structures,
-	depending on the relationship between CONFIG_RCU_FANOUT and
-	CONFIG_NR_CPUS.
+o	Each element of the form "3/3 ..>. 0:7 ^0" represents one rcu_node
+	structure.  Each line represents one level of the hierarchy,
+	from root to leaves.  It is best to think of the rcu_data
+	structures as forming yet another level after the leaves.
+	Note that there might be either one, two, three, or even four
+	levels of rcu_node structures, depending on the relationship
+	between CONFIG_RCU_FANOUT, CONFIG_RCU_FANOUT_LEAF (possibly
+	adjusted using the rcu_fanout_leaf kernel boot parameter), and
+	CONFIG_NR_CPUS (possibly adjusted using the nr_cpu_ids count of
+	possible CPUs for the booting hardware).
 
	o	The numbers separated by the "/" are the qsmask followed
		by the qsmaskinit.  The qsmask will have one bit
-		set for each entity in the next lower level that
-		has not yet checked in for the current grace period.
+		set for each entity in the next lower level that has
+		not yet checked in for the current grace period ("e"
+		indicating CPUs 5, 6, and 7 in the example above).
		The qsmaskinit will have one bit for each entity that is
		currently expected to check in during each grace period.
		The value of qsmaskinit is assigned to that of qsmask
		at the beginning of each grace period.
 
-		For example, for "rcu_sched", the qsmask of the first
-		entry of the lowest level is 0x14, meaning that we
-		are still waiting for CPUs 2 and 4 to check in for the
-		current grace period.
-
	o	The characters separated by the ">" indicate the state
		of the blocked-tasks lists.  A "G" preceding the ">"
		indicates that at least one task blocked in an RCU
@@ -312,48 +376,39 @@ o	Each element of the form "1/1 0:127 ^0" represents one struct
		A "." character appears if the corresponding condition
		does not hold, so that "..>." indicates that no tasks
		are blocked.  In contrast, "GE>T" indicates maximal
-		inconvenience from blocked tasks.
+		inconvenience from blocked tasks.  CONFIG_TREE_RCU
+		builds of the kernel will always show "..>.".
 
	o	The numbers separated by the ":" are the range of CPUs
		served by this struct rcu_node.  This can be helpful
		in working out how the hierarchy is wired together.
 
-		For example, the first entry at the lowest level shows
-		"0:5", indicating that it covers CPUs 0 through 5.
+		For example, the example rcu_node structure shown above
+		has "0:7", indicating that it covers CPUs 0 through 7.
 
	o	The number after the "^" indicates the bit in the
-		next higher level rcu_node structure that this
-		rcu_node structure corresponds to.
-
-		For example, the first entry at the lowest level shows
-		"^0", indicating that it corresponds to bit zero in
-		the first entry at the middle level.
+		next higher level rcu_node structure that this rcu_node
+		structure corresponds to.  For example, the "d/d ..>. 4:7
+		^1" has a "1" in this position, indicating that it
+		corresponds to the "1" bit in the "3" shown in the
+		"3/3 ..>. 0:7 ^0" entry on the next level up.
 
 
-The output of "cat rcu/rcu_pending" looks as follows:
+The output of "cat rcu/rcu_sched/rcu_pending" looks as follows:
 
-rcu_sched:
-  0 np=255892 qsp=53936 rpq=85 cbr=0 cng=14417 gpc=10033 gps=24320 nn=146741
-  1 np=261224 qsp=54638 rpq=33 cbr=0 cng=25723 gpc=16310 gps=2849 nn=155792
-  2 np=237496 qsp=49664 rpq=23 cbr=0 cng=2762 gpc=45478 gps=1762 nn=136629
-  3 np=236249 qsp=48766 rpq=98 cbr=0 cng=286 gpc=48049 gps=1218 nn=137723
-  4 np=221310 qsp=46850 rpq=7 cbr=0 cng=26 gpc=43161 gps=4634 nn=123110
-  5 np=237332 qsp=48449 rpq=9 cbr=0 cng=54 gpc=47920 gps=3252 nn=137456
-  6 np=219995 qsp=46718 rpq=12 cbr=0 cng=50 gpc=42098 gps=6093 nn=120834
-  7 np=249893 qsp=49390 rpq=42 cbr=0 cng=72 gpc=38400 gps=17102 nn=144888
-rcu_bh:
-  0 np=146741 qsp=1419 rpq=6 cbr=0 cng=6 gpc=0 gps=0 nn=145314
-  1 np=155792 qsp=12597 rpq=3 cbr=0 cng=0 gpc=4 gps=8 nn=143180
-  2 np=136629 qsp=18680 rpq=1 cbr=0 cng=0 gpc=7 gps=6 nn=117936
-  3 np=137723 qsp=2843 rpq=0 cbr=0 cng=0 gpc=10 gps=7 nn=134863
-  4 np=123110 qsp=12433 rpq=0 cbr=0 cng=0 gpc=4 gps=2 nn=110671
-  5 np=137456 qsp=4210 rpq=1 cbr=0 cng=0 gpc=6 gps=5 nn=133235
-  6 np=120834 qsp=9902 rpq=2 cbr=0 cng=0 gpc=6 gps=3 nn=110921
-  7 np=144888 qsp=26336 rpq=0 cbr=0 cng=0 gpc=8 gps=2 nn=118542
+  0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903
+  1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113
+  2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889
+  3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469
+  4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042
+  5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422
+  6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699
+  7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147
 
-As always, this is once again split into "rcu_sched" and "rcu_bh"
-portions, with CONFIG_TREE_PREEMPT_RCU kernels having an additional
-"rcu_preempt" section.  The fields are as follows:
+The fields are as follows:
 
+o	The leading number is the CPU number, with "!" indicating
+	an offline CPU.
+
 o	"np" is the number of times that __rcu_pending() has been invoked
	for the corresponding flavor of RCU.
@@ -377,38 +432,23 @@ o	"gpc" is the number of times that an old grace period had
 
 o	"gps" is the number of times that a new grace period had started,
	but this CPU was not yet aware of it.
 
-o	"nn" is the number of times that this CPU needed nothing.  Alert
-	readers will note that the rcu "nn" number for a given CPU very
-	closely matches the rcu_bh "np" number for that same CPU.  This
-	is due to short-circuit evaluation in rcu_pending().
-
-
-The output of "cat rcu/rcutorture" looks as follows:
-
-rcutorture test sequence: 0 (test in progress)
-rcutorture update version number: 615
-
-The first line shows the number of rcutorture tests that have completed
-since boot.  If a test is currently running, the "(test in progress)"
-string will appear as shown above.  The second line shows the number of
-update cycles that the current test has started, or zero if there is
-no test in progress.
+o	"nn" is the number of times that this CPU needed nothing.
 
 
 The output of "cat rcu/rcuboost" looks as follows:
 
-0:5 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f
-    balk: nt=0 egt=989 bt=0 nb=0 ny=0 nos=16
-6:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f
-    balk: nt=0 egt=225 bt=0 nb=0 ny=0 nos=6
+0:3 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894
+    balk: nt=0 egt=4695 bt=0 nb=0 ny=56 nos=0
+4:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894
+    balk: nt=0 egt=6541 bt=0 nb=0 ny=126 nos=0
 
 This information is output only for rcu_preempt.  Each two-line entry
 corresponds to a leaf rcu_node strcuture.  The fields are as follows:
 
 o	"n:m" is the CPU-number range for the corresponding two-line
	entry.  In the sample output above, the first entry covers
-	CPUs zero through five and the second entry covers CPUs 6
-	and 7.
+	CPUs zero through three and the second entry covers CPUs four
+	through seven.
 
 o	"tasks=TNEB" gives the state of the various segments of the
	rnp->blocked_tasks list:
@@ -499,6 +499,8 @@ The foo_reclaim() function might appear as follows:
	{
		struct foo *fp = container_of(rp, struct foo, rcu);
 
		foo_cleanup(fp->a);
+
+		kfree(fp);
	}
 
@@ -521,6 +523,12 @@ o	Use call_rcu() -after- removing a data element from an
	read-side critical sections that might be referencing that
	data item.
 
+If the callback for call_rcu() is not doing anything more than calling
+kfree() on the structure, you can use kfree_rcu() instead of call_rcu()
+to avoid having to write your own callback:
+
+	kfree_rcu(old_fp, rcu);
+
 Again, see checklist.txt for additional rules governing the use of RCU.
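As a concrete sketch of that shortcut, using the struct foo from this
document's earlier examples (the rcu field must be a struct rcu_head):

	struct foo {
		int a;
		char b;
		long c;
		struct rcu_head rcu;
	};

	/* Before: a hand-written callback was needed. */
	call_rcu(&old_fp->rcu, foo_reclaim);

	/* After: kfree_rcu() queues the kfree() for after a grace period. */
	kfree_rcu(old_fp, rcu);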
@@ -773,8 +781,8 @@ a single atomic update, converting to RCU will require special care.
 
 Also, the presence of synchronize_rcu() means that the RCU version of
 delete() can now block.  If this is a problem, there is a callback-based
-mechanism that never blocks, namely call_rcu(), that can be used in
-place of synchronize_rcu().
+mechanism that never blocks, namely call_rcu() or kfree_rcu(), that can
+be used in place of synchronize_rcu().
 
 
 7.  FULL LIST OF RCU APIs
@@ -789,9 +797,7 @@ RCU list traversal:
	list_for_each_entry_rcu
	hlist_for_each_entry_rcu
	hlist_nulls_for_each_entry_rcu
-
-	list_for_each_continue_rcu	(to be deprecated in favor of new
-					 list_for_each_entry_continue_rcu)
+	list_for_each_entry_continue_rcu
 
 RCU pointer/list update:
@@ -813,6 +819,7 @@ RCU:	Critical sections	Grace period		Barrier
	rcu_read_unlock		synchronize_rcu
	rcu_dereference		synchronize_rcu_expedited
				call_rcu
+				kfree_rcu
 
 
 bh:	Critical sections	Grace period		Barrier
@@ -251,12 +251,13 @@ And there are a number of things that _must_ or _must_not_ be assumed:
 
 And for:
 
-	*A = X; Y = *A;
+	*A = X; *(A + 4) = Y;
 
-we may get either of:
+we may get any of:
 
-	STORE *A = X; Y = LOAD *A;
-	STORE *A = Y = X;
+	STORE *A = X; STORE *(A + 4) = Y;
+	STORE *(A + 4) = Y; STORE *A = X;
+	STORE {*A, *(A + 4) } = {X, Y};
 
 
 =========================
|
||||
struct task_struct *from = current, *to = arg;
|
||||
|
||||
to->thread.saved_task = from;
|
||||
rcu_switch(from, to);
|
||||
rcu_user_hooks_switch(from, to);
|
||||
switch_to(from, to, from);
|
||||
}
|
||||
|
||||
|
@@ -286,23 +286,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
		&pos->member != (head); \
		pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
-
-/**
- * list_for_each_continue_rcu
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- *
- * Iterate over an rcu-protected list, continuing after current point.
- *
- * This list-traversal primitive may safely run concurrently with
- * the _rcu list-mutation primitives such as list_add_rcu()
- * as long as the traversal is guarded by rcu_read_lock().
- */
-#define list_for_each_continue_rcu(pos, head) \
-	for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \
-		(pos) != (head); \
-		(pos) = rcu_dereference_raw(list_next_rcu(pos)))
-
 /**
  * list_for_each_entry_continue_rcu - continue iteration over list of given type
  * @pos:	the type * to use as a loop cursor.
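For reference, continuing an iteration with the surviving entry-based
primitive looks like this (a sketch; find_starting_point(), process(),
struct foo, and foo_head are illustrative names only):

	struct foo *p;

	rcu_read_lock();
	p = find_starting_point();		/* some element on the list */
	list_for_each_entry_continue_rcu(p, &foo_head, list)
		process(p);			/* visits entries after p */
	rcu_read_unlock();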
@@ -90,6 +90,25 @@ extern void do_trace_rcu_torture_read(char *rcutorturename,
 * that started after call_rcu() was invoked.  RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section.  On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu().  It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section.  Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
 */
 extern void call_rcu(struct rcu_head *head,
			void (*func)(struct rcu_head *head));
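The usage pattern these guarantees apply to is the classic one from
Documentation/RCU/whatisRCU.txt (a sketch; struct foo, foo_cleanup(),
and foo_reclaim() are that document's example names):

	static void foo_reclaim(struct rcu_head *rp)
	{
		struct foo *fp = container_of(rp, struct foo, rcu);

		foo_cleanup(fp->a);

		kfree(fp);
	}

	/* Updater, after unlinking fp from all readers' view: */
	call_rcu(&fp->rcu, foo_reclaim);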
@@ -118,6 +137,9 @@ extern void call_rcu(struct rcu_head *head,
 * OR
 * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
 *  These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
 */
 extern void call_rcu_bh(struct rcu_head *head,
			void (*func)(struct rcu_head *head));
@@ -137,6 +159,9 @@ extern void call_rcu_bh(struct rcu_head *head,
 * OR
 * anything that disables preemption.
 *  These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
 */
 extern void call_rcu_sched(struct rcu_head *head,
			   void (*func)(struct rcu_head *rcu));
@@ -204,6 +229,8 @@ static inline void rcu_user_enter(void) { }
 static inline void rcu_user_exit(void) { }
 static inline void rcu_user_enter_after_irq(void) { }
 static inline void rcu_user_exit_after_irq(void) { }
+static inline void rcu_user_hooks_switch(struct task_struct *prev,
+					 struct task_struct *next) { }
 #endif /* CONFIG_RCU_USER_QS */
 
 extern void exit_rcu(void);
@@ -109,6 +109,8 @@ extern void update_cpu_load_nohz(void);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
+extern void dump_cpu_task(int cpu);
+
 struct seq_file;
 struct cfs_rq;
 struct task_group;
@@ -1844,14 +1846,6 @@ static inline void rcu_copy_process(struct task_struct *p)
 
 #endif
 
-static inline void rcu_switch(struct task_struct *prev,
-			      struct task_struct *next)
-{
-#ifdef CONFIG_RCU_USER_QS
-	rcu_user_hooks_switch(prev, next);
-#endif
-}
-
 static inline void tsk_restore_flags(struct task_struct *task,
				     unsigned long orig_flags, unsigned long flags)
 {
@@ -16,8 +16,10 @@
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2006
+ * Copyright (C) Fujitsu, 2012
 *
 * Author: Paul McKenney <paulmck@us.ibm.com>
+ *	   Lai Jiangshan <laijs@cn.fujitsu.com>
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		Documentation/RCU/ *.txt
@@ -40,6 +42,8 @@ struct rcu_batch {
	struct rcu_head *head, **tail;
 };
 
+#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
+
 struct srcu_struct {
	unsigned completed;
	struct srcu_struct_array __percpu *per_cpu_ref;
@@ -70,12 +74,42 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
	__init_srcu_struct((sp), #sp, &__srcu_key); \
 })
 
+#define __SRCU_DEP_MAP_INIT(srcu_name)	.dep_map = { .name = #srcu_name },
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 int init_srcu_struct(struct srcu_struct *sp);
 
+#define __SRCU_DEP_MAP_INIT(srcu_name)
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+void process_srcu(struct work_struct *work);
+
+#define __SRCU_STRUCT_INIT(name)					\
+	{								\
+		.completed = -300,					\
+		.per_cpu_ref = &name##_srcu_array,			\
+		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
+		.running = false,					\
+		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\
+		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\
+		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\
+		.batch_done = RCU_BATCH_INIT(name.batch_done),		\
+		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
+		__SRCU_DEP_MAP_INIT(name)				\
+	}
+
+/*
+ * define and init a srcu struct at build time.
+ * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ */
+#define DEFINE_SRCU(name)						\
+	static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+	struct srcu_struct name = __SRCU_STRUCT_INIT(name);
+
+#define DEFINE_STATIC_SRCU(name)					\
+	static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+	static struct srcu_struct name = __SRCU_STRUCT_INIT(name);
+
 /**
 * call_srcu() - Queue a callback for invocation after an SRCU grace period
 * @sp: srcu_struct in queue the callback
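A minimal use of the new build-time initialization might look as follows
(a sketch; my_srcu is an illustrative name -- the rcutorture conversion
later in this patch does the same thing with srcu_ctl):

	DEFINE_STATIC_SRCU(my_srcu);	/* no init_srcu_struct()/cleanup_srcu_struct() */

	int idx;

	idx = srcu_read_lock(&my_srcu);
	/* ... SRCU read-side critical section ... */
	srcu_read_unlock(&my_srcu, idx);

	/* Updater: */
	synchronize_srcu(&my_srcu);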
init/Kconfig (19 changed lines):
@@ -494,11 +494,11 @@ config RCU_USER_QS
	  puts RCU in extended quiescent state when the CPU runs in
	  userspace. It means that when a CPU runs in userspace, it is
	  excluded from the global RCU state machine and thus doesn't
-	  to keep the timer tick on for RCU.
+	  try to keep the timer tick on for RCU.
 
	  Unless you want to hack and help the development of the full
-	  tickless feature, you shouldn't enable this option. It adds
-	  unnecessary overhead.
+	  tickless feature, you shouldn't enable this option. It also
+	  adds unnecessary overhead.
 
	  If unsure say N
 
@@ -582,14 +582,13 @@ config RCU_FAST_NO_HZ
	depends on NO_HZ && SMP
	default n
	help
-	  This option causes RCU to attempt to accelerate grace periods
-	  in order to allow CPUs to enter dynticks-idle state more
-	  quickly.  On the other hand, this option increases the overhead
-	  of the dynticks-idle checking, particularly on systems with
-	  large numbers of CPUs.
+	  This option causes RCU to attempt to accelerate grace periods in
+	  order to allow CPUs to enter dynticks-idle state more quickly.
+	  On the other hand, this option increases the overhead of the
+	  dynticks-idle checking, thus degrading scheduling latency.
 
-	  Say Y if energy efficiency is critically important, particularly
-	  if you have relatively few CPUs.
+	  Say Y if energy efficiency is critically important, and you don't
+	  care about real-time response.
 
	  Say N if you are unsure.
 
@@ -141,6 +141,23 @@ static ssize_t fscaps_show(struct kobject *kobj,
 }
 KERNEL_ATTR_RO(fscaps);
 
+int rcu_expedited;
+static ssize_t rcu_expedited_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", rcu_expedited);
+}
+static ssize_t rcu_expedited_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t count)
+{
+	if (kstrtoint(buf, 0, &rcu_expedited))
+		return -EINVAL;
+
+	return count;
+}
+KERNEL_ATTR_RW(rcu_expedited);
+
 /*
 * Make /sys/kernel/notes give the raw contents of our kernel .notes section.
 */
@@ -182,6 +199,7 @@ static struct attribute * kernel_attrs[] = {
	&kexec_crash_size_attr.attr,
	&vmcoreinfo_attr.attr,
 #endif
+	&rcu_expedited_attr.attr,
	NULL
 };
 
@@ -109,4 +109,6 @@ static inline bool __rcu_reclaim(char *rn, struct rcu_head *head)
	}
 }
 
+extern int rcu_expedited;
+
 #endif /* __LINUX_RCU_H */
@@ -46,12 +46,15 @@
 #include <linux/export.h>
 #include <linux/hardirq.h>
 #include <linux/delay.h>
+#include <linux/module.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/rcu.h>
 
 #include "rcu.h"
 
+module_param(rcu_expedited, int, 0);
+
 #ifdef CONFIG_PREEMPT_RCU
 
 /*
@@ -195,7 +195,7 @@ EXPORT_SYMBOL(rcu_is_cpu_idle);
 */
 int rcu_is_cpu_rrupt_from_idle(void)
 {
-	return rcu_dynticks_nesting <= 0;
+	return rcu_dynticks_nesting <= 1;
 }
 
 /*
@@ -706,7 +706,10 @@ void synchronize_rcu(void)
		return;
 
	/* Once we get past the fastpath checks, same code as rcu_barrier(). */
-	rcu_barrier();
+	if (rcu_expedited)
+		synchronize_rcu_expedited();
+	else
+		rcu_barrier();
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
@ -339,7 +339,6 @@ rcu_stutter_wait(char *title)
|
||||
|
||||
struct rcu_torture_ops {
|
||||
void (*init)(void);
|
||||
void (*cleanup)(void);
|
||||
int (*readlock)(void);
|
||||
void (*read_delay)(struct rcu_random_state *rrsp);
|
||||
void (*readunlock)(int idx);
|
||||
@ -431,7 +430,6 @@ static void rcu_torture_deferred_free(struct rcu_torture *p)
|
||||
|
||||
static struct rcu_torture_ops rcu_ops = {
|
||||
.init = NULL,
|
||||
.cleanup = NULL,
|
||||
.readlock = rcu_torture_read_lock,
|
||||
.read_delay = rcu_read_delay,
|
||||
.readunlock = rcu_torture_read_unlock,
|
||||
@ -475,7 +473,6 @@ static void rcu_sync_torture_init(void)
|
||||
|
||||
static struct rcu_torture_ops rcu_sync_ops = {
|
||||
.init = rcu_sync_torture_init,
|
||||
.cleanup = NULL,
|
||||
.readlock = rcu_torture_read_lock,
|
||||
.read_delay = rcu_read_delay,
|
||||
.readunlock = rcu_torture_read_unlock,
|
||||
@ -493,7 +490,6 @@ static struct rcu_torture_ops rcu_sync_ops = {
|
||||
|
||||
static struct rcu_torture_ops rcu_expedited_ops = {
|
||||
.init = rcu_sync_torture_init,
|
||||
.cleanup = NULL,
|
||||
.readlock = rcu_torture_read_lock,
|
||||
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
|
||||
.readunlock = rcu_torture_read_unlock,
|
||||
@ -536,7 +532,6 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
|
||||
|
||||
static struct rcu_torture_ops rcu_bh_ops = {
|
||||
.init = NULL,
|
||||
.cleanup = NULL,
|
||||
.readlock = rcu_bh_torture_read_lock,
|
||||
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
|
||||
.readunlock = rcu_bh_torture_read_unlock,
|
||||
@ -553,7 +548,6 @@ static struct rcu_torture_ops rcu_bh_ops = {
|
||||
|
||||
static struct rcu_torture_ops rcu_bh_sync_ops = {
|
||||
.init = rcu_sync_torture_init,
|
||||
.cleanup = NULL,
|
||||
.readlock = rcu_bh_torture_read_lock,
|
||||
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
|
||||
.readunlock = rcu_bh_torture_read_unlock,
|
||||
@ -570,7 +564,6 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
|
||||
|
||||
static struct rcu_torture_ops rcu_bh_expedited_ops = {
|
||||
.init = rcu_sync_torture_init,
|
||||
.cleanup = NULL,
|
||||
.readlock = rcu_bh_torture_read_lock,
|
||||
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
|
||||
.readunlock = rcu_bh_torture_read_unlock,
|
||||
@ -589,19 +582,7 @@ static struct rcu_torture_ops rcu_bh_expedited_ops = {
|
||||
* Definitions for srcu torture testing.
|
||||
*/
|
||||
|
||||
static struct srcu_struct srcu_ctl;
|
||||
|
||||
static void srcu_torture_init(void)
|
||||
{
|
||||
init_srcu_struct(&srcu_ctl);
|
||||
rcu_sync_torture_init();
|
||||
}
|
||||
|
||||
static void srcu_torture_cleanup(void)
|
||||
{
|
||||
synchronize_srcu(&srcu_ctl);
|
||||
cleanup_srcu_struct(&srcu_ctl);
|
||||
}
|
||||
DEFINE_STATIC_SRCU(srcu_ctl);
|
||||
|
||||
static int srcu_torture_read_lock(void) __acquires(&srcu_ctl)
|
||||
{
|
||||
@ -672,8 +653,7 @@ static int srcu_torture_stats(char *page)
|
||||
}
|
||||
|
||||
static struct rcu_torture_ops srcu_ops = {
|
||||
.init = srcu_torture_init,
|
||||
.cleanup = srcu_torture_cleanup,
|
||||
.init = rcu_sync_torture_init,
|
||||
.readlock = srcu_torture_read_lock,
|
||||
.read_delay = srcu_read_delay,
|
||||
.readunlock = srcu_torture_read_unlock,
|
||||
@ -687,8 +667,7 @@ static struct rcu_torture_ops srcu_ops = {
|
||||
};
|
||||
|
||||
static struct rcu_torture_ops srcu_sync_ops = {
|
||||
.init = srcu_torture_init,
|
||||
.cleanup = srcu_torture_cleanup,
|
||||
.init = rcu_sync_torture_init,
|
||||
.readlock = srcu_torture_read_lock,
|
||||
.read_delay = srcu_read_delay,
|
||||
.readunlock = srcu_torture_read_unlock,
|
||||
@ -712,8 +691,7 @@ static void srcu_torture_read_unlock_raw(int idx) __releases(&srcu_ctl)
|
||||
}
|
||||
|
||||
static struct rcu_torture_ops srcu_raw_ops = {
|
||||
.init = srcu_torture_init,
|
||||
.cleanup = srcu_torture_cleanup,
|
||||
.init = rcu_sync_torture_init,
|
||||
.readlock = srcu_torture_read_lock_raw,
|
||||
.read_delay = srcu_read_delay,
|
||||
.readunlock = srcu_torture_read_unlock_raw,
|
||||
@ -727,8 +705,7 @@ static struct rcu_torture_ops srcu_raw_ops = {
|
||||
};
|
||||
|
||||
static struct rcu_torture_ops srcu_raw_sync_ops = {
|
||||
.init = srcu_torture_init,
|
||||
.cleanup = srcu_torture_cleanup,
|
||||
.init = rcu_sync_torture_init,
|
||||
.readlock = srcu_torture_read_lock_raw,
|
||||
.read_delay = srcu_read_delay,
|
||||
.readunlock = srcu_torture_read_unlock_raw,
|
||||
@ -747,8 +724,7 @@ static void srcu_torture_synchronize_expedited(void)
|
||||
}
|
||||
|
||||
static struct rcu_torture_ops srcu_expedited_ops = {
|
||||
.init = srcu_torture_init,
|
||||
.cleanup = srcu_torture_cleanup,
|
||||
.init = rcu_sync_torture_init,
|
||||
.readlock = srcu_torture_read_lock,
|
||||
.read_delay = srcu_read_delay,
|
||||
.readunlock = srcu_torture_read_unlock,
|
||||
@ -783,7 +759,6 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
|
||||
|
||||
static struct rcu_torture_ops sched_ops = {
|
||||
.init = rcu_sync_torture_init,
|
||||
.cleanup = NULL,
|
||||
.readlock = sched_torture_read_lock,
|
||||
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
|
||||
.readunlock = sched_torture_read_unlock,
|
||||
@ -799,7 +774,6 @@ static struct rcu_torture_ops sched_ops = {
|
||||
|
||||
static struct rcu_torture_ops sched_sync_ops = {
|
||||
.init = rcu_sync_torture_init,
|
||||
.cleanup = NULL,
|
||||
.readlock = sched_torture_read_lock,
|
||||
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
|
||||
.readunlock = sched_torture_read_unlock,
|
||||
@ -814,7 +788,6 @@ static struct rcu_torture_ops sched_sync_ops = {
|
||||
|
||||
static struct rcu_torture_ops sched_expedited_ops = {
|
||||
.init = rcu_sync_torture_init,
|
||||
.cleanup = NULL,
|
||||
.readlock = sched_torture_read_lock,
|
||||
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
|
||||
.readunlock = sched_torture_read_unlock,
|
||||
@ -1396,12 +1369,16 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
|
||||
"fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
|
||||
"test_boost=%d/%d test_boost_interval=%d "
|
||||
"test_boost_duration=%d shutdown_secs=%d "
|
||||
"stall_cpu=%d stall_cpu_holdoff=%d "
|
||||
"n_barrier_cbs=%d "
|
||||
"onoff_interval=%d onoff_holdoff=%d\n",
|
||||
torture_type, tag, nrealreaders, nfakewriters,
|
||||
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
|
||||
stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
|
||||
test_boost, cur_ops->can_boost,
|
||||
test_boost_interval, test_boost_duration, shutdown_secs,
|
||||
stall_cpu, stall_cpu_holdoff,
|
||||
n_barrier_cbs,
|
||||
onoff_interval, onoff_holdoff);
|
||||
}
|
||||
|
||||
@ -1502,6 +1479,7 @@ rcu_torture_onoff(void *arg)
|
||||
unsigned long delta;
|
||||
int maxcpu = -1;
|
||||
DEFINE_RCU_RANDOM(rand);
|
||||
int ret;
|
||||
unsigned long starttime;
|
||||
|
||||
VERBOSE_PRINTK_STRING("rcu_torture_onoff task started");
|
||||
@ -1522,7 +1500,13 @@ rcu_torture_onoff(void *arg)
|
||||
torture_type, cpu);
|
||||
starttime = jiffies;
|
||||
n_offline_attempts++;
|
||||
if (cpu_down(cpu) == 0) {
|
||||
ret = cpu_down(cpu);
|
||||
if (ret) {
|
||||
if (verbose)
|
||||
pr_alert("%s" TORTURE_FLAG
|
||||
"rcu_torture_onoff task: offline %d failed: errno %d\n",
|
||||
torture_type, cpu, ret);
|
||||
} else {
|
||||
if (verbose)
|
||||
pr_alert("%s" TORTURE_FLAG
|
||||
"rcu_torture_onoff task: offlined %d\n",
@@ -1936,8 +1920,6 @@ rcu_torture_cleanup(void)

	rcu_torture_stats_print();  /* -After- the stats thread is stopped! */

	if (cur_ops->cleanup)
		cur_ops->cleanup();
	if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error)
		rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
	else if (n_online_successes != n_online_attempts ||

kernel/rcutree.c
@@ -68,9 +68,9 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
	.level = { &sname##_state.node[0] }, \
	.call = cr, \
	.fqs_state = RCU_GP_IDLE, \
	.gpnum = -300, \
	.completed = -300, \
	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
	.gpnum = 0UL - 300UL, \
	.completed = 0UL - 300UL, \
	.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
	.orphan_donetail = &sname##_state.orphan_donelist, \
	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
@@ -212,13 +212,13 @@ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
#endif
};

static int blimit = 10;		/* Maximum callbacks per rcu_do_batch. */
static int qhimark = 10000;	/* If this many pending, ignore blimit. */
static int qlowmark = 100;	/* Once only this many pending, use blimit. */
static long blimit = 10;	/* Maximum callbacks per rcu_do_batch. */
static long qhimark = 10000;	/* If this many pending, ignore blimit. */
static long qlowmark = 100;	/* Once only this many pending, use blimit. */

module_param(blimit, int, 0444);
module_param(qhimark, int, 0444);
module_param(qlowmark, int, 0444);
module_param(blimit, long, 0444);
module_param(qhimark, long, 0444);
module_param(qlowmark, long, 0444);
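These three knobs change from int to long because they are compared against callback counts such as rdp->qlen, which are longs: on a 64-bit system a callback flood can push queue lengths past what an int holds, and keeping all of the arithmetic in one type avoids narrowing (the bl/count locals in rcu_do_batch() below get the same treatment). A sketch of the hazard being avoided (illustrative, not kernel code):

	long qlen = 3000000000L;	/* plausible on 64-bit under callback flood */
	long qhimark_wide = 10000;	/* new type: same width as qlen */

	if (qlen > qhimark_wide)
		;	/* enter callback-flood mode; no narrowing anywhere */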

int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
@@ -313,7 +313,7 @@ static int
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
	return *rdp->nxttail[RCU_DONE_TAIL +
	       ACCESS_ONCE(rsp->completed) != rdp->completed] &&
	       (ACCESS_ONCE(rsp->completed) != rdp->completed)] &&
	       !rcu_gp_in_progress(rsp);
}

@@ -873,6 +873,29 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
	rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
}

/*
 * Dump stacks of all tasks running on stalled CPUs.  This is a fallback
 * for architectures that do not implement trigger_all_cpu_backtrace().
 * The NMI-triggered stack traces are more accurate because they are
 * printed by the target CPU.
 */
static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
{
	int cpu;
	unsigned long flags;
	struct rcu_node *rnp;

	rcu_for_each_leaf_node(rsp, rnp) {
		raw_spin_lock_irqsave(&rnp->lock, flags);
		if (rnp->qsmask != 0) {
			for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
				if (rnp->qsmask & (1UL << cpu))
					dump_cpu_task(rnp->grplo + cpu);
		}
		raw_spin_unlock_irqrestore(&rnp->lock, flags);
	}
}
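Each leaf rcu_node covers CPUs rnp->grplo through rnp->grphi, and ->qsmask holds one bit per covered CPU that has not yet passed through a quiescent state, so the walk above dumps exactly the CPUs holding up the grace period. A self-contained sketch of the same bitmask walk (userspace illustration, not kernel code):

	#include <stdio.h>

	struct leaf {
		int grplo, grphi;	/* range of CPUs this node covers */
		unsigned long qsmask;	/* set bit => CPU still blocking the GP */
	};

	static void visit_stalled(const struct leaf *l, void (*fn)(int))
	{
		int cpu;

		for (cpu = 0; cpu <= l->grphi - l->grplo; cpu++)
			if (l->qsmask & (1UL << cpu))
				fn(l->grplo + cpu);
	}

	static void show(int cpu)
	{
		printf("dump stack of CPU %d\n", cpu);
	}

	int main(void)
	{
		struct leaf l = { .grplo = 16, .grphi = 31, .qsmask = 0x5UL };

		visit_stalled(&l, show);	/* visits CPUs 16 and 18 */
		return 0;
	}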

static void print_other_cpu_stall(struct rcu_state *rsp)
{
	int cpu;
@@ -880,6 +903,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
	unsigned long flags;
	int ndetected = 0;
	struct rcu_node *rnp = rcu_get_root(rsp);
	long totqlen = 0;

	/* Only let one CPU complain about others per time interval. */

@@ -924,12 +948,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
	raw_spin_unlock_irqrestore(&rnp->lock, flags);

	print_cpu_stall_info_end();
	printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n",
	       smp_processor_id(), (long)(jiffies - rsp->gp_start));
	for_each_possible_cpu(cpu)
		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
	pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n",
		smp_processor_id(), (long)(jiffies - rsp->gp_start),
		rsp->gpnum, rsp->completed, totqlen);
	if (ndetected == 0)
		printk(KERN_ERR "INFO: Stall ended before state dump start\n");
	else if (!trigger_all_cpu_backtrace())
		dump_stack();
	rcu_dump_cpu_stacks(rsp);

	/* Complain about tasks blocking the grace period. */

@@ -940,8 +967,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp)

static void print_cpu_stall(struct rcu_state *rsp)
{
	int cpu;
	unsigned long flags;
	struct rcu_node *rnp = rcu_get_root(rsp);
	long totqlen = 0;

	/*
	 * OK, time to rat on ourselves...
@@ -952,7 +981,10 @@ static void print_cpu_stall(struct rcu_state *rsp)
	print_cpu_stall_info_begin();
	print_cpu_stall_info(rsp, smp_processor_id());
	print_cpu_stall_info_end();
	printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start);
	for_each_possible_cpu(cpu)
		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
	pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n",
		jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen);
	if (!trigger_all_cpu_backtrace())
		dump_stack();
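Both stall reports thus gain the current grace-period number (g=), the last completed grace period (c=), and the total number of callbacks queued across all possible CPUs (q=). Rendered with invented values, the new pr_cont() tails would read along these lines (hypothetical output, derived mechanically from the format strings above):

	(detected by 3, t=21004 jiffies, g=4240, c=4239, q=831)
	 (t=21004 jiffies g=4240 c=4239 q=831)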

@@ -1404,15 +1436,37 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
	    !cpu_needs_another_gp(rsp, rdp)) {
		/*
		 * Either we have not yet spawned the grace-period
		 * task or this CPU does not need another grace period.
		 * task, this CPU does not need another grace period,
		 * or a grace period is already in progress.
		 * Either way, don't start a new grace period.
		 */
		raw_spin_unlock_irqrestore(&rnp->lock, flags);
		return;
	}

	/*
	 * Because there is no grace period in progress right now,
	 * any callbacks we have up to this point will be satisfied
	 * by the next grace period.  So promote all callbacks to be
	 * handled after the end of the next grace period.  If the
	 * CPU is not yet aware of the end of the previous grace period,
	 * we need to allow for the callback advancement that will
	 * occur when it does become aware.  Deadlock prevents us from
	 * making it aware at this point: We cannot acquire a leaf
	 * rcu_node ->lock while holding the root rcu_node ->lock.
	 */
	rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
	if (rdp->completed == rsp->completed)
		rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];

	rsp->gp_flags = RCU_GP_FLAG_INIT;
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
	raw_spin_unlock(&rnp->lock);	/* Interrupts remain disabled. */

	/* Ensure that CPU is aware of completion of last grace period. */
	rcu_process_gp_end(rsp, rdp);
	local_irq_restore(flags);

	/* Wake up rcu_gp_kthread() to start the grace period. */
	wake_up(&rsp->gp_wq);
}
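The tail-pointer manipulation above relies on RCU's segmented callback list: rdp->nxttail[] holds tail pointers that partition one singly linked list into DONE/WAIT/NEXT_READY/NEXT segments, so "promoting" callbacks to a later grace period moves pointers rather than splicing list nodes. A minimal sketch of the idea (illustrative; the names mirror the kernel's but this is not kernel code):

	struct cb {
		struct cb *next;
	};

	enum { SEG_DONE, SEG_WAIT, SEG_NEXT_READY, SEG_NEXT, SEG_COUNT };

	struct cblist {
		struct cb *head;
		struct cb **tail[SEG_COUNT];	/* tail[i]: last ->next of segment i */
	};

	/* Promote everything queued so far to wait for the upcoming grace
	 * period: only tail pointers move, the list itself is untouched. */
	static void promote_all(struct cblist *cl)
	{
		cl->tail[SEG_NEXT_READY] = cl->tail[SEG_NEXT];
		cl->tail[SEG_WAIT] = cl->tail[SEG_NEXT];
	}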

@@ -1573,7 +1627,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
/*
 * Send the specified CPU's RCU callbacks to the orphanage.  The
 * specified CPU must be offline, and the caller must hold the
 * ->onofflock.
 * ->orphan_lock.
 */
static void
rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
@@ -1581,8 +1635,8 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
{
	/*
	 * Orphan the callbacks.  First adjust the counts.  This is safe
	 * because ->onofflock excludes _rcu_barrier()'s adoption of
	 * the callbacks, thus no memory barrier is required.
	 * because _rcu_barrier() excludes CPU-hotplug operations, so it
	 * cannot be running now.  Thus no memory barrier is required.
	 */
	if (rdp->nxtlist != NULL) {
		rsp->qlen_lazy += rdp->qlen_lazy;
@@ -1623,7 +1677,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,

/*
 * Adopt the RCU callbacks from the specified rcu_state structure's
 * orphanage.  The caller must hold the ->onofflock.
 * orphanage.  The caller must hold the ->orphan_lock.
 */
static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
{
@@ -1702,7 +1756,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)

	/* Exclude any attempts to start a new grace period. */
	mutex_lock(&rsp->onoff_mutex);
	raw_spin_lock_irqsave(&rsp->onofflock, flags);
	raw_spin_lock_irqsave(&rsp->orphan_lock, flags);

	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
@@ -1729,10 +1783,10 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
	/*
	 * We still hold the leaf rcu_node structure lock here, and
	 * irqs are still disabled.  The reason for this subterfuge is
	 * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
	 * because invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
	 * held leads to deadlock.
	 */
	raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
	raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
	rnp = rdp->mynode;
	if (need_report & RCU_OFL_TASKS_NORM_GP)
		rcu_report_unblock_qs_rnp(rnp, flags);
@@ -1769,7 +1823,8 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
{
	unsigned long flags;
	struct rcu_head *next, *list, **tail;
	int bl, count, count_lazy, i;
	long bl, count, count_lazy;
	int i;

	/* If no callbacks are ready, just return.*/
	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
@@ -2205,10 +2260,28 @@ static inline int rcu_blocking_is_gp(void)
 * rcu_read_lock_sched().
 *
 * This means that all preempt_disable code sequences, including NMI and
 * hardware-interrupt handlers, in progress on entry will have completed
 * before this primitive returns.  However, this does not guarantee that
 * softirq handlers will have completed, since in some kernels, these
 * handlers can run in process context, and can block.
 * non-threaded hardware-interrupt handlers, in progress on entry will
 * have completed before this primitive returns.  However, this does not
 * guarantee that softirq handlers will have completed, since in some
 * kernels, these handlers can run in process context, and can block.
 *
 * Note that this guarantee implies further memory-ordering guarantees.
 * On systems with more than one CPU, when synchronize_sched() returns,
 * each CPU is guaranteed to have executed a full memory barrier since the
 * end of its last RCU-sched read-side critical section whose beginning
 * preceded the call to synchronize_sched().  In addition, each CPU having
 * an RCU read-side critical section that extends beyond the return from
 * synchronize_sched() is guaranteed to have executed a full memory barrier
 * after the beginning of synchronize_sched() and before the beginning of
 * that RCU read-side critical section.  Note that these guarantees include
 * CPUs that are offline, idle, or executing in user mode, as well as CPUs
 * that are executing in the kernel.
 *
 * Furthermore, if CPU A invoked synchronize_sched(), which returned
 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
 * to have executed a full memory barrier during the execution of
 * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
 * again only if the system has more than one CPU).
 *
 * This primitive provides the guarantees made by the (now removed)
 * synchronize_kernel() API.  In contrast, synchronize_rcu() only
@@ -2224,7 +2297,10 @@ void synchronize_sched(void)
	    "Illegal synchronize_sched() in RCU-sched read-side critical section");
	if (rcu_blocking_is_gp())
		return;
	wait_rcu_gp(call_rcu_sched);
	if (rcu_expedited)
		synchronize_sched_expedited();
	else
		wait_rcu_gp(call_rcu_sched);
}
EXPORT_SYMBOL_GPL(synchronize_sched);
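The newly documented memory-barrier guarantee is exactly what makes the classic unlink/wait/reclaim pattern safe under RCU-sched. A minimal sketch, assuming kernel context, a single updater, and a global pointer gp (illustrative only, not code from this commit):

	struct foo {
		int a;
	};
	static struct foo __rcu *gp;

	void reader(void)
	{
		struct foo *p;

		rcu_read_lock_sched();			/* begin read-side section */
		p = rcu_dereference_sched(gp);
		if (p)
			pr_info("a=%d\n", p->a);
		rcu_read_unlock_sched();		/* end read-side section */
	}

	void replace(struct foo *newp)			/* assumes one updater */
	{
		struct foo *oldp = rcu_access_pointer(gp);

		rcu_assign_pointer(gp, newp);
		synchronize_sched();	/* all pre-existing readers are done and
					 * fully ordered before this returns... */
		kfree(oldp);		/* ...so the free cannot race with them */
	}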

@@ -2236,6 +2312,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched);
 * read-side critical sections have completed.  RCU read-side critical
 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
 * and may be nested.
 *
 * See the description of synchronize_sched() for more detailed information
 * on memory ordering guarantees.
 */
void synchronize_rcu_bh(void)
{
@@ -2245,13 +2324,13 @@ void synchronize_rcu_bh(void)
	    "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
	if (rcu_blocking_is_gp())
		return;
	wait_rcu_gp(call_rcu_bh);
	if (rcu_expedited)
		synchronize_rcu_bh_expedited();
	else
		wait_rcu_gp(call_rcu_bh);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);

static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);

static int synchronize_sched_expedited_cpu_stop(void *data)
{
	/*
@@ -2308,10 +2387,32 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
 */
void synchronize_sched_expedited(void)
{
	int firstsnap, s, snap, trycount = 0;
	long firstsnap, s, snap;
	int trycount = 0;
	struct rcu_state *rsp = &rcu_sched_state;

	/* Note that atomic_inc_return() implies full memory barrier. */
	firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
	/*
	 * If we are in danger of counter wrap, just do synchronize_sched().
	 * By allowing sync_sched_expedited_started to advance no more than
	 * ULONG_MAX/8 ahead of sync_sched_expedited_done, we are ensuring
	 * that more than 3.5 billion CPUs would be required to force a
	 * counter wrap on a 32-bit system.  Quite a few more CPUs would of
	 * course be required on a 64-bit system.
	 */
	if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
			 (ulong)atomic_long_read(&rsp->expedited_done) +
			 ULONG_MAX / 8)) {
		synchronize_sched();
		atomic_long_inc(&rsp->expedited_wrap);
		return;
	}

	/*
	 * Take a ticket.  Note that atomic_inc_return() implies a
	 * full memory barrier.
	 */
	snap = atomic_long_inc_return(&rsp->expedited_start);
	firstsnap = snap;
	get_online_cpus();
	WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));

@@ -2323,48 +2424,65 @@ void synchronize_sched_expedited(void)
			     synchronize_sched_expedited_cpu_stop,
			     NULL) == -EAGAIN) {
		put_online_cpus();
		atomic_long_inc(&rsp->expedited_tryfail);

		/* Check to see if someone else did our work for us. */
		s = atomic_long_read(&rsp->expedited_done);
		if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
			/* ensure test happens before caller kfree */
			smp_mb__before_atomic_inc(); /* ^^^ */
			atomic_long_inc(&rsp->expedited_workdone1);
			return;
		}

		/* No joy, try again later.  Or just synchronize_sched(). */
		if (trycount++ < 10) {
			udelay(trycount * num_online_cpus());
		} else {
			synchronize_sched();
			wait_rcu_gp(call_rcu_sched);
			atomic_long_inc(&rsp->expedited_normal);
			return;
		}

		/* Check to see if someone else did our work for us. */
		s = atomic_read(&sync_sched_expedited_done);
		if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
			smp_mb(); /* ensure test happens before caller kfree */
		/* Recheck to see if someone else did our work for us. */
		s = atomic_long_read(&rsp->expedited_done);
		if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
			/* ensure test happens before caller kfree */
			smp_mb__before_atomic_inc(); /* ^^^ */
			atomic_long_inc(&rsp->expedited_workdone2);
			return;
		}

		/*
		 * Refetching sync_sched_expedited_started allows later
		 * callers to piggyback on our grace period.  We subtract
		 * 1 to get the same token that the last incrementer got.
		 * We retry after they started, so our grace period works
		 * for them, and they started after our first try, so their
		 * grace period works for us.
		 * callers to piggyback on our grace period.  We retry
		 * after they started, so our grace period works for them,
		 * and they started after our first try, so their grace
		 * period works for us.
		 */
		get_online_cpus();
		snap = atomic_read(&sync_sched_expedited_started);
		snap = atomic_long_read(&rsp->expedited_start);
		smp_mb(); /* ensure read is before try_stop_cpus(). */
	}
	atomic_long_inc(&rsp->expedited_stoppedcpus);

	/*
	 * Everyone up to our most recent fetch is covered by our grace
	 * period.  Update the counter, but only if our work is still
	 * relevant -- which it won't be if someone who started later
	 * than we did beat us to the punch.
	 * than we did already did their update.
	 */
	do {
		s = atomic_read(&sync_sched_expedited_done);
		if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
			smp_mb(); /* ensure test happens before caller kfree */
		atomic_long_inc(&rsp->expedited_done_tries);
		s = atomic_long_read(&rsp->expedited_done);
		if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
			/* ensure test happens before caller kfree */
			smp_mb__before_atomic_inc(); /* ^^^ */
			atomic_long_inc(&rsp->expedited_done_lost);
			break;
		}
	} while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
	} while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
	atomic_long_inc(&rsp->expedited_done_exit);

	put_online_cpus();
}
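The rewritten function is a ticket algorithm: each attempt takes a start ticket from expedited_start, every completed expedited grace period advances expedited_done, and any attempt whose ticket the done counter has already passed was covered by someone else's grace period and may return early. Because the counters run freely, every comparison goes through the wrap-safe ULONG_CMP_GE(). A userspace sketch of the scheme (illustrative, not the kernel code):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <limits.h>

	static atomic_ulong start_ticket;	/* bumped as an attempt begins */
	static atomic_ulong done_ticket;	/* advanced as grace periods end */

	/* Wrap-safe "a >= b", the same trick as the kernel's ULONG_CMP_GE(). */
	static bool cmp_ge(unsigned long a, unsigned long b)
	{
		return ULONG_MAX / 2 >= a - b;
	}

	void expedite(void)
	{
		unsigned long snap = atomic_fetch_add(&start_ticket, 1) + 1;

		if (cmp_ge(atomic_load(&done_ticket), snap))
			return;		/* someone else's grace period covered us */

		/* ...force a grace period covering everything up to snap...
		 * (the kernel publishes completion with a cmpxchg loop so a
		 * later ticket is never overwritten by an earlier one). */
		atomic_store(&done_ticket, snap);
	}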

kernel/rcutree.h
@@ -383,9 +383,8 @@ struct rcu_state {

	/* End of fields guarded by root rcu_node's lock. */

	raw_spinlock_t onofflock ____cacheline_internodealigned_in_smp;
						/* exclude on/offline and */
						/* starting new GP. */
	raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
						/* Protect following fields. */
	struct rcu_head *orphan_nxtlist;	/* Orphaned callbacks that */
						/*  need a grace period. */
	struct rcu_head **orphan_nxttail;	/* Tail of above. */
@@ -394,7 +393,7 @@ struct rcu_state {
	struct rcu_head **orphan_donetail;	/* Tail of above. */
	long qlen_lazy;				/* Number of lazy callbacks. */
	long qlen;				/* Total number of callbacks. */
	/* End of fields guarded by onofflock. */
	/* End of fields guarded by orphan_lock. */

	struct mutex onoff_mutex;		/* Coordinate hotplug & GPs. */

@@ -405,6 +404,18 @@ struct rcu_state {
						/* _rcu_barrier(). */
	/* End of fields guarded by barrier_mutex. */

	atomic_long_t expedited_start;		/* Starting ticket. */
	atomic_long_t expedited_done;		/* Done ticket. */
	atomic_long_t expedited_wrap;		/* # near-wrap incidents. */
	atomic_long_t expedited_tryfail;	/* # acquisition failures. */
	atomic_long_t expedited_workdone1;	/* # done by others #1. */
	atomic_long_t expedited_workdone2;	/* # done by others #2. */
	atomic_long_t expedited_normal;		/* # fallbacks to normal. */
	atomic_long_t expedited_stoppedcpus;	/* # successful stop_cpus. */
	atomic_long_t expedited_done_tries;	/* # tries to update _done. */
	atomic_long_t expedited_done_lost;	/* # times beaten to _done. */
	atomic_long_t expedited_done_exit;	/* # times exited _done loop. */

	unsigned long jiffies_force_qs;		/* Time at which to invoke */
						/*  force_quiescent_state(). */
	unsigned long n_force_qs;		/* Number of calls to */

kernel/rcutree_plugin.h
@@ -670,6 +670,9 @@ EXPORT_SYMBOL_GPL(kfree_call_rcu);
 * concurrently with new RCU read-side critical sections that began while
 * synchronize_rcu() was waiting.  RCU read-side critical sections are
 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
 *
 * See the description of synchronize_sched() for more detailed information
 * on memory ordering guarantees.
 */
void synchronize_rcu(void)
{
@@ -679,7 +682,10 @@ void synchronize_rcu(void)
	    "Illegal synchronize_rcu() in RCU read-side critical section");
	if (!rcu_scheduler_active)
		return;
	wait_rcu_gp(call_rcu);
	if (rcu_expedited)
		synchronize_rcu_expedited();
	else
		wait_rcu_gp(call_rcu);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
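synchronize_rcu(), synchronize_rcu_bh(), synchronize_sched(), and (below) synchronize_srcu() all now branch on a global rcu_expedited flag; per the merge description this is the boot parameter that maps normal grace periods onto their expedited counterparts. A sketch of the shape of that gate (the exact declaration site and permissions in the kernel may differ):

	int rcu_expedited;			/* 0 = normal, nonzero = expedite */
	module_param(rcu_expedited, int, 0);	/* e.g. boot with rcu_expedited=1 */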

@@ -757,7 +763,8 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 * grace period for the specified rcu_node structure.  If there are no such
 * tasks, report it up the rcu_node hierarchy.
 *
 * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
 * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
 * CPU hotplug operations.
 */
static void
sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
@@ -831,7 +838,7 @@ void synchronize_rcu_expedited(void)
			udelay(trycount * num_online_cpus());
		} else {
			put_online_cpus();
			synchronize_rcu();
			wait_rcu_gp(call_rcu);
			return;
		}
	}
@@ -875,6 +882,11 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

/**
 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
 *
 * Note that this primitive does not necessarily wait for an RCU grace period
 * to complete.  For example, if there are no RCU callbacks queued anywhere
 * in the system, then rcu_barrier() is within its rights to return
 * immediately, without waiting for anything, much less an RCU grace period.
 */
void rcu_barrier(void)
{

kernel/rcutree_trace.c
@@ -46,29 +46,58 @@
#define RCU_TREE_NONCORE
#include "rcutree.h"

static int show_rcubarrier(struct seq_file *m, void *unused)
{
	struct rcu_state *rsp;
#define ulong2long(a) (*(long *)(&(a)))

	for_each_rcu_flavor(rsp)
		seq_printf(m, "%s: bcc: %d nbd: %lu\n",
			   rsp->name,
			   atomic_read(&rsp->barrier_cpu_count),
			   rsp->n_barrier_done);
static int r_open(struct inode *inode, struct file *file,
		  const struct seq_operations *op)
{
	int ret = seq_open(file, op);
	if (!ret) {
		struct seq_file *m = (struct seq_file *)file->private_data;
		m->private = inode->i_private;
	}
	return ret;
}

static void *r_start(struct seq_file *m, loff_t *pos)
{
	struct rcu_state *rsp = (struct rcu_state *)m->private;
	*pos = cpumask_next(*pos - 1, cpu_possible_mask);
	if ((*pos) < nr_cpu_ids)
		return per_cpu_ptr(rsp->rda, *pos);
	return NULL;
}

static void *r_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return r_start(m, pos);
}

static void r_stop(struct seq_file *m, void *v)
{
}
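r_open()/r_start()/r_next()/r_stop() replace the old dump-everything handlers with a genuine seq_file iterator: r_open() stashes the per-file rcu_state (passed via inode->i_private) in m->private, and r_start()/r_next() walk cpu_possible_mask handing one rcu_data at a time to ->show. A minimal sketch of the same pattern over a hypothetical array (illustrative, not kernel code):

	#include <linux/seq_file.h>
	#include <linux/kernel.h>

	static int myitems[4] = { 10, 20, 30, 40 };	/* stands in for per-CPU data */

	static void *my_start(struct seq_file *m, loff_t *pos)
	{
		return *pos < ARRAY_SIZE(myitems) ? &myitems[*pos] : NULL;
	}

	static void *my_next(struct seq_file *m, void *v, loff_t *pos)
	{
		(*pos)++;
		return my_start(m, pos);
	}

	static void my_stop(struct seq_file *m, void *v)
	{
	}

	static int my_show(struct seq_file *m, void *v)
	{
		seq_printf(m, "%d\n", *(int *)v);	/* one record per step */
		return 0;
	}

	static const struct seq_operations my_ops = {
		.start	= my_start,
		.next	= my_next,
		.stop	= my_stop,
		.show	= my_show,
	};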

static int show_rcubarrier(struct seq_file *m, void *v)
{
	struct rcu_state *rsp = (struct rcu_state *)m->private;
	seq_printf(m, "bcc: %d nbd: %lu\n",
		   atomic_read(&rsp->barrier_cpu_count),
		   rsp->n_barrier_done);
	return 0;
}

static int rcubarrier_open(struct inode *inode, struct file *file)
{
	return single_open(file, show_rcubarrier, NULL);
	return single_open(file, show_rcubarrier, inode->i_private);
}

static const struct file_operations rcubarrier_fops = {
	.owner = THIS_MODULE,
	.open = rcubarrier_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
	.llseek = no_llseek,
	.release = seq_release,
};

#ifdef CONFIG_RCU_BOOST
@@ -86,10 +115,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
{
	if (!rdp->beenonline)
		return;
	seq_printf(m, "%3d%cc=%lu g=%lu pq=%d qp=%d",
	seq_printf(m, "%3d%cc=%ld g=%ld pq=%d qp=%d",
		   rdp->cpu,
		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
		   rdp->completed, rdp->gpnum,
		   ulong2long(rdp->completed), ulong2long(rdp->gpnum),
		   rdp->passed_quiesce, rdp->qs_pending);
	seq_printf(m, " dt=%d/%llx/%d df=%lu",
		   atomic_read(&rdp->dynticks->dynticks),
@@ -118,97 +147,62 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
		   rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
}
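The %lu-to-%ld switch works with the new ulong2long() macro: gpnum and completed are unsigned longs deliberately initialized to 0UL - 300UL (near the wrap point, to flush out counter-wrap bugs early), so printing them unsigned yields astronomical values, while reinterpreting the same bits as signed prints the intended -300. A self-contained illustration:

	#include <stdio.h>

	#define ulong2long(a) (*(long *)(&(a)))

	int main(void)
	{
		unsigned long gpnum = 0UL - 300UL;	/* same init as rcu_state */

		printf("as %%lu: %lu\n", gpnum);		/* huge positive value */
		printf("as %%ld: %ld\n", ulong2long(gpnum));	/* -300 */
		return 0;
	}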

static int show_rcudata(struct seq_file *m, void *unused)
static int show_rcudata(struct seq_file *m, void *v)
{
	int cpu;
	struct rcu_state *rsp;

	for_each_rcu_flavor(rsp) {
		seq_printf(m, "%s:\n", rsp->name);
		for_each_possible_cpu(cpu)
			print_one_rcu_data(m, per_cpu_ptr(rsp->rda, cpu));
	}
	print_one_rcu_data(m, (struct rcu_data *)v);
	return 0;
}

static const struct seq_operations rcudate_op = {
	.start = r_start,
	.next = r_next,
	.stop = r_stop,
	.show = show_rcudata,
};

static int rcudata_open(struct inode *inode, struct file *file)
{
	return single_open(file, show_rcudata, NULL);
	return r_open(inode, file, &rcudate_op);
}

static const struct file_operations rcudata_fops = {
	.owner = THIS_MODULE,
	.open = rcudata_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
	.llseek = no_llseek,
	.release = seq_release,
};

static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
static int show_rcuexp(struct seq_file *m, void *v)
{
	if (!rdp->beenonline)
		return;
	seq_printf(m, "%d,%s,%lu,%lu,%d,%d",
		   rdp->cpu,
		   cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"",
		   rdp->completed, rdp->gpnum,
		   rdp->passed_quiesce, rdp->qs_pending);
	seq_printf(m, ",%d,%llx,%d,%lu",
		   atomic_read(&rdp->dynticks->dynticks),
		   rdp->dynticks->dynticks_nesting,
		   rdp->dynticks->dynticks_nmi_nesting,
		   rdp->dynticks_fqs);
	seq_printf(m, ",%lu", rdp->offline_fqs);
	seq_printf(m, ",%ld,%ld,\"%c%c%c%c\"", rdp->qlen_lazy, rdp->qlen,
		   ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
			rdp->nxttail[RCU_NEXT_TAIL]],
		   ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
			rdp->nxttail[RCU_NEXT_READY_TAIL]],
		   ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
			rdp->nxttail[RCU_WAIT_TAIL]],
		   ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]);
#ifdef CONFIG_RCU_BOOST
	seq_printf(m, ",%d,\"%c\"",
		   per_cpu(rcu_cpu_has_work, rdp->cpu),
		   convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
						  rdp->cpu)));
#endif /* #ifdef CONFIG_RCU_BOOST */
	seq_printf(m, ",%ld", rdp->blimit);
	seq_printf(m, ",%lu,%lu,%lu\n",
		   rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
}
	struct rcu_state *rsp = (struct rcu_state *)m->private;

static int show_rcudata_csv(struct seq_file *m, void *unused)
{
	int cpu;
	struct rcu_state *rsp;

	seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pq\",");
	seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
	seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\"");
#ifdef CONFIG_RCU_BOOST
	seq_puts(m, "\"kt\",\"ktl\"");
#endif /* #ifdef CONFIG_RCU_BOOST */
	seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n");
	for_each_rcu_flavor(rsp) {
		seq_printf(m, "\"%s:\"\n", rsp->name);
		for_each_possible_cpu(cpu)
			print_one_rcu_data_csv(m, per_cpu_ptr(rsp->rda, cpu));
	}
	seq_printf(m, "s=%lu d=%lu w=%lu tf=%lu wd1=%lu wd2=%lu n=%lu sc=%lu dt=%lu dl=%lu dx=%lu\n",
		   atomic_long_read(&rsp->expedited_start),
		   atomic_long_read(&rsp->expedited_done),
		   atomic_long_read(&rsp->expedited_wrap),
		   atomic_long_read(&rsp->expedited_tryfail),
		   atomic_long_read(&rsp->expedited_workdone1),
		   atomic_long_read(&rsp->expedited_workdone2),
		   atomic_long_read(&rsp->expedited_normal),
		   atomic_long_read(&rsp->expedited_stoppedcpus),
		   atomic_long_read(&rsp->expedited_done_tries),
		   atomic_long_read(&rsp->expedited_done_lost),
		   atomic_long_read(&rsp->expedited_done_exit));
	return 0;
}

static int rcudata_csv_open(struct inode *inode, struct file *file)
static int rcuexp_open(struct inode *inode, struct file *file)
{
	return single_open(file, show_rcudata_csv, NULL);
	return single_open(file, show_rcuexp, inode->i_private);
}

static const struct file_operations rcudata_csv_fops = {
static const struct file_operations rcuexp_fops = {
	.owner = THIS_MODULE,
	.open = rcudata_csv_open,
	.open = rcuexp_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
	.llseek = no_llseek,
	.release = seq_release,
};

#ifdef CONFIG_RCU_BOOST
@@ -254,27 +248,11 @@ static const struct file_operations rcu_node_boost_fops = {
	.owner = THIS_MODULE,
	.open = rcu_node_boost_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.llseek = no_llseek,
	.release = single_release,
};

/*
 * Create the rcuboost debugfs entry.  Standard error return.
 */
static int rcu_boost_trace_create_file(struct dentry *rcudir)
{
	return !debugfs_create_file("rcuboost", 0444, rcudir, NULL,
				    &rcu_node_boost_fops);
}

#else /* #ifdef CONFIG_RCU_BOOST */

static int rcu_boost_trace_create_file(struct dentry *rcudir)
{
	return 0;  /* There cannot be an error if we didn't create it! */
}

#endif /* #else #ifdef CONFIG_RCU_BOOST */
#endif /* #ifdef CONFIG_RCU_BOOST */

static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
{
@@ -283,8 +261,9 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
	struct rcu_node *rnp;

	gpnum = rsp->gpnum;
	seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x ",
		   rsp->name, rsp->completed, gpnum, rsp->fqs_state,
	seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x ",
		   ulong2long(rsp->completed), ulong2long(gpnum),
		   rsp->fqs_state,
		   (long)(rsp->jiffies_force_qs - jiffies),
		   (int)(jiffies & 0xffff));
	seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
@@ -306,26 +285,24 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
	seq_puts(m, "\n");
}

static int show_rcuhier(struct seq_file *m, void *unused)
static int show_rcuhier(struct seq_file *m, void *v)
{
	struct rcu_state *rsp;

	for_each_rcu_flavor(rsp)
		print_one_rcu_state(m, rsp);
	struct rcu_state *rsp = (struct rcu_state *)m->private;
	print_one_rcu_state(m, rsp);
	return 0;
}

static int rcuhier_open(struct inode *inode, struct file *file)
{
	return single_open(file, show_rcuhier, NULL);
	return single_open(file, show_rcuhier, inode->i_private);
}

static const struct file_operations rcuhier_fops = {
	.owner = THIS_MODULE,
	.open = rcuhier_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
	.llseek = no_llseek,
	.release = seq_release,
};

static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
@@ -338,42 +315,42 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
	struct rcu_node *rnp = &rsp->node[0];

	raw_spin_lock_irqsave(&rnp->lock, flags);
	completed = rsp->completed;
	gpnum = rsp->gpnum;
	if (rsp->completed == rsp->gpnum)
	completed = ACCESS_ONCE(rsp->completed);
	gpnum = ACCESS_ONCE(rsp->gpnum);
	if (completed == gpnum)
		gpage = 0;
	else
		gpage = jiffies - rsp->gp_start;
	gpmax = rsp->gp_max;
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
	seq_printf(m, "%s: completed=%ld gpnum=%lu age=%ld max=%ld\n",
		   rsp->name, completed, gpnum, gpage, gpmax);
	seq_printf(m, "completed=%ld gpnum=%ld age=%ld max=%ld\n",
		   ulong2long(completed), ulong2long(gpnum), gpage, gpmax);
}

static int show_rcugp(struct seq_file *m, void *unused)
static int show_rcugp(struct seq_file *m, void *v)
{
	struct rcu_state *rsp;

	for_each_rcu_flavor(rsp)
		show_one_rcugp(m, rsp);
	struct rcu_state *rsp = (struct rcu_state *)m->private;
	show_one_rcugp(m, rsp);
	return 0;
}

static int rcugp_open(struct inode *inode, struct file *file)
{
	return single_open(file, show_rcugp, NULL);
	return single_open(file, show_rcugp, inode->i_private);
}

static const struct file_operations rcugp_fops = {
	.owner = THIS_MODULE,
	.open = rcugp_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
	.llseek = no_llseek,
	.release = seq_release,
};

static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
{
	if (!rdp->beenonline)
		return;
	seq_printf(m, "%3d%cnp=%ld ",
		   rdp->cpu,
		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
@@ -389,34 +366,30 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
		   rdp->n_rp_need_nothing);
}

static int show_rcu_pending(struct seq_file *m, void *unused)
static int show_rcu_pending(struct seq_file *m, void *v)
{
	int cpu;
	struct rcu_data *rdp;
	struct rcu_state *rsp;

	for_each_rcu_flavor(rsp) {
		seq_printf(m, "%s:\n", rsp->name);
		for_each_possible_cpu(cpu) {
			rdp = per_cpu_ptr(rsp->rda, cpu);
			if (rdp->beenonline)
				print_one_rcu_pending(m, rdp);
		}
	}
	print_one_rcu_pending(m, (struct rcu_data *)v);
	return 0;
}

static const struct seq_operations rcu_pending_op = {
	.start = r_start,
	.next = r_next,
	.stop = r_stop,
	.show = show_rcu_pending,
};

static int rcu_pending_open(struct inode *inode, struct file *file)
{
	return single_open(file, show_rcu_pending, NULL);
	return r_open(inode, file, &rcu_pending_op);
}

static const struct file_operations rcu_pending_fops = {
	.owner = THIS_MODULE,
	.open = rcu_pending_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
	.llseek = no_llseek,
	.release = seq_release,
};

static int show_rcutorture(struct seq_file *m, void *unused)
@@ -446,43 +419,58 @@ static struct dentry *rcudir;

static int __init rcutree_trace_init(void)
{
	struct rcu_state *rsp;
	struct dentry *retval;
	struct dentry *rspdir;

	rcudir = debugfs_create_dir("rcu", NULL);
	if (!rcudir)
		goto free_out;

	retval = debugfs_create_file("rcubarrier", 0444, rcudir,
				     NULL, &rcubarrier_fops);
	if (!retval)
		goto free_out;
	for_each_rcu_flavor(rsp) {
		rspdir = debugfs_create_dir(rsp->name, rcudir);
		if (!rspdir)
			goto free_out;

	retval = debugfs_create_file("rcudata", 0444, rcudir,
				     NULL, &rcudata_fops);
	if (!retval)
		goto free_out;
		retval = debugfs_create_file("rcudata", 0444,
					     rspdir, rsp, &rcudata_fops);
		if (!retval)
			goto free_out;

	retval = debugfs_create_file("rcudata.csv", 0444, rcudir,
				     NULL, &rcudata_csv_fops);
	if (!retval)
		goto free_out;
		retval = debugfs_create_file("rcuexp", 0444,
					     rspdir, rsp, &rcuexp_fops);
		if (!retval)
			goto free_out;

	if (rcu_boost_trace_create_file(rcudir))
		goto free_out;
		retval = debugfs_create_file("rcu_pending", 0444,
					     rspdir, rsp, &rcu_pending_fops);
		if (!retval)
			goto free_out;

	retval = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
	if (!retval)
		goto free_out;
		retval = debugfs_create_file("rcubarrier", 0444,
					     rspdir, rsp, &rcubarrier_fops);
		if (!retval)
			goto free_out;

	retval = debugfs_create_file("rcuhier", 0444, rcudir,
				     NULL, &rcuhier_fops);
	if (!retval)
		goto free_out;
#ifdef CONFIG_RCU_BOOST
		if (rsp == &rcu_preempt_state) {
			retval = debugfs_create_file("rcuboost", 0444,
						     rspdir, NULL, &rcu_node_boost_fops);
			if (!retval)
				goto free_out;
		}
#endif

	retval = debugfs_create_file("rcu_pending", 0444, rcudir,
				     NULL, &rcu_pending_fops);
	if (!retval)
		goto free_out;
		retval = debugfs_create_file("rcugp", 0444,
					     rspdir, rsp, &rcugp_fops);
		if (!retval)
			goto free_out;

		retval = debugfs_create_file("rcuhier", 0444,
					     rspdir, rsp, &rcuhier_fops);
		if (!retval)
			goto free_out;
	}

	retval = debugfs_create_file("rcutorture", 0444, rcudir,
				     NULL, &rcutorture_fops);

kernel/sched/core.c
@@ -1887,7 +1887,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
#endif

	/* Here we just switch the register state and the stack. */
	rcu_switch(prev, next);
	rcu_user_hooks_switch(prev, next);
	switch_to(prev, next, prev);

	barrier();
@@ -8076,3 +8076,9 @@ struct cgroup_subsys cpuacct_subsys = {
	.base_cftypes = files,
};
#endif	/* CONFIG_CGROUP_CPUACCT */

void dump_cpu_task(int cpu)
{
	pr_info("Task dump for CPU %d:\n", cpu);
	sched_show_task(cpu_curr(cpu));
}

kernel/srcu.c
@@ -16,8 +16,10 @@
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2006
 * Copyright (C) Fujitsu, 2012
 *
 * Author: Paul McKenney <paulmck@us.ibm.com>
 *	   Lai Jiangshan <laijs@cn.fujitsu.com>
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *	Documentation/RCU/ *.txt
@@ -34,6 +36,10 @@
#include <linux/delay.h>
#include <linux/srcu.h>

#include <trace/events/rcu.h>

#include "rcu.h"

/*
 * Initialize an rcu_batch structure to empty.
 */
@@ -92,9 +98,6 @@ static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
	}
}

/* single-thread state-machine */
static void process_srcu(struct work_struct *work);

static int init_srcu_struct_fields(struct srcu_struct *sp)
{
	sp->completed = 0;
@@ -464,7 +467,9 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
 */
void synchronize_srcu(struct srcu_struct *sp)
{
	__synchronize_srcu(sp, SYNCHRONIZE_SRCU_TRYCOUNT);
	__synchronize_srcu(sp, rcu_expedited
			   ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
			   : SYNCHRONIZE_SRCU_TRYCOUNT);
}
EXPORT_SYMBOL_GPL(synchronize_srcu);
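This merge's srcu branch also allows statically allocated and initialized srcu_struct structures (courtesy of Lai Jiangshan), which pairs naturally with the expedited-aware synchronize_srcu() above. A minimal usage sketch, assuming the DEFINE_SRCU() initializer from that series:

	#include <linux/srcu.h>

	DEFINE_SRCU(my_srcu);		/* static allocation + initialization */

	void reader(void)
	{
		int idx;

		idx = srcu_read_lock(&my_srcu);
		/* ...dereference SRCU-protected data... */
		srcu_read_unlock(&my_srcu, idx);
	}

	void updater(void)
	{
		/* ...unlink the old data... */
		synchronize_srcu(&my_srcu);	/* honors rcu_expedited above */
		/* ...now safe to free the old data... */
	}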

@@ -637,7 +642,7 @@ static void srcu_reschedule(struct srcu_struct *sp)
/*
 * This is the work-queue function that handles SRCU grace periods.
 */
static void process_srcu(struct work_struct *work)
void process_srcu(struct work_struct *work)
{
	struct srcu_struct *sp;

@@ -648,3 +653,4 @@ static void process_srcu(struct work_struct *work)
	srcu_invoke_callbacks(sp);
	srcu_reschedule(sp);
}
EXPORT_SYMBOL_GPL(process_srcu);

lib/Kconfig.debug
@@ -972,7 +972,7 @@ config RCU_CPU_STALL_TIMEOUT
	int "RCU CPU stall timeout in seconds"
	depends on TREE_RCU || TREE_PREEMPT_RCU
	range 3 300
	default 60
	default 21
	help
	  If a given RCU grace period extends more than the specified
	  number of seconds, a CPU stall warning is printed.  If the