mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-20 01:27:56 +07:00
daebf24a8e
Currently the Linux Kernel Memory Model gives an incorrect response for the following litmus test: C plain-WWC {} P0(int *x) { WRITE_ONCE(*x, 2); } P1(int *x, int *y) { int r1; int r2; int r3; r1 = READ_ONCE(*x); if (r1 == 2) { smp_rmb(); r2 = *x; } smp_rmb(); r3 = READ_ONCE(*x); WRITE_ONCE(*y, r3 - 1); } P2(int *x, int *y) { int r4; r4 = READ_ONCE(*y); if (r4 > 0) WRITE_ONCE(*x, 1); } exists (x=2 /\ 1:r2=2 /\ 2:r4=1) The memory model says that the plain read of *x in P1 races with the WRITE_ONCE(*x) in P2. The problem is that we have a write W and a read R related by neither fre or rfe, but rather W ->coe W' ->rfe R, where W' is an intermediate write (the WRITE_ONCE() in P0). In this situation there is no particular ordering between W and R, so either a wr-vis link from W to R or an rw-xbstar link from R to W would prove that the accesses aren't concurrent. But the LKMM only looks for a wr-vis link, which is equivalent to assuming that W must execute before R. This is not necessarily true on non-multicopy-atomic systems, as the WWC pattern demonstrates. This patch changes the LKMM to accept either a wr-vis or a reverse rw-xbstar link as a proof of non-concurrency. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Acked-by: Andrea Parri <parri.andrea@gmail.com> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
204 lines
7.1 KiB
Plaintext
204 lines
7.1 KiB
Plaintext
// SPDX-License-Identifier: GPL-2.0+
|
|
(*
|
|
* Copyright (C) 2015 Jade Alglave <j.alglave@ucl.ac.uk>,
|
|
* Copyright (C) 2016 Luc Maranget <luc.maranget@inria.fr> for Inria
|
|
* Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
|
|
* Andrea Parri <parri.andrea@gmail.com>
|
|
*
|
|
* An earlier version of this file appeared in the companion webpage for
|
|
* "Frightening small children and disconcerting grown-ups: Concurrency
|
|
* in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
|
|
* which appeared in ASPLOS 2018.
|
|
*)
|
|
|
|
"Linux-kernel memory consistency model"
|
|
|
|
(*
|
|
* File "lock.cat" handles locks and is experimental.
|
|
* It can be replaced by include "cos.cat" for tests that do not use locks.
|
|
*)
|
|
|
|
include "lock.cat"
|
|
|
|
(*******************)
|
|
(* Basic relations *)
|
|
(*******************)
|
|
|
|
(* Release Acquire *)
|
|
let acq-po = [Acquire] ; po ; [M]
|
|
let po-rel = [M] ; po ; [Release]
|
|
let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po
|
|
|
|
(* Fences *)
|
|
let R4rmb = R \ Noreturn (* Reads for which rmb works *)
|
|
let rmb = [R4rmb] ; fencerel(Rmb) ; [R4rmb]
|
|
let wmb = [W] ; fencerel(Wmb) ; [W]
|
|
let mb = ([M] ; fencerel(Mb) ; [M]) |
|
|
([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) |
|
|
([M] ; po? ; [RMW] ; fencerel(After-atomic) ; [M]) |
|
|
([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) |
|
|
([M] ; po ; [UL] ; (co | po) ; [LKW] ;
|
|
fencerel(After-unlock-lock) ; [M])
|
|
let gp = po ; [Sync-rcu | Sync-srcu] ; po?
|
|
let strong-fence = mb | gp
|
|
|
|
let nonrw-fence = strong-fence | po-rel | acq-po
|
|
let fence = nonrw-fence | wmb | rmb
|
|
let barrier = fencerel(Barrier | Rmb | Wmb | Mb | Sync-rcu | Sync-srcu |
|
|
Before-atomic | After-atomic | Acquire | Release |
|
|
Rcu-lock | Rcu-unlock | Srcu-lock | Srcu-unlock) |
|
|
(po ; [Release]) | ([Acquire] ; po)
|
|
|
|
(**********************************)
|
|
(* Fundamental coherence ordering *)
|
|
(**********************************)
|
|
|
|
(* Sequential Consistency Per Variable *)
|
|
let com = rf | co | fr
|
|
acyclic po-loc | com as coherence
|
|
|
|
(* Atomic Read-Modify-Write *)
|
|
empty rmw & (fre ; coe) as atomic
|
|
|
|
(**********************************)
|
|
(* Instruction execution ordering *)
|
|
(**********************************)
|
|
|
|
(* Preserved Program Order *)
|
|
let dep = addr | data
|
|
let rwdep = (dep | ctrl) ; [W]
|
|
let overwrite = co | fr
|
|
let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb)
|
|
let to-r = addr | (dep ; [Marked] ; rfi)
|
|
let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int)
|
|
|
|
(* Propagation: Ordering from release operations and strong fences. *)
|
|
let A-cumul(r) = (rfe ; [Marked])? ; r
|
|
let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb |
|
|
po-unlock-rf-lock-po) ; [Marked]
|
|
let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ;
|
|
[Marked] ; rfe? ; [Marked]
|
|
|
|
(*
|
|
* Happens Before: Ordering from the passage of time.
|
|
* No fences needed here for prop because relation confined to one process.
|
|
*)
|
|
let hb = [Marked] ; (ppo | rfe | ((prop \ id) & int)) ; [Marked]
|
|
acyclic hb as happens-before
|
|
|
|
(****************************************)
|
|
(* Write and fence propagation ordering *)
|
|
(****************************************)
|
|
|
|
(* Propagation: Each non-rf link needs a strong fence. *)
|
|
let pb = prop ; strong-fence ; hb* ; [Marked]
|
|
acyclic pb as propagation
|
|
|
|
(*******)
|
|
(* RCU *)
|
|
(*******)
|
|
|
|
(*
|
|
* Effects of read-side critical sections proceed from the rcu_read_unlock()
|
|
* or srcu_read_unlock() backwards on the one hand, and from the
|
|
* rcu_read_lock() or srcu_read_lock() forwards on the other hand.
|
|
*
|
|
* In the definition of rcu-fence below, the po term at the left-hand side
|
|
* of each disjunct and the po? term at the right-hand end have been factored
|
|
* out. They have been moved into the definitions of rcu-link and rb.
|
|
* This was necessary in order to apply the "& loc" tests correctly.
|
|
*)
|
|
let rcu-gp = [Sync-rcu] (* Compare with gp *)
|
|
let srcu-gp = [Sync-srcu]
|
|
let rcu-rscsi = rcu-rscs^-1
|
|
let srcu-rscsi = srcu-rscs^-1
|
|
|
|
(*
|
|
* The synchronize_rcu() strong fence is special in that it can order not
|
|
* one but two non-rf relations, but only in conjunction with an RCU
|
|
* read-side critical section.
|
|
*)
|
|
let rcu-link = po? ; hb* ; pb* ; prop ; po
|
|
|
|
(*
|
|
* Any sequence containing at least as many grace periods as RCU read-side
|
|
* critical sections (joined by rcu-link) induces order like a generalized
|
|
* inter-CPU strong fence.
|
|
* Likewise for SRCU grace periods and read-side critical sections, provided
|
|
* the synchronize_srcu() and srcu_read_[un]lock() calls refer to the same
|
|
* struct srcu_struct location.
|
|
*)
|
|
let rec rcu-order = rcu-gp | srcu-gp |
|
|
(rcu-gp ; rcu-link ; rcu-rscsi) |
|
|
((srcu-gp ; rcu-link ; srcu-rscsi) & loc) |
|
|
(rcu-rscsi ; rcu-link ; rcu-gp) |
|
|
((srcu-rscsi ; rcu-link ; srcu-gp) & loc) |
|
|
(rcu-gp ; rcu-link ; rcu-order ; rcu-link ; rcu-rscsi) |
|
|
((srcu-gp ; rcu-link ; rcu-order ; rcu-link ; srcu-rscsi) & loc) |
|
|
(rcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; rcu-gp) |
|
|
((srcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; srcu-gp) & loc) |
|
|
(rcu-order ; rcu-link ; rcu-order)
|
|
let rcu-fence = po ; rcu-order ; po?
|
|
let fence = fence | rcu-fence
|
|
let strong-fence = strong-fence | rcu-fence
|
|
|
|
(* rb orders instructions just as pb does *)
|
|
let rb = prop ; rcu-fence ; hb* ; pb* ; [Marked]
|
|
|
|
irreflexive rb as rcu
|
|
|
|
(*
|
|
* The happens-before, propagation, and rcu constraints are all
|
|
* expressions of temporal ordering. They could be replaced by
|
|
* a single constraint on an "executes-before" relation, xb:
|
|
*
|
|
* let xb = hb | pb | rb
|
|
* acyclic xb as executes-before
|
|
*)
|
|
|
|
(*********************************)
|
|
(* Plain accesses and data races *)
|
|
(*********************************)
|
|
|
|
(* Warn about plain writes and marked accesses in the same region *)
|
|
let mixed-accesses = ([Plain & W] ; (po-loc \ barrier) ; [Marked]) |
|
|
([Marked] ; (po-loc \ barrier) ; [Plain & W])
|
|
flag ~empty mixed-accesses as mixed-accesses
|
|
|
|
(* Executes-before and visibility *)
|
|
let xbstar = (hb | pb | rb)*
|
|
let vis = cumul-fence* ; rfe? ; [Marked] ;
|
|
((strong-fence ; [Marked] ; xbstar) | (xbstar & int))
|
|
|
|
(* Boundaries for lifetimes of plain accesses *)
|
|
let w-pre-bounded = [Marked] ; (addr | fence)?
|
|
let r-pre-bounded = [Marked] ; (addr | nonrw-fence |
|
|
([R4rmb] ; fencerel(Rmb) ; [~Noreturn]))?
|
|
let w-post-bounded = fence? ; [Marked]
|
|
let r-post-bounded = (nonrw-fence | ([~Noreturn] ; fencerel(Rmb) ; [R4rmb]))? ;
|
|
[Marked]
|
|
|
|
(* Visibility and executes-before for plain accesses *)
|
|
let ww-vis = fence | (strong-fence ; xbstar ; w-pre-bounded) |
|
|
(w-post-bounded ; vis ; w-pre-bounded)
|
|
let wr-vis = fence | (strong-fence ; xbstar ; r-pre-bounded) |
|
|
(w-post-bounded ; vis ; r-pre-bounded)
|
|
let rw-xbstar = fence | (r-post-bounded ; xbstar ; w-pre-bounded)
|
|
|
|
(* Potential races *)
|
|
let pre-race = ext & ((Plain * M) | ((M \ IW) * Plain))
|
|
|
|
(* Coherence requirements for plain accesses *)
|
|
let wr-incoh = pre-race & rf & rw-xbstar^-1
|
|
let rw-incoh = pre-race & fr & wr-vis^-1
|
|
let ww-incoh = pre-race & co & ww-vis^-1
|
|
empty (wr-incoh | rw-incoh | ww-incoh) as plain-coherence
|
|
|
|
(* Actual races *)
|
|
let ww-nonrace = ww-vis & ((Marked * W) | rw-xbstar) & ((W * Marked) | wr-vis)
|
|
let ww-race = (pre-race & co) \ ww-nonrace
|
|
let wr-race = (pre-race & (co? ; rf)) \ wr-vis \ rw-xbstar^-1
|
|
let rw-race = (pre-race & fr) \ rw-xbstar
|
|
|
|
flag ~empty (ww-race | wr-race | rw-race) as data-race
|