mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-01 21:30:37 +00:00
TLA+ specification of proposer-acceptor consensus protocol.
And .cfg file for running TLC. ref #293
This commit is contained in:
committed by
Stas Kelvich
parent
791312824d
commit
eb1618f2ed
@@ -33,4 +33,5 @@ safekeepers. The Paxos and crash recovery algorithm ensures that only
|
||||
one primary node can be actively streaming WAL to the quorum of
|
||||
safekeepers.
|
||||
|
||||
See README.md for a more detailed desription of the consensus protocol.
|
||||
See README_PROTO.md for a more detailed desription of the consensus protocol. spec/
|
||||
contains TLA+ specification of it.
|
||||
|
||||
34
walkeeper/spec/ProposerAcceptorConsensus.cfg
Normal file
34
walkeeper/spec/ProposerAcceptorConsensus.cfg
Normal file
@@ -0,0 +1,34 @@
|
||||
\* MV CONSTANT declarations
|
||||
CONSTANT NULL = NULL
|
||||
CONSTANTS
|
||||
p1 = p1
|
||||
p2 = p2
|
||||
p3 = p3
|
||||
a1 = a1
|
||||
a2 = a2
|
||||
a3 = a3
|
||||
\* MV CONSTANT definitions
|
||||
CONSTANT
|
||||
proposers = {p1, p2, p3}
|
||||
acceptors = {a1, a2, a3}
|
||||
\* SYMMETRY definition
|
||||
SYMMETRY perms
|
||||
\* CONSTANT definitions
|
||||
CONSTANT
|
||||
max_entries = 3
|
||||
CONSTANT
|
||||
max_term = 3
|
||||
\* INIT definition
|
||||
INIT
|
||||
Init
|
||||
\* NEXT definition
|
||||
NEXT
|
||||
Next
|
||||
\* INVARIANT definition
|
||||
INVARIANT
|
||||
TypeOk
|
||||
ElectionSafety
|
||||
LogIsMonotonic
|
||||
LogSafety
|
||||
CommittedNotOverwritten
|
||||
CHECK_DEADLOCK FALSE
|
||||
356
walkeeper/spec/ProposerAcceptorConsensus.tla
Normal file
356
walkeeper/spec/ProposerAcceptorConsensus.tla
Normal file
@@ -0,0 +1,356 @@
|
||||
---- MODULE ProposerAcceptorConsensus ----
|
||||
|
||||
\* Differences from current implementation:
|
||||
\* - unified not-globally-unique epoch & term (node_id)
|
||||
\* Simplifications:
|
||||
\* - instant message delivery
|
||||
\* - feedback is not modeled separately, commit_lsn is updated directly
|
||||
|
||||
EXTENDS Integers, Sequences, FiniteSets, TLC
|
||||
|
||||
VARIABLES
|
||||
prop_state, \* prop_state[p] is state of proposer p
|
||||
acc_state, \* acc_state[a] is state of acceptor a
|
||||
commit_lsns \* map of acceptor -> commit_lsn
|
||||
|
||||
CONSTANT
|
||||
acceptors,
|
||||
proposers,
|
||||
max_entries, \* model constraint: max log entries acceptor/proposer can hold
|
||||
max_term \* model constraint: max allowed term
|
||||
|
||||
CONSTANT NULL
|
||||
|
||||
ASSUME max_entries \in Nat /\ max_term \in Nat
|
||||
|
||||
\* For specifying symmetry set in manual cfg file, see
|
||||
\* https://github.com/tlaplus/tlaplus/issues/404
|
||||
perms == Permutations(proposers) \union Permutations(acceptors)
|
||||
|
||||
\********************************************************************************
|
||||
\* Helpers
|
||||
\********************************************************************************
|
||||
|
||||
Maximum(S) ==
|
||||
(*************************************************************************)
|
||||
(* If S is a set of numbers, then this define Maximum(S) to be the *)
|
||||
(* maximum of those numbers, or -1 if S is empty. *)
|
||||
(*************************************************************************)
|
||||
IF S = {} THEN -1
|
||||
ELSE CHOOSE n \in S : \A m \in S : n \geq m
|
||||
|
||||
\* minimum of numbers in the set, error if set is empty
|
||||
Minimum(S) ==
|
||||
CHOOSE min \in S : \A n \in S : min <= n
|
||||
|
||||
\* Min of two numbers
|
||||
Min(a, b) == IF a < b THEN a ELSE b
|
||||
|
||||
\* Set of values of function f. XXX is there a such builtin?
|
||||
FValues(f) == {f[a] : a \in DOMAIN f}
|
||||
|
||||
\* Sort of 0 for functions
|
||||
EmptyF == [x \in {} |-> 42]
|
||||
IsEmptyF(f) == DOMAIN f = {}
|
||||
|
||||
\* Next entry proposer p will push to acceptor a or NULL.
|
||||
NextEntry(p, a) ==
|
||||
IF Len(prop_state[p].wal) >= prop_state[p].next_send_lsn[a] THEN
|
||||
CHOOSE r \in FValues(prop_state[p].wal) : r.lsn = prop_state[p].next_send_lsn[a]
|
||||
ELSE
|
||||
NULL
|
||||
|
||||
|
||||
\*****************
|
||||
|
||||
NumAccs == Cardinality(acceptors)
|
||||
|
||||
\* does acc_set form the quorum?
|
||||
Quorum(acc_set) == Cardinality(acc_set) >= (NumAccs \div 2 + 1)
|
||||
\* all quorums of acceptors
|
||||
Quorums == {subset \in SUBSET acceptors: Quorum(subset)}
|
||||
|
||||
\* flush_lsn of acceptor a.
|
||||
FlushLsn(a) == Len(acc_state[a].wal)
|
||||
|
||||
|
||||
\********************************************************************************
|
||||
\* Type assertion
|
||||
\********************************************************************************
|
||||
\* Defining sets of all possible tuples and using them in TypeOk in usual
|
||||
\* all-tuples constructor is not practical because such definitions force
|
||||
\* TLC to enumerate them, while they are are horribly enormous
|
||||
\* (TLC screams "Attempted to construct a set with too many elements").
|
||||
\* So instead check types manually.
|
||||
TypeOk ==
|
||||
/\ \A p \in proposers:
|
||||
/\ DOMAIN prop_state[p] = {"state", "term", "votes", "donor_epoch", "vcl", "wal", "next_send_lsn"}
|
||||
\* in campaign proposer sends RequestVote and waits for acks;
|
||||
\* in leader he is elected
|
||||
/\ prop_state[p].state \in {"campaign", "leader"}
|
||||
\* 0..max_term should be actually Nat in the unbouned model, but TLC won't
|
||||
\* swallow it
|
||||
/\ prop_state[p].term \in 0..max_term
|
||||
\* votes received
|
||||
/\ \A voter \in DOMAIN prop_state[p].votes:
|
||||
/\ voter \in acceptors
|
||||
/\ prop_state[p].votes[voter] \in [epoch: 0..max_term, flush_lsn: 0..max_entries]
|
||||
/\ prop_state[p].donor_epoch \in 0..max_term
|
||||
\* wal is sequence of just <lsn, epoch of author> records
|
||||
/\ \A i \in DOMAIN prop_state[p].wal:
|
||||
prop_state[p].wal[i] \in [lsn: 1..max_entries, epoch: 1..max_term]
|
||||
\* Following implementation, we skew the original Aurora meaning of this;
|
||||
\* here it is lsn of highest definitely committed record as set by proposer
|
||||
\* when it is elected; it doesn't change since then
|
||||
/\ prop_state[p].vcl \in 0..max_entries
|
||||
\* map of acceptor -> next lsn to send
|
||||
/\ \A a \in DOMAIN prop_state[p].next_send_lsn:
|
||||
/\ a \in acceptors
|
||||
/\ prop_state[p].next_send_lsn[a] \in 1..(max_entries + 1)
|
||||
/\ \A a \in acceptors:
|
||||
/\ DOMAIN acc_state[a] = {"term", "epoch", "wal"}
|
||||
/\ acc_state[a].term \in 0..max_term
|
||||
/\ acc_state[a].epoch \in 0..max_term
|
||||
/\ \A i \in DOMAIN acc_state[a].wal:
|
||||
acc_state[a].wal[i] \in [lsn: 1..max_entries, epoch: 1..max_term]
|
||||
/\ \A a \in DOMAIN commit_lsns:
|
||||
/\ a \in acceptors
|
||||
/\ commit_lsns[a] \in 0..max_entries
|
||||
|
||||
\********************************************************************************
|
||||
\* Initial
|
||||
\********************************************************************************
|
||||
|
||||
Init ==
|
||||
/\ prop_state = [p \in proposers |-> [
|
||||
state |-> "campaign",
|
||||
term |-> 1,
|
||||
votes |-> EmptyF,
|
||||
donor_epoch |-> 0,
|
||||
vcl |-> 0,
|
||||
wal |-> << >>,
|
||||
next_send_lsn |-> EmptyF
|
||||
]]
|
||||
/\ acc_state = [a \in acceptors |-> [
|
||||
\* there will be no leader in this term, 1 is the first real
|
||||
term |-> 0,
|
||||
epoch |-> 0,
|
||||
wal |-> << >>
|
||||
]]
|
||||
/\ commit_lsns = [a \in acceptors |-> 0]
|
||||
|
||||
|
||||
\********************************************************************************
|
||||
\* Actions
|
||||
\********************************************************************************
|
||||
|
||||
\* Proposer loses all state.
|
||||
\* For simplicity (and to reduct state space), we assume it immediately gets
|
||||
\* current state from quorum q of acceptors determining the term he will request
|
||||
\* to vote for.
|
||||
RestartProposer(p, q) ==
|
||||
/\ Quorum(q)
|
||||
/\ LET
|
||||
new_term == Maximum({acc_state[a].term : a \in q}) + 1
|
||||
IN
|
||||
/\ new_term <= max_term
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].state = "campaign",
|
||||
![p].term = new_term,
|
||||
![p].votes = EmptyF,
|
||||
![p].donor_epoch = 0,
|
||||
![p].vcl = 0,
|
||||
![p].wal = << >>,
|
||||
![p].next_send_lsn = EmptyF]
|
||||
/\ UNCHANGED <<acc_state, commit_lsns>>
|
||||
|
||||
\* Acceptor a immediately votes for proposer p.
|
||||
Vote(p, a) ==
|
||||
/\ prop_state[p].state = "campaign"
|
||||
/\ acc_state[a].term < prop_state[p].term \* main voting condition
|
||||
/\ acc_state' = [acc_state EXCEPT ![a].term = prop_state[p].term]
|
||||
/\ LET
|
||||
vote == [epoch |-> acc_state[a].epoch, flush_lsn |-> FlushLsn(a)]
|
||||
IN
|
||||
prop_state' = [prop_state EXCEPT ![p].votes = prop_state[p].votes @@ (a :> vote)]
|
||||
/\ UNCHANGED <<commit_lsns>>
|
||||
|
||||
|
||||
\* Proposer p gets elected.
|
||||
BecomeLeader(p) ==
|
||||
/\ prop_state[p].state = "campaign"
|
||||
/\ Quorum(DOMAIN prop_state[p].votes)
|
||||
/\ LET
|
||||
max_epoch == Maximum({v.epoch : v \in FValues(prop_state[p].votes)})
|
||||
max_epoch_votes == {v \in FValues(prop_state[p].votes) : v.epoch = max_epoch}
|
||||
donor == CHOOSE dv \in DOMAIN prop_state[p].votes :
|
||||
/\ prop_state[p].votes[dv].epoch = max_epoch
|
||||
/\ \A v \in max_epoch_votes:
|
||||
prop_state[p].votes[dv].flush_lsn >= v.flush_lsn
|
||||
max_vote == prop_state[p].votes[donor]
|
||||
\* Establish lsn to stream from for voters.
|
||||
\* At some point it seemed like we can regard log as correct and only
|
||||
\* append to it if has in the max_epoch, however TLC showed that's not
|
||||
\* the case; we must always stream since first not matching record.
|
||||
next_send_lsn == [voter \in DOMAIN prop_state[p].votes |-> 1]
|
||||
IN
|
||||
\* we fetch log from the most advanced node (this is separate
|
||||
\* roundtrip), make sure node is still on one term with us
|
||||
/\ acc_state[donor].term = prop_state[p].term
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].state = "leader",
|
||||
\* fetch the log from donor
|
||||
![p].wal = acc_state[donor].wal,
|
||||
![p].donor_epoch = max_epoch,
|
||||
![p].vcl = max_vote.flush_lsn,
|
||||
![p].next_send_lsn = next_send_lsn]
|
||||
/\ UNCHANGED <<acc_state, commit_lsns>>
|
||||
|
||||
|
||||
\* acceptor a learns about elected proposer p's term.
|
||||
UpdateTerm(p, a) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ acc_state[a].term < prop_state[p].term
|
||||
/\ acc_state' = [acc_state EXCEPT ![a].term = prop_state[p].term]
|
||||
/\ UNCHANGED <<prop_state, commit_lsns>>
|
||||
|
||||
|
||||
\* Acceptor a which didn't participate in voting connects to elected proposer p
|
||||
\* and p sets the streaming point
|
||||
HandshakeWithLeader(p, a) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ acc_state[a].term = prop_state[p].term
|
||||
/\ a \notin DOMAIN prop_state[p].next_send_lsn
|
||||
/\ LET
|
||||
next_send_lsn == prop_state[p].next_send_lsn @@ (a :> 1)
|
||||
IN
|
||||
prop_state' = [prop_state EXCEPT ![p].next_send_lsn = next_send_lsn]
|
||||
/\ UNCHANGED <<acc_state, commit_lsns>>
|
||||
|
||||
|
||||
\* Append new log entry to elected proposer
|
||||
NewEntry(p) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ Len(prop_state[p].wal) < max_entries \* model constraint
|
||||
/\ LET
|
||||
new_lsn == IF Len(prop_state[p].wal) = 0 THEN
|
||||
prop_state[p].vcl + 1
|
||||
ELSE
|
||||
\* lsn of last record + 1
|
||||
prop_state[p].wal[Len(prop_state[p].wal)].lsn + 1
|
||||
new_entry == [lsn |-> new_lsn, epoch |-> prop_state[p].term]
|
||||
IN
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].wal = Append(prop_state[p].wal, new_entry)]
|
||||
/\ UNCHANGED <<acc_state, commit_lsns>>
|
||||
|
||||
|
||||
\* Try to transfer entry from elected proposer p to acceptor a
|
||||
TransferEntry(p, a) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ prop_state[p].term = acc_state[a].term
|
||||
/\ a \in DOMAIN prop_state[p].next_send_lsn
|
||||
/\ LET
|
||||
next_e == NextEntry(p, a)
|
||||
IN
|
||||
/\ next_e /= NULL
|
||||
/\ LET
|
||||
\* Consider bumping epoch if getting this entry recovers the acceptor.
|
||||
\* Both vcl and flush_lsn must be reached in current impl as we don't
|
||||
\* truncate the log.
|
||||
new_epoch ==
|
||||
IF /\ acc_state[a].epoch < prop_state[p].term
|
||||
/\ next_e.lsn >= prop_state[p].vcl
|
||||
/\ next_e.lsn >= Len(acc_state[a].wal)
|
||||
THEN
|
||||
prop_state[p].term
|
||||
ELSE
|
||||
acc_state[a].epoch
|
||||
\* TLC spits warning about non-existent entry if EXCEPT is used here,
|
||||
\* so construct manually.
|
||||
new_wal == (next_e.lsn :> next_e) @@ acc_state[a].wal
|
||||
\* Also check whether this entry allows to advance commit_lsn and
|
||||
\* if so, bump it where possible. Modeling this as separate action
|
||||
\* significantly bloats the space (5m vs 15m on max_entries=3 max_term=3,
|
||||
\* so act immediately.
|
||||
entry_owners == {o \in acceptors:
|
||||
/\ o /= a
|
||||
\* only recovered acceptors advance commit_lsn
|
||||
/\ acc_state[o].epoch = prop_state[p].term
|
||||
/\ next_e \in FValues(acc_state[o].wal)} \cup {a}
|
||||
IN
|
||||
/\ acc_state' = [acc_state EXCEPT ![a].wal = new_wal,
|
||||
![a].epoch = new_epoch]
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].next_send_lsn[a] =
|
||||
prop_state[p].next_send_lsn[a] + 1]
|
||||
/\ commit_lsns' = IF /\ new_epoch = prop_state[p].term
|
||||
/\ Quorum(entry_owners)
|
||||
THEN
|
||||
[acc \in acceptors |->
|
||||
IF /\ acc \in entry_owners
|
||||
/\ next_e.lsn > commit_lsns[acc]
|
||||
THEN
|
||||
next_e.lsn
|
||||
ELSE
|
||||
commit_lsns[acc]]
|
||||
ELSE
|
||||
commit_lsns
|
||||
|
||||
|
||||
\*******************************************************************************
|
||||
\* Final spec
|
||||
\*******************************************************************************
|
||||
|
||||
Next ==
|
||||
\/ \E q \in Quorums: \E p \in proposers: RestartProposer(p, q)
|
||||
\/ \E p \in proposers: \E a \in acceptors: Vote(p, a)
|
||||
\/ \E p \in proposers: BecomeLeader(p)
|
||||
\/ \E p \in proposers: \E a \in acceptors: UpdateTerm(p, a)
|
||||
\/ \E p \in proposers: \E a \in acceptors: HandshakeWithLeader(p, a)
|
||||
\/ \E p \in proposers: NewEntry(p)
|
||||
\/ \E p \in proposers: \E a \in acceptors: TransferEntry(p, a)
|
||||
|
||||
Spec == Init /\ [][Next]_<<prop_state, acc_state, commit_lsns>>
|
||||
|
||||
|
||||
\********************************************************************************
|
||||
\* Invariants
|
||||
\********************************************************************************
|
||||
|
||||
\* we don't track history, but this property is fairly convincing anyway
|
||||
ElectionSafety ==
|
||||
\A p1, p2 \in proposers:
|
||||
(/\ prop_state[p1].state = "leader"
|
||||
/\ prop_state[p2].state = "leader"
|
||||
/\ prop_state[p1].term = prop_state[p2].term) => (p1 = p2)
|
||||
|
||||
LogIsMonotonic ==
|
||||
\A a \in acceptors:
|
||||
\A i \in DOMAIN acc_state[a].wal: \A j \in DOMAIN acc_state[a].wal:
|
||||
(i > j) => (/\ acc_state[a].wal[i].lsn > acc_state[a].wal[j].lsn
|
||||
\* currently we don't truncate log during recovery, so
|
||||
\* uncommitted part can be non-monotonic; remove the
|
||||
\* precondition once we switch to truncation
|
||||
/\ (i <= commit_lsns[a] /\ j <= commit_lsns[a]) =>
|
||||
(acc_state[a].wal[i].epoch >= acc_state[a].wal[j].epoch))
|
||||
|
||||
\* Main invariant: log under commit_lsn must match everywhere.
|
||||
LogSafety ==
|
||||
\A a1 \in acceptors: \A a2 \in acceptors:
|
||||
LET
|
||||
common_len == Min(commit_lsns[a1], commit_lsns[a2])
|
||||
IN
|
||||
SubSeq(acc_state[a1].wal, 1, common_len) = SubSeq(acc_state[a2].wal, 1, common_len)
|
||||
|
||||
\* Next record we are going to push to acceptor must never overwrite committed
|
||||
\* different record.
|
||||
CommittedNotOverwritten ==
|
||||
\A p \in proposers: \A a \in acceptors:
|
||||
(/\ prop_state[p].state = "leader"
|
||||
/\ prop_state[p].term = acc_state[a].term
|
||||
/\ a \in DOMAIN prop_state[p].next_send_lsn) =>
|
||||
LET
|
||||
next_e == NextEntry(p, a)
|
||||
IN
|
||||
(next_e /= NULL) =>
|
||||
((commit_lsns[a] >= next_e.lsn) => (acc_state[a].wal[next_e.lsn] = next_e))
|
||||
|
||||
|
||||
====
|
||||
Reference in New Issue
Block a user