Apache Mesos
consensus.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __LOG_CONSENSUS_HPP__
18 #define __LOG_CONSENSUS_HPP__
19 
20 #include <stdint.h>
21 
22 #include <process/future.hpp>
23 #include <process/shared.hpp>
24 
25 #include <stout/none.hpp>
26 #include <stout/nothing.hpp>
27 #include <stout/option.hpp>
28 
29 #include "log/network.hpp"
30 
31 #include "messages/log.hpp"
32 
33 // We use Paxos consensus protocol to agree on the value of each entry
34 // in the replicated log. In our system, each replica is both an
35 // acceptor and a learner. There are several types of proposers in the
36 // system. Coordinator is one type of proposer we use to append new
37 // log entries. The 'log::fill' function below creates an internal
38 // proposer each time it is called. These internal proposers are used
39 // to agree on previously written entries in the log.
40 
41 namespace mesos {
42 namespace internal {
43 namespace log {
44 
45 // Runs the promise phase (a.k.a., the prepare phase) in Paxos. This
46 // phase has two purposes. First, the proposer asks for promises from a
47 // quorum of replicas not to accept writes from proposers with lower
48 // proposal numbers. Second, the proposer looks for potential
49 // previously agreed values. Only these values can be written in the
50 // next phase. This restriction is used by Paxos to make sure that if
51 // a value has been agreed on for a log position, subsequent writes to
52 // this log position will always have the same value. We can run the
53 // promise phase either for a specified log position ("explicit"
54 // promise), or for all positions that have not yet been promised to
55 // any proposer ("implicit" promise). The latter is a well known
56 // optimization called Multi-Paxos. If the leader is relatively
57 // stable, we can skip the promise phase for future instances of the
58 // protocol with the same leader.
59 //
60 // We re-use PromiseResponse to specify the return value of this
61 // phase. In the case of explicit promise, if a learned action has
62 // been found in a response, this phase succeeds immediately with the
63 // 'okay' field set to true and the 'action' field set to the learned
64 // action. If no learned action has been found in a quorum of
65 // replicas, we first check if some of them reply Nack (i.e., they
66 // refuse to give promise). If yes, we set the 'okay' field to false
67 // and set the 'proposal' field to be the highest proposal number seen
68 // in these Nack responses. If none of them replies Nack, we set the
69 // 'okay' field to true and set the 'action' field to be the action
70 // that is performed by the proposer with the highest proposal number
71 // in these responses. If no action has been found in these responses,
72 // we leave the 'action' field unset.
73 //
74 // In the case of implicit promise, we must wait until a quorum of
75 // replicas have replied. If some of them reply Nack, we set the
76 // 'okay' field to false and set the 'proposal' field to be the
77 // highest proposal number seen in these Nack responses. If none of
78 // them replies Nack, we set the 'okay' field to true and set the
79 // 'position' field to be the highest position (end position) seen in
80 // these responses.
81 process::Future<PromiseResponse> promise(
82  size_t quorum,
83  const process::Shared<Network>& network,
84  uint64_t proposal,
85  const Option<uint64_t>& position = None());
86 
87 
88 // Runs the write phase (a.k.a., the propose phase) in Paxos. In this
89 // phase, the proposer broadcasts a write to replicas. This phase
90 // succeeds if a quorum of replicas accept the write. A proposer
91 // cannot write if it hasn't gained enough (i.e., a quorum of)
92 // promises from replicas. We re-use WriteResponse to specify the
93 // return value of this phase. We must wait until a quorum of replicas
94 // have replied. If some of them reply Nack, we set the 'okay' field
95 // to false and set the 'proposal' field to be the highest proposal
96 // number seen in these Nack responses. If none of them replies Nack,
97 // we set the 'okay' field to true.
98 process::Future<WriteResponse> write(
99  size_t quorum,
100  const process::Shared<Network>& network,
101  uint64_t proposal,
102  const Action& action);
103 
104 
105 // Runs the learn phase (a.k.a., the commit phase) in Paxos. In fact,
106 // this phase is not required, but treated as an optimization. In this
107 // phase, a proposer broadcasts a learned message to replicas,
108 // indicating that a consensus has already been reached for the given
109 // log position. No need to wait for responses from replicas. When
110 // the future is ready, the learned message has been broadcast.
111 process::Future<Nothing> learn(
112  const process::Shared<Network>& network,
113  const Action& action);
114 
115 
116 // Tries to reach consensus for the given log position by running a
117 // full Paxos round (i.e., promise -> write -> learn). If no value has
118 // been previously agreed on for the given log position, a NOP will be
119 // proposed. This function will automatically retry by bumping the
120 // proposal number if the specified proposal number is found to be not
121 // high enough. To ensure liveness, it will inject a random delay
122 // before retrying. A learned action will be returned when the
123 // operation succeeds.
124 process::Future<Action> fill(
125  size_t quorum,
126  const process::Shared<Network>& network,
127  uint64_t proposal,
128  uint64_t position);
129 
130 } // namespace log {
131 } // namespace internal {
132 } // namespace mesos {
133 
134 #endif // __LOG_CONSENSUS_HPP__
process::Future< PromiseResponse > promise(size_t quorum, const process::Shared< Network > &network, uint64_t proposal, const Option< uint64_t > &position=None())
process::Future< WriteResponse > write(size_t quorum, const process::Shared< Network > &network, uint64_t proposal, const Action &action)
process::Future< Action > fill(size_t quorum, const process::Shared< Network > &network, uint64_t proposal, uint64_t position)
Definition: none.hpp:27
process::Future< Nothing > learn(const process::Shared< Network > &network, const Action &action)
Definition: future.hpp:57