Apache Mesos
sorter.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __MASTER_ALLOCATOR_SORTER_DRF_SORTER_HPP__
18 #define __MASTER_ALLOCATOR_SORTER_DRF_SORTER_HPP__
19 
20 #include <algorithm>
21 #include <set>
22 #include <string>
23 #include <vector>
24 
25 #include <mesos/mesos.hpp>
26 #include <mesos/resources.hpp>
27 #include <mesos/values.hpp>
28 
29 #include <stout/check.hpp>
30 #include <stout/hashmap.hpp>
31 #include <stout/option.hpp>
32 
34 
36 
37 
38 namespace mesos {
39 namespace internal {
40 namespace master {
41 namespace allocator {
42 
43 class DRFSorter : public Sorter
44 {
45 public:
46  DRFSorter();
47 
48  explicit DRFSorter(
49  const process::UPID& allocator,
50  const std::string& metricsPrefix);
51 
52  ~DRFSorter() override;
53 
54  void initialize(
55  const Option<std::set<std::string>>& fairnessExcludeResourceNames)
56  override;
57 
58  void add(const std::string& clientPath) override;
59 
60  void remove(const std::string& clientPath) override;
61 
62  void activate(const std::string& clientPath) override;
63 
64  void deactivate(const std::string& clientPath) override;
65 
66  void updateWeight(const std::string& path, double weight) override;
67 
68  void allocated(
69  const std::string& clientPath,
70  const SlaveID& slaveId,
71  const Resources& resources) override;
72 
73  void update(
74  const std::string& clientPath,
75  const SlaveID& slaveId,
76  const Resources& oldAllocation,
77  const Resources& newAllocation) override;
78 
79  void unallocated(
80  const std::string& clientPath,
81  const SlaveID& slaveId,
82  const Resources& resources) override;
83 
85  const std::string& clientPath) const override;
86 
88  const std::string& clientPath) const override;
89 
91  const SlaveID& slaveId) const override;
92 
94  const std::string& clientPath,
95  const SlaveID& slaveId) const override;
96 
97  const Resources& totalScalarQuantities() const override;
98 
99  void add(const SlaveID& slaveId, const Resources& resources) override;
100 
101  void remove(const SlaveID& slaveId, const Resources& resources) override;
102 
103  std::vector<std::string> sort() override;
104 
105  bool contains(const std::string& clientPath) const override;
106 
107  size_t count() const override;
108 
109 private:
110  // A node in the sorter's tree.
111  struct Node;
112 
113  // Returns the dominant resource share for the node.
114  double calculateShare(const Node* node) const;
115 
116  // Returns the weight associated with the node. If no weight has
117  // been configured for the node's path, the default weight (1.0) is
118  // returned.
119  double getWeight(const Node* node) const;
120 
121  // Returns the client associated with the given path. Returns
122  // nullptr if the path is not found or if the path identifies an
123  // internal node in the tree (not a client).
124  Node* find(const std::string& clientPath) const;
125 
126  // Resources (by name) that will be excluded from fair sharing.
127  Option<std::set<std::string>> fairnessExcludeResourceNames;
128 
129  // If true, sort() will recalculate all shares and resort the tree.
130  bool dirty = false;
131 
132  // The root node in the sorter tree.
133  Node* root;
134 
135  // To speed lookups, we keep a map from client paths to the leaf
136  // node associated with that client. There is an entry in this map
137  // for every leaf node in the client tree (except for the root when
138  // the tree is empty). Paths in this map do NOT contain the trailing
139  // "." label we use for leaf nodes.
141 
142  // Weights associated with role paths. Setting the weight for a path
143  // influences the share of all nodes in the subtree rooted at that
144  // path. This hashmap might include weights for paths that are not
145  // currently in the sorter tree.
147 
148  // Total resources.
149  struct Total
150  {
151  // We need to keep track of the resources (and not just scalar
152  // quantities) to account for multiple copies of the same shared
153  // resources. We need to ensure that we do not update the scalar
154  // quantities for shared resources when the change is only in the
155  // number of copies in the sorter.
156  hashmap<SlaveID, Resources> resources;
157 
158  // NOTE: Scalars can be safely aggregated across slaves. We keep
159  // that to speed up the calculation of shares. See MESOS-2891 for
160  // the reasons why we want to do that.
161  //
162  // NOTE: We omit information about dynamic reservations and
163  // persistent volumes here to enable resources to be aggregated
164  // across slaves more effectively. See MESOS-4833 for more
165  // information.
166  //
167  // Sharedness info is also stripped out when resource identities
168  // are omitted because sharedness inherently refers to the
169  // identities of resources and not quantities.
170  Resources scalarQuantities;
171 
172  // To improve the performance of calculating shares, we store
173  // a redundant but more efficient version of `scalarQuantities`.
174  // See MESOS-4694.
175  //
176  // TODO(bmahler): Can we remove `scalarQuantities` in favor of
177  // using this type whenever scalar quantities are needed?
179  } total_;
180 
181  // Metrics are optionally exposed by the sorter.
182  friend Metrics;
183  Option<Metrics> metrics;
184 };
185 
186 
187 // Represents a node in the sorter's tree. The structure of the tree
188 // reflects the hierarchical relationships between the clients of the
189 // sorter. Some (but not all) nodes correspond to sorter clients; some
190 // nodes only exist to represent the structure of the sorter
191 // tree. Clients are always associated with leaf nodes.
192 //
193 // For example, if there are two sorter clients "a/b" and "c/d", the
194 // tree will contain five nodes: the root node, internal nodes for "a"
195 // and "c", and leaf nodes for the clients "a/b" and "c/d".
197 {
198  // Indicates whether a node is an active leaf node, an inactive leaf
199  // node, or an internal node. Sorter clients always correspond to
200  // leaf nodes, and only leaf nodes can be activated or deactivated.
201  // The root node is always an "internal" node.
202  enum Kind
203  {
206  INTERNAL
207  };
208 
209  Node(const std::string& _name, Kind _kind, Node* _parent)
210  : name(_name), share(0), kind(_kind), parent(_parent)
211  {
212  // Compute the node's path. Three cases:
213  //
214  // (1) If the root node, use the empty string
215  // (2) If a child of the root node, use the child's name
216  // (3) Otherwise, use the parent's name, "/", and the child's name.
217  if (parent == nullptr) {
218  path = "";
219  } else if (parent->parent == nullptr) {
220  path = name;
221  } else {
222  path = strings::join("/", parent->path, name);
223  }
224  }
225 
227  {
228  foreach (Node* child, children) {
229  delete child;
230  }
231  }
232 
233  // The label of the edge from this node's parent to the
234  // node. "Implicit" leaf nodes are always named ".".
235  //
236  // TODO(neilc): Consider naming implicit leaf nodes in a clearer
237  // way, e.g., by making `name` an Option?
238  std::string name;
239 
240  // Complete path from root to node. This includes the trailing "."
241  // label for virtual leaf nodes.
242  std::string path;
243 
244  double share;
245 
246  // Cached weight of the node, access this through `getWeight()`.
247  // The value is cached by `getWeight()` and updated by
248  // `updateWeight()`. Marked mutable since the caching writes
249  // to this are logically const.
251 
253 
255 
256  // Pointers to the child nodes. `children` is only non-empty if
257  // `kind` is INTERNAL_NODE. Two ordering invariants are maintained
258  // on the `children` vector:
259  //
260  // (1) All inactive leaves are stored at the end of the vector; that
261  // is, each `children` vector consists of zero or more active leaves
262  // and internal nodes, followed by zero or more inactive leaves. This
263  // means that code that only wants to iterate over active children
264  // can stop when the first inactive leaf is observed.
265  //
266  // (2) If the tree is not dirty, the active leaves and internal
267  // nodes are kept sorted by DRF share.
268  std::vector<Node*> children;
269 
270  // If this node represents a sorter client, this returns the path of
271  // that client. Unlike the `path` field, this does NOT include the
272  // trailing "." label for virtual leaf nodes.
273  //
274  // For example, if the sorter contains two clients "a" and "a/b",
275  // the tree will contain four nodes: the root node, "a", "a/."
276  // (virtual leaf), and "a/b". The `clientPath()` of "a/." is "a",
277  // because that is the name of the client associated with that
278  // virtual leaf node.
279  std::string clientPath() const
280  {
281  if (name == ".") {
282  CHECK(kind == ACTIVE_LEAF || kind == INACTIVE_LEAF);
283  return CHECK_NOTNULL(parent)->path;
284  }
285 
286  return path;
287  }
288 
289  bool isLeaf() const
290  {
291  if (kind == ACTIVE_LEAF || kind == INACTIVE_LEAF) {
292  CHECK(children.empty());
293  return true;
294  }
295 
296  return false;
297  }
298 
299  void removeChild(const Node* child)
300  {
301  // Sanity check: ensure we are removing an extant node.
302  auto it = std::find(children.begin(), children.end(), child);
303  CHECK(it != children.end());
304 
305  children.erase(it);
306  }
307 
308  void addChild(Node* child)
309  {
310  // Sanity check: don't allow duplicates to be inserted.
311  auto it = std::find(children.begin(), children.end(), child);
312  CHECK(it == children.end());
313 
314  // If we're inserting an inactive leaf, place it at the end of the
315  // `children` vector; otherwise, place it at the beginning. This
316  // maintains ordering invariant (1) above. It is up to the caller
317  // to maintain invariant (2) -- e.g., by marking the tree dirty.
318  if (child->kind == INACTIVE_LEAF) {
319  children.push_back(child);
320  } else {
321  children.insert(children.begin(), child);
322  }
323  }
324 
325  // Allocation for a node.
326  struct Allocation
327  {
328  Allocation() : count(0) {}
329 
330  void add(const SlaveID& slaveId, const Resources& toAdd)
331  {
332  // Add shared resources to the allocated quantities when the same
333  // resources don't already exist in the allocation.
334  const Resources sharedToAdd = toAdd.shared()
335  .filter([this, slaveId](const Resource& resource) {
336  return !resources[slaveId].contains(resource);
337  });
338 
339  const Resources quantitiesToAdd =
340  (toAdd.nonShared() + sharedToAdd).createStrippedScalarQuantity();
341 
342  resources[slaveId] += toAdd;
343  scalarQuantities += quantitiesToAdd;
344 
345  foreach (const Resource& resource, quantitiesToAdd) {
346  totals[resource.name()] += resource.scalar();
347  }
348 
349  count++;
350  }
351 
352  void subtract(const SlaveID& slaveId, const Resources& toRemove)
353  {
354  CHECK(resources.contains(slaveId));
355  CHECK(resources.at(slaveId).contains(toRemove))
356  << "Resources " << resources.at(slaveId) << " at agent " << slaveId
357  << " does not contain " << toRemove;
358 
359  resources[slaveId] -= toRemove;
360 
361  // Remove shared resources from the allocated quantities when there
362  // are no instances of same resources left in the allocation.
363  const Resources sharedToRemove = toRemove.shared()
364  .filter([this, slaveId](const Resource& resource) {
365  return !resources[slaveId].contains(resource);
366  });
367 
368  const Resources quantitiesToRemove =
369  (toRemove.nonShared() + sharedToRemove).createStrippedScalarQuantity();
370 
371  foreach (const Resource& resource, quantitiesToRemove) {
372  totals[resource.name()] -= resource.scalar();
373  }
374 
375  CHECK(scalarQuantities.contains(quantitiesToRemove))
376  << scalarQuantities << " does not contain " << quantitiesToRemove;
377 
378  scalarQuantities -= quantitiesToRemove;
379 
380  if (resources[slaveId].empty()) {
381  resources.erase(slaveId);
382  }
383  }
384 
385  void update(
386  const SlaveID& slaveId,
387  const Resources& oldAllocation,
388  const Resources& newAllocation)
389  {
390  const Resources oldAllocationQuantity =
391  oldAllocation.createStrippedScalarQuantity();
392  const Resources newAllocationQuantity =
393  newAllocation.createStrippedScalarQuantity();
394 
395  CHECK(resources.contains(slaveId));
396  CHECK(resources[slaveId].contains(oldAllocation))
397  << "Resources " << resources[slaveId] << " at agent " << slaveId
398  << " does not contain " << oldAllocation;
399 
400  CHECK(scalarQuantities.contains(oldAllocationQuantity))
401  << scalarQuantities << " does not contain " << oldAllocationQuantity;
402 
403  resources[slaveId] -= oldAllocation;
404  resources[slaveId] += newAllocation;
405 
406  scalarQuantities -= oldAllocationQuantity;
407  scalarQuantities += newAllocationQuantity;
408 
409  foreach (const Resource& resource, oldAllocationQuantity) {
410  totals[resource.name()] -= resource.scalar();
411  }
412 
413  foreach (const Resource& resource, newAllocationQuantity) {
414  totals[resource.name()] += resource.scalar();
415  }
416  }
417 
418  // We store the number of times this client has been chosen for
419  // allocation so that we can fairly share the resources across
420  // clients that have the same share. Note that this information is
421  // not persisted across master failovers, but since the point is
422  // to equalize the `count` across clients of the same `share`
423  // having allocations restart at 0 after a master failover should
424  // be sufficient (famous last words.)
425  uint64_t count;
426 
427  // We maintain multiple copies of each shared resource allocated
428  // to a client, where the number of copies represents the number
429  // of times this shared resource has been allocated to (and has
430  // not been recovered from) a specific client.
432 
433  // Similarly, we aggregate scalars across slaves and omit information
434  // about dynamic reservations, persistent volumes and sharedness of
435  // the corresponding resource. See notes above.
437 
438  // To improve the performance of calculating shares, we store
439  // a redundant but more efficient version of `scalarQuantities`.
440  // See MESOS-4694.
441  //
442  // TODO(bmahler): Can we remove `scalarQuantities` in favor of
443  // using this type whenever scalar quantities are needed?
445  } allocation;
446 
447  // Compares two nodes according to DRF share.
448  static bool compareDRF(const Node* left, const Node* right)
449  {
450  if (left->share != right->share) {
451  return left->share < right->share;
452  }
453 
454  if (left->allocation.count != right->allocation.count) {
455  return left->allocation.count < right->allocation.count;
456  }
457 
458  return left->path < right->path;
459  }
460 };
461 
462 } // namespace allocator {
463 } // namespace master {
464 } // namespace internal {
465 } // namespace mesos {
466 
467 #endif // __MASTER_ALLOCATOR_SORTER_DRF_SORTER_HPP__
Definition: path.hpp:26
bool contains(const std::string &clientPath) const override
Node(const std::string &_name, Kind _kind, Node *_parent)
Definition: sorter.hpp:209
Definition: option.hpp:28
void deactivate(const std::string &clientPath) override
bool isLeaf() const
Definition: sorter.hpp:289
Definition: master.hpp:27
std::stringstream & join(std::stringstream &stream, const std::string &separator, T &&...args)
Definition: strings.hpp:307
std::string clientPath() const
Definition: sorter.hpp:279
const Resources & allocationScalarQuantities(const std::string &clientPath) const override
std::vector< Node * > children
Definition: sorter.hpp:268
void add(const SlaveID &slaveId, const Resources &toAdd)
Definition: sorter.hpp:330
std::string path
Definition: sorter.hpp:242
Resources filter(const lambda::function< bool(const Resource &)> &predicate) const
Definition: resources.hpp:81
void unallocated(const std::string &clientPath, const SlaveID &slaveId, const Resources &resources) override
void update(const std::string &clientPath, const SlaveID &slaveId, const Resources &oldAllocation, const Resources &newAllocation) override
Option< double > weight
Definition: sorter.hpp:250
std::string name
Definition: sorter.hpp:238
const Resources & totalScalarQuantities() const override
Definition: hashmap.hpp:38
const char * kind()
hashmap< SlaveID, Resources > resources
Definition: sorter.hpp:431
struct mesos::internal::master::allocator::DRFSorter::Node::Allocation allocation
An "untyped" PID, used to encapsulate the process ID for lower-layer abstractions (eg...
Definition: pid.hpp:39
void subtract(const SlaveID &slaveId, const Resources &toRemove)
Definition: sorter.hpp:352
void removeChild(const Node *child)
Definition: sorter.hpp:299
void updateWeight(const std::string &path, double weight) override
Resources createStrippedScalarQuantity() const
void initialize(const Option< std::set< std::string >> &fairnessExcludeResourceNames) override
Definition: spec.hpp:26
std::vector< std::string > sort() override
static bool compareDRF(const Node *left, const Node *right)
Definition: sorter.hpp:448
void allocated(const std::string &clientPath, const SlaveID &slaveId, const Resources &resources) override
const hashmap< SlaveID, Resources > & allocation(const std::string &clientPath) const override
Resources shared() const
Definition: attributes.hpp:24
ScalarResourceQuantities totals
Definition: sorter.hpp:444
Resources nonShared() const
std::set< pid_t > children(pid_t, const std::list< Process > &, bool)
Definition: os.hpp:215
void activate(const std::string &clientPath) override
void addChild(Node *child)
Definition: sorter.hpp:308
void update(const SlaveID &slaveId, const Resources &oldAllocation, const Resources &newAllocation)
Definition: sorter.hpp:385
constexpr const char * name
Definition: shell.hpp:43
void add(const std::string &clientPath) override
Try< std::list< std::string > > find(const std::string &directory, const std::string &pattern)
Definition: find.hpp:37