Apache Mesos
sorter.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __MASTER_ALLOCATOR_SORTER_DRF_SORTER_HPP__
18 #define __MASTER_ALLOCATOR_SORTER_DRF_SORTER_HPP__
19 
20 #include <algorithm>
21 #include <set>
22 #include <string>
23 #include <vector>
24 
25 #include <mesos/mesos.hpp>
26 #include <mesos/resources.hpp>
27 #include <mesos/values.hpp>
28 
29 #include <stout/check.hpp>
30 #include <stout/hashmap.hpp>
31 #include <stout/option.hpp>
32 
34 
36 
37 
38 namespace mesos {
39 namespace internal {
40 namespace master {
41 namespace allocator {
42 
43 class DRFSorter : public Sorter
44 {
45 public:
46  DRFSorter();
47 
48  explicit DRFSorter(
49  const process::UPID& allocator,
50  const std::string& metricsPrefix);
51 
52  virtual ~DRFSorter();
53 
54  virtual void initialize(
55  const Option<std::set<std::string>>& fairnessExcludeResourceNames);
56 
57  virtual void add(const std::string& clientPath);
58 
59  virtual void remove(const std::string& clientPath);
60 
61  virtual void activate(const std::string& clientPath);
62 
63  virtual void deactivate(const std::string& clientPath);
64 
65  virtual void updateWeight(const std::string& path, double weight);
66 
67  virtual void allocated(
68  const std::string& clientPath,
69  const SlaveID& slaveId,
70  const Resources& resources);
71 
72  virtual void update(
73  const std::string& clientPath,
74  const SlaveID& slaveId,
75  const Resources& oldAllocation,
76  const Resources& newAllocation);
77 
78  virtual void unallocated(
79  const std::string& clientPath,
80  const SlaveID& slaveId,
81  const Resources& resources);
82 
84  const std::string& clientPath) const;
85 
87  const std::string& clientPath) const;
88 
90  const SlaveID& slaveId) const;
91 
92  virtual Resources allocation(
93  const std::string& clientPath,
94  const SlaveID& slaveId) const;
95 
96  virtual const Resources& totalScalarQuantities() const;
97 
98  virtual void add(const SlaveID& slaveId, const Resources& resources);
99 
100  virtual void remove(const SlaveID& slaveId, const Resources& resources);
101 
102  virtual std::vector<std::string> sort();
103 
104  virtual bool contains(const std::string& clientPath) const;
105 
106  virtual size_t count() const;
107 
108 private:
109  // A node in the sorter's tree.
110  struct Node;
111 
112  // Returns the dominant resource share for the node.
113  double calculateShare(const Node* node) const;
114 
115  // Returns the weight associated with the node. If no weight has
116  // been configured for the node's path, the default weight (1.0) is
117  // returned.
118  double findWeight(const Node* node) const;
119 
120  // Returns the client associated with the given path. Returns
121  // nullptr if the path is not found or if the path identifies an
122  // internal node in the tree (not a client).
123  Node* find(const std::string& clientPath) const;
124 
125  // Resources (by name) that will be excluded from fair sharing.
126  Option<std::set<std::string>> fairnessExcludeResourceNames;
127 
128  // If true, sort() will recalculate all shares and resort the tree.
129  bool dirty = false;
130 
131  // The root node in the sorter tree.
132  Node* root;
133 
134  // To speed lookups, we keep a map from client paths to the leaf
135  // node associated with that client. There is an entry in this map
136  // for every leaf node in the client tree (except for the root when
137  // the tree is empty). Paths in this map do NOT contain the trailing
138  // "." label we use for leaf nodes.
140 
141  // Weights associated with role paths. Setting the weight for a path
142  // influences the share of all nodes in the subtree rooted at that
143  // path. This hashmap might include weights for paths that are not
144  // currently in the sorter tree.
146 
147  // Total resources.
148  struct Total
149  {
150  // We need to keep track of the resources (and not just scalar
151  // quantities) to account for multiple copies of the same shared
152  // resources. We need to ensure that we do not update the scalar
153  // quantities for shared resources when the change is only in the
154  // number of copies in the sorter.
155  hashmap<SlaveID, Resources> resources;
156 
157  // NOTE: Scalars can be safely aggregated across slaves. We keep
158  // that to speed up the calculation of shares. See MESOS-2891 for
159  // the reasons why we want to do that.
160  //
161  // NOTE: We omit information about dynamic reservations and
162  // persistent volumes here to enable resources to be aggregated
163  // across slaves more effectively. See MESOS-4833 for more
164  // information.
165  //
166  // Sharedness info is also stripped out when resource identities
167  // are omitted because sharedness inherently refers to the
168  // identities of resources and not quantities.
169  Resources scalarQuantities;
170 
171  // We also store a map version of `scalarQuantities`, mapping
172  // the `Resource::name` to aggregated scalar. This improves the
173  // performance of calculating shares. See MESOS-4694.
174  //
175  // TODO(bmahler): Ideally we do not store `scalarQuantities`
176  // redundantly here, investigate performance improvements to
177  // `Resources` to make this unnecessary.
179  } total_;
180 
181  // Metrics are optionally exposed by the sorter.
182  friend Metrics;
183  Option<Metrics> metrics;
184 };
185 
186 
187 // Represents a node in the sorter's tree. The structure of the tree
188 // reflects the hierarchical relationships between the clients of the
189 // sorter. Some (but not all) nodes correspond to sorter clients; some
190 // nodes only exist to represent the structure of the sorter
191 // tree. Clients are always associated with leaf nodes.
192 //
193 // For example, if there are two sorter clients "a/b" and "c/d", the
194 // tree will contain five nodes: the root node, internal nodes for "a"
195 // and "c", and leaf nodes for the clients "a/b" and "c/d".
197 {
198  // Indicates whether a node is an active leaf node, an inactive leaf
199  // node, or an internal node. Sorter clients always correspond to
200  // leaf nodes, and only leaf nodes can be activated or deactivated.
201  // The root node is always an "internal" node.
202  enum Kind
203  {
206  INTERNAL
207  };
208 
209  Node(const std::string& _name, Kind _kind, Node* _parent)
210  : name(_name), share(0), kind(_kind), parent(_parent)
211  {
212  // Compute the node's path. Three cases:
213  //
214  // (1) If the root node, use the empty string
215  // (2) If a child of the root node, use the child's name
216  // (3) Otherwise, use the parent's name, "/", and the child's name.
217  if (parent == nullptr) {
218  path = "";
219  } else if (parent->parent == nullptr) {
220  path = name;
221  } else {
222  path = strings::join("/", parent->path, name);
223  }
224  }
225 
227  {
228  foreach (Node* child, children) {
229  delete child;
230  }
231  }
232 
233  // The label of the edge from this node's parent to the
234  // node. "Implicit" leaf nodes are always named ".".
235  //
236  // TODO(neilc): Consider naming implicit leaf nodes in a clearer
237  // way, e.g., by making `name` an Option?
238  std::string name;
239 
240  // Complete path from root to node. This includes the trailing "."
241  // label for virtual leaf nodes.
242  std::string path;
243 
244  double share;
245 
247 
249 
250  // Pointers to the child nodes. `children` is only non-empty if
251  // `kind` is INTERNAL_NODE. Two ordering invariants are maintained
252  // on the `children` vector:
253  //
254  // (1) All inactive leaves are stored at the end of the vector; that
255  // is, each `children` vector consists of zero or more active leaves
256  // and internal nodes, followed by zero or more inactive leaves. This
257  // means that code that only wants to iterate over active children
258  // can stop when the first inactive leaf is observed.
259  //
260  // (2) If the tree is not dirty, the active leaves and internal
261  // nodes are kept sorted by DRF share.
262  std::vector<Node*> children;
263 
264  // If this node represents a sorter client, this returns the path of
265  // that client. Unlike the `path` field, this does NOT include the
266  // trailing "." label for virtual leaf nodes.
267  //
268  // For example, if the sorter contains two clients "a" and "a/b",
269  // the tree will contain four nodes: the root node, "a", "a/."
270  // (virtual leaf), and "a/b". The `clientPath()` of "a/." is "a",
271  // because that is the name of the client associated with that
272  // virtual leaf node.
273  std::string clientPath() const
274  {
275  if (name == ".") {
276  CHECK(kind == ACTIVE_LEAF || kind == INACTIVE_LEAF);
277  return CHECK_NOTNULL(parent)->path;
278  }
279 
280  return path;
281  }
282 
283  bool isLeaf() const
284  {
285  if (kind == ACTIVE_LEAF || kind == INACTIVE_LEAF) {
286  CHECK(children.empty());
287  return true;
288  }
289 
290  return false;
291  }
292 
293  void removeChild(const Node* child)
294  {
295  // Sanity check: ensure we are removing an extant node.
296  auto it = std::find(children.begin(), children.end(), child);
297  CHECK(it != children.end());
298 
299  children.erase(it);
300  }
301 
302  void addChild(Node* child)
303  {
304  // Sanity check: don't allow duplicates to be inserted.
305  auto it = std::find(children.begin(), children.end(), child);
306  CHECK(it == children.end());
307 
308  // If we're inserting an inactive leaf, place it at the end of the
309  // `children` vector; otherwise, place it at the beginning. This
310  // maintains ordering invariant (1) above. It is up to the caller
311  // to maintain invariant (2) -- e.g., by marking the tree dirty.
312  if (child->kind == INACTIVE_LEAF) {
313  children.push_back(child);
314  } else {
315  children.insert(children.begin(), child);
316  }
317  }
318 
319  // Allocation for a node.
320  struct Allocation
321  {
322  Allocation() : count(0) {}
323 
324  void add(const SlaveID& slaveId, const Resources& toAdd)
325  {
326  // Add shared resources to the allocated quantities when the same
327  // resources don't already exist in the allocation.
328  const Resources sharedToAdd = toAdd.shared()
329  .filter([this, slaveId](const Resource& resource) {
330  return !resources[slaveId].contains(resource);
331  });
332 
333  const Resources quantitiesToAdd =
334  (toAdd.nonShared() + sharedToAdd).createStrippedScalarQuantity();
335 
336  resources[slaveId] += toAdd;
337  scalarQuantities += quantitiesToAdd;
338 
339  foreach (const Resource& resource, quantitiesToAdd) {
340  totals[resource.name()] += resource.scalar();
341  }
342 
343  count++;
344  }
345 
346  void subtract(const SlaveID& slaveId, const Resources& toRemove)
347  {
348  CHECK(resources.contains(slaveId));
349  CHECK(resources.at(slaveId).contains(toRemove))
350  << "Resources " << resources.at(slaveId) << " at agent " << slaveId
351  << " does not contain " << toRemove;
352 
353  resources[slaveId] -= toRemove;
354 
355  // Remove shared resources from the allocated quantities when there
356  // are no instances of same resources left in the allocation.
357  const Resources sharedToRemove = toRemove.shared()
358  .filter([this, slaveId](const Resource& resource) {
359  return !resources[slaveId].contains(resource);
360  });
361 
362  const Resources quantitiesToRemove =
363  (toRemove.nonShared() + sharedToRemove).createStrippedScalarQuantity();
364 
365  foreach (const Resource& resource, quantitiesToRemove) {
366  totals[resource.name()] -= resource.scalar();
367  }
368 
369  CHECK(scalarQuantities.contains(quantitiesToRemove))
370  << scalarQuantities << " does not contain " << quantitiesToRemove;
371 
372  scalarQuantities -= quantitiesToRemove;
373 
374  if (resources[slaveId].empty()) {
375  resources.erase(slaveId);
376  }
377  }
378 
379  void update(
380  const SlaveID& slaveId,
381  const Resources& oldAllocation,
382  const Resources& newAllocation)
383  {
384  const Resources oldAllocationQuantity =
385  oldAllocation.createStrippedScalarQuantity();
386  const Resources newAllocationQuantity =
387  newAllocation.createStrippedScalarQuantity();
388 
389  CHECK(resources.contains(slaveId));
390  CHECK(resources[slaveId].contains(oldAllocation))
391  << "Resources " << resources[slaveId] << " at agent " << slaveId
392  << " does not contain " << oldAllocation;
393 
394  CHECK(scalarQuantities.contains(oldAllocationQuantity))
395  << scalarQuantities << " does not contain " << oldAllocationQuantity;
396 
397  resources[slaveId] -= oldAllocation;
398  resources[slaveId] += newAllocation;
399 
400  scalarQuantities -= oldAllocationQuantity;
401  scalarQuantities += newAllocationQuantity;
402 
403  foreach (const Resource& resource, oldAllocationQuantity) {
404  totals[resource.name()] -= resource.scalar();
405  }
406 
407  foreach (const Resource& resource, newAllocationQuantity) {
408  totals[resource.name()] += resource.scalar();
409  }
410  }
411 
412  // We store the number of times this client has been chosen for
413  // allocation so that we can fairly share the resources across
414  // clients that have the same share. Note that this information is
415  // not persisted across master failovers, but since the point is
416  // to equalize the `count` across clients of the same `share`
417  // having allocations restart at 0 after a master failover should
418  // be sufficient (famous last words.)
419  uint64_t count;
420 
421  // We maintain multiple copies of each shared resource allocated
422  // to a client, where the number of copies represents the number
423  // of times this shared resource has been allocated to (and has
424  // not been recovered from) a specific client.
426 
427  // Similarly, we aggregate scalars across slaves and omit information
428  // about dynamic reservations, persistent volumes and sharedness of
429  // the corresponding resource. See notes above.
431 
432  // We also store a map version of `scalarQuantities`, mapping
433  // the `Resource::name` to aggregated scalar. This improves the
434  // performance of calculating shares. See MESOS-4694.
435  //
436  // TODO(bmahler): Ideally we do not store `scalarQuantities`
437  // redundantly here, investigate performance improvements to
438  // `Resources` to make this unnecessary.
440  } allocation;
441 
442  // Compares two nodes according to DRF share.
443  static bool compareDRF(const Node* left, const Node* right)
444  {
445  if (left->share != right->share) {
446  return left->share < right->share;
447  }
448 
449  if (left->allocation.count != right->allocation.count) {
450  return left->allocation.count < right->allocation.count;
451  }
452 
453  return left->path < right->path;
454  }
455 };
456 
457 } // namespace allocator {
458 } // namespace master {
459 } // namespace internal {
460 } // namespace mesos {
461 
462 #endif // __MASTER_ALLOCATOR_SORTER_DRF_SORTER_HPP__
Definition: path.hpp:26
Node(const std::string &_name, Kind _kind, Node *_parent)
Definition: sorter.hpp:209
Definition: option.hpp:28
bool isLeaf() const
Definition: sorter.hpp:283
Definition: master.hpp:27
std::stringstream & join(std::stringstream &stream, const std::string &separator, T &&...args)
Definition: strings.hpp:307
std::string clientPath() const
Definition: sorter.hpp:273
std::vector< Node * > children
Definition: sorter.hpp:262
void add(const SlaveID &slaveId, const Resources &toAdd)
Definition: sorter.hpp:324
std::string path
Definition: sorter.hpp:242
Resources filter(const lambda::function< bool(const Resource &)> &predicate) const
Definition: resources.hpp:79
virtual std::vector< std::string > sort()
std::string name
Definition: sorter.hpp:238
virtual void update(const std::string &clientPath, const SlaveID &slaveId, const Resources &oldAllocation, const Resources &newAllocation)
virtual void deactivate(const std::string &clientPath)
Definition: hashmap.hpp:38
const char * kind()
hashmap< SlaveID, Resources > resources
Definition: sorter.hpp:425
struct mesos::internal::master::allocator::DRFSorter::Node::Allocation allocation
An "untyped" PID, used to encapsulate the process ID for lower-layer abstractions (eg...
Definition: pid.hpp:39
void subtract(const SlaveID &slaveId, const Resources &toRemove)
Definition: sorter.hpp:346
void removeChild(const Node *child)
Definition: sorter.hpp:293
virtual const Resources & totalScalarQuantities() const
Resources createStrippedScalarQuantity() const
virtual const Resources & allocationScalarQuantities(const std::string &clientPath) const
Definition: spec.hpp:30
virtual void unallocated(const std::string &clientPath, const SlaveID &slaveId, const Resources &resources)
hashmap< std::string, Value::Scalar > totals
Definition: sorter.hpp:439
static bool compareDRF(const Node *left, const Node *right)
Definition: sorter.hpp:443
virtual void allocated(const std::string &clientPath, const SlaveID &slaveId, const Resources &resources)
virtual void add(const std::string &clientPath)
Resources shared() const
Definition: attributes.hpp:24
Resources nonShared() const
std::set< pid_t > children(pid_t, const std::list< Process > &, bool)
Definition: os.hpp:215
virtual bool contains(const std::string &clientPath) const
virtual void initialize(const Option< std::set< std::string >> &fairnessExcludeResourceNames)
virtual const hashmap< SlaveID, Resources > & allocation(const std::string &clientPath) const
void addChild(Node *child)
Definition: sorter.hpp:302
void update(const SlaveID &slaveId, const Resources &oldAllocation, const Resources &newAllocation)
Definition: sorter.hpp:379
constexpr const char * name
Definition: shell.hpp:43
Try< std::list< std::string > > find(const std::string &directory, const std::string &pattern)
Definition: find.hpp:37
virtual void activate(const std::string &clientPath)
virtual void updateWeight(const std::string &path, double weight)