Apache Mesos
slave.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __SLAVE_HPP__
18 #define __SLAVE_HPP__
19 
20 #include <stdint.h>
21 
22 #include <list>
23 #include <memory>
24 #include <string>
25 #include <vector>
26 
27 #include <mesos/attributes.hpp>
28 #include <mesos/resources.hpp>
29 #include <mesos/type_utils.hpp>
30 
31 #include <mesos/agent/agent.hpp>
32 
34 
36 
38 
40 
44 
46 
47 #include <process/http.hpp>
48 #include <process/future.hpp>
49 #include <process/owned.hpp>
50 #include <process/limiter.hpp>
51 #include <process/process.hpp>
52 #include <process/protobuf.hpp>
53 #include <process/shared.hpp>
54 #include <process/sequence.hpp>
55 
56 #include <stout/boundedhashmap.hpp>
57 #include <stout/bytes.hpp>
59 #include <stout/linkedhashmap.hpp>
60 #include <stout/hashmap.hpp>
61 #include <stout/hashset.hpp>
62 #include <stout/option.hpp>
63 #include <stout/os.hpp>
64 #include <stout/path.hpp>
65 #include <stout/recordio.hpp>
66 #include <stout/uuid.hpp>
67 
68 #include "common/heartbeater.hpp"
69 #include "common/http.hpp"
71 #include "common/recordio.hpp"
72 
73 #include "files/files.hpp"
74 
75 #include "internal/evolve.hpp"
76 
77 #include "messages/messages.hpp"
78 
81 
82 #include "slave/constants.hpp"
84 #include "slave/flags.hpp"
85 #include "slave/gc.hpp"
86 #include "slave/http.hpp"
87 #include "slave/metrics.hpp"
88 #include "slave/paths.hpp"
89 #include "slave/state.hpp"
90 
92 
93 // `REGISTERING` is used as an enum value, but it's actually defined as a
94 // constant in the Windows SDK.
95 #ifdef __WINDOWS__
96 #undef REGISTERING
97 #undef REGISTERED
98 #endif // __WINDOWS__
99 
100 namespace mesos {
101 
102 // Forward declarations.
103 class Authorizer;
104 class DiskProfileAdaptor;
105 
106 namespace internal {
107 namespace slave {
108 
109 // Some forward declarations.
110 class TaskStatusUpdateManager;
111 class Executor;
112 class Framework;
113 
114 struct ResourceProvider;
115 
116 
117 class Slave : public ProtobufProcess<Slave>
118 {
119 public:
120  Slave(const std::string& id,
121  const Flags& flags,
123  Containerizer* containerizer,
124  Files* files,
125  GarbageCollector* gc,
126  TaskStatusUpdateManager* taskStatusUpdateManager,
127  mesos::slave::ResourceEstimator* resourceEstimator,
128  mesos::slave::QoSController* qosController,
129  mesos::SecretGenerator* secretGenerator,
130  const Option<Authorizer*>& authorizer);
131 
132  ~Slave() override;
133 
134  void shutdown(const process::UPID& from, const std::string& message);
135 
136  void registered(
137  const process::UPID& from,
138  const SlaveID& slaveId,
139  const MasterSlaveConnection& connection);
140 
141  void reregistered(
142  const process::UPID& from,
143  const SlaveID& slaveId,
144  const std::vector<ReconcileTasksMessage>& reconciliations,
145  const MasterSlaveConnection& connection);
146 
147  void doReliableRegistration(Duration maxBackoff);
148 
149 // TODO(mzhu): Combine this with `runTask()` and replace all `runTask()`
150 // mocks with `run()` mocks.
152  const process::UPID& from,
153  RunTaskMessage&& runTaskMessage);
154 
155  // Made 'virtual' for Slave mocking.
156  virtual void runTask(
157  const process::UPID& from,
158  const FrameworkInfo& frameworkInfo,
159  const FrameworkID& frameworkId,
160  const process::UPID& pid,
161  const TaskInfo& task,
162  const std::vector<ResourceVersionUUID>& resourceVersionUuids,
164 
165  void run(
166  const FrameworkInfo& frameworkInfo,
167  ExecutorInfo executorInfo,
168  Option<TaskInfo> task,
169  Option<TaskGroupInfo> taskGroup,
170  const std::vector<ResourceVersionUUID>& resourceVersionUuids,
171  const process::UPID& pid,
172  const Option<bool>& launchExecutor);
173 
174  // Made 'virtual' for Slave mocking.
175  //
176  // This function returns a future so that we can encapsulate a task(group)
177  // launch operation (from agent receiving the run message to the completion
178  // of `_run()`) into a single future. This includes all the asynchronous
179  // steps (currently two: unschedule GC and task authorization) prior to the
180  // executor launch.
182  const FrameworkInfo& frameworkInfo,
183  const ExecutorInfo& executorInfo,
184  const Option<TaskInfo>& task,
185  const Option<TaskGroupInfo>& taskGroup,
186  const std::vector<ResourceVersionUUID>& resourceVersionUuids,
187  const Option<bool>& launchExecutor);
188 
189  // Made 'virtual' for Slave mocking.
190  virtual void __run(
191  const FrameworkInfo& frameworkInfo,
192  const ExecutorInfo& executorInfo,
193  const Option<TaskInfo>& task,
194  const Option<TaskGroupInfo>& taskGroup,
195  const std::vector<ResourceVersionUUID>& resourceVersionUuids,
196  const Option<bool>& launchExecutor);
197 
198  // This is called when the resource limits of the container have
199  // been updated for the given tasks and task groups. If the update is
200  // successful, we flush the given tasks to the executor by sending
201  // RunTaskMessages or `LAUNCH_GROUP` events.
202  //
203  // Made 'virtual' for Slave mocking.
204  virtual void ___run(
205  const process::Future<Nothing>& future,
206  const FrameworkID& frameworkId,
207  const ExecutorID& executorId,
208  const ContainerID& containerId,
209  const std::vector<TaskInfo>& tasks,
210  const std::vector<TaskGroupInfo>& taskGroups);
211 
212 // TODO(mzhu): Combine this with `runTaskGroup()` and replace all
213 // `runTaskGroup()` mocks with `run()` mocks.
215  const process::UPID& from,
216  RunTaskGroupMessage&& runTaskGroupMessage);
217 
218  // Made 'virtual' for Slave mocking.
219  virtual void runTaskGroup(
220  const process::UPID& from,
221  const FrameworkInfo& frameworkInfo,
222  const ExecutorInfo& executorInfo,
223  const TaskGroupInfo& taskGroupInfo,
224  const std::vector<ResourceVersionUUID>& resourceVersionUuids,
225  const Option<bool>& launchExecutor);
226 
227  // Made 'virtual' for Slave mocking.
228  virtual void killTask(
229  const process::UPID& from,
230  const KillTaskMessage& killTaskMessage);
231 
232  // Made 'virtual' for Slave mocking.
233  virtual void shutdownExecutor(
234  const process::UPID& from,
235  const FrameworkID& frameworkId,
236  const ExecutorID& executorId);
237 
238  // Shut down an executor. This is a two phase process. First, an
239  // executor receives a shut down message (shut down phase), then
240  // after a configurable timeout the slave actually forces a kill
241  // (kill phase, via the isolator) if the executor has not
242  // exited.
243  //
244  // Made 'virtual' for Slave mocking.
245  virtual void _shutdownExecutor(Framework* framework, Executor* executor);
246 
247  void shutdownFramework(
248  const process::UPID& from,
249  const FrameworkID& frameworkId);
250 
251  void schedulerMessage(
252  const SlaveID& slaveId,
253  const FrameworkID& frameworkId,
254  const ExecutorID& executorId,
255  const std::string& data);
256 
257  void updateFramework(
258  const UpdateFrameworkMessage& message);
259 
260  void checkpointResources(
261  std::vector<Resource> checkpointedResources,
262  bool changeTotal);
263 
265  const std::vector<Resource>& checkpointedResources);
266 
267  void applyOperation(const ApplyOperationMessage& message);
268 
269  // Reconciles pending operations with the master. This is necessary to handle
270  // cases in which operations were dropped in transit, or in which an agent's
271  // `UpdateSlaveMessage` was sent at the same time as an operation was en route
272  // from the master to the agent.
273  void reconcileOperations(const ReconcileOperationsMessage& message);
274 
275  void subscribe(
277  const executor::Call::Subscribe& subscribe,
278  Framework* framework,
279  Executor* executor);
280 
281  void registerExecutor(
282  const process::UPID& from,
283  const FrameworkID& frameworkId,
284  const ExecutorID& executorId);
285 
286  // Called when an executor reregisters with a recovering slave.
287  // 'tasks' : Unacknowledged tasks (i.e., tasks that the executor
288  // driver never received an ACK for.)
289  // 'updates' : Unacknowledged updates.
290  void reregisterExecutor(
291  const process::UPID& from,
292  const FrameworkID& frameworkId,
293  const ExecutorID& executorId,
294  const std::vector<TaskInfo>& tasks,
295  const std::vector<StatusUpdate>& updates);
296 
297  void _reregisterExecutor(
298  const process::Future<Nothing>& future,
299  const FrameworkID& frameworkId,
300  const ExecutorID& executorId,
301  const ContainerID& containerId);
302 
303  void executorMessage(
304  const SlaveID& slaveId,
305  const FrameworkID& frameworkId,
306  const ExecutorID& executorId,
307  const std::string& data);
308 
309  void ping(const process::UPID& from, bool connected);
310 
311  // Handles the task status update.
312  // NOTE: If 'pid' is a valid UPID an ACK is sent to this pid
313  // after the update is successfully handled. If pid == UPID()
314  // no ACK is sent. The latter is used by the slave to send
315  // status updates it generated (e.g., TASK_LOST). If pid == None()
316  // an ACK is sent to the corresponding HTTP based executor.
317  // NOTE: StatusUpdate is passed by value because it is modified
318  // to ensure source field is set.
319  void statusUpdate(StatusUpdate update, const Option<process::UPID>& pid);
320 
321  // Called when the slave receives a `StatusUpdate` from an executor
322  // and the slave needs to retrieve the container status for the
323  // container associated with the executor.
324  void _statusUpdate(
325  StatusUpdate update,
326  const Option<process::UPID>& pid,
327  const ExecutorID& executorId,
328  const Option<process::Future<ContainerStatus>>& containerStatus);
329 
330  // Continue handling the status update after optionally updating the
331  // container's resources.
332  void __statusUpdate(
333  const Option<process::Future<Nothing>>& future,
334  const StatusUpdate& update,
335  const Option<process::UPID>& pid,
336  const ExecutorID& executorId,
337  const ContainerID& containerId,
338  bool checkpoint);
339 
340  // This is called when the task status update manager finishes
341  // handling the update. If the handling is successful, an
342  // acknowledgment is sent to the executor.
343  void ___statusUpdate(
344  const process::Future<Nothing>& future,
345  const StatusUpdate& update,
346  const Option<process::UPID>& pid);
347 
348  // This is called by task status update manager to forward a status
349  // update to the master. Note that the latest state of the task is
350  // added to the update before forwarding.
351  void forward(StatusUpdate update);
352 
353  // This is called by the operation status update manager to forward operation
354  // status updates to the master.
355  void sendOperationStatusUpdate(const UpdateOperationStatusMessage& update);
356 
358  const process::UPID& from,
359  const SlaveID& slaveId,
360  const FrameworkID& frameworkId,
361  const TaskID& taskId,
362  const std::string& uuid);
363 
365  const process::Future<bool>& future,
366  const TaskID& taskId,
367  const FrameworkID& frameworkId,
368  const UUID& uuid);
369 
371  const process::UPID& from,
372  const AcknowledgeOperationStatusMessage& acknowledgement);
373 
374  void executorLaunched(
375  const FrameworkID& frameworkId,
376  const ExecutorID& executorId,
377  const ContainerID& containerId,
379 
380  // Made 'virtual' for Slave mocking.
381  virtual void executorTerminated(
382  const FrameworkID& frameworkId,
383  const ExecutorID& executorId,
384  const process::Future<Option<
385  mesos::slave::ContainerTermination>>& termination);
386 
387  // NOTE: Pulled these to public to make it visible for testing.
388  // TODO(vinod): Make tests friends to this class instead.
389 
390  // Garbage collects the directories based on the current disk usage.
391  // TODO(vinod): Instead of making this function public, we need to
392  // mock both GarbageCollector (and pass it through slave's constructor)
393  // and os calls.
395 
396  // Garbage collect image layers based on the disk usage of image
397  // store.
399 
400  // Invoked whenever the detector detects a change in masters.
401  // Made public for testing purposes.
402  void detected(const process::Future<Option<MasterInfo>>& _master);
403 
404  enum State
405  {
406  RECOVERING, // Slave is doing recovery.
407  DISCONNECTED, // Slave is not connected to the master.
408  RUNNING, // Slave has (re-)registered.
409  TERMINATING, // Slave is shutting down.
410  } state;
411 
412  // Describes information about agent recovery.
414  {
415  // Flag to indicate if recovery, including reconciling
416  // (i.e., reconnect/kill) with executors is finished.
418 
419  // Flag to indicate that HTTP based executors can
420  // subscribe with the agent. We allow them to subscribe
421  // after the agent recovers the containerizer.
422  bool reconnect = false;
423  } recoveryInfo;
424 
425  // TODO(benh): Clang requires members to be public in order to take
426  // their address which we do in tests (for things like
427  // FUTURE_DISPATCH).
428 // protected:
429  void initialize() override;
430  void finalize() override;
431  void exited(const process::UPID& pid) override;
432 
434  const FrameworkID& frameworkId,
435  const ExecutorID& executorId,
436  const ContainerID& containerId);
437 
438  // If an executor is launched for a task group, `taskInfo` would not be set.
439  void launchExecutor(
440  const Option<process::Future<Secret>>& future,
441  const FrameworkID& frameworkId,
442  const ExecutorID& executorId,
443  const Option<TaskInfo>& taskInfo);
444 
445  void fileAttached(const process::Future<Nothing>& result,
446  const std::string& path,
447  const std::string& virtualPath);
448 
449  Nothing detachFile(const std::string& path);
450 
451  // TODO(qianzhang): This is a workaround to make the default executor task's
452  // volume directory visible in MESOS UI. It handles two cases:
453  // 1. The task has disk resources specified. In this case any disk resources
454  // specified for the task are mounted on the top level container since
455  // currently all resources of nested containers are merged in the top
456  // level executor container. To make sure the task can access any volumes
457  // specified in its disk resources from its sandbox, a workaround was
458  // introduced to the default executor in MESOS-7225, i.e., adding a
459  // `SANDBOX_PATH` volume with type `PARENT` to the corresponding nested
460  // container. This volume gets translated into a bind mount in the nested
461  // container's mount namespace, which is not visible in Mesos UI because
462  // it operates in the host namespace. See MESOS-8279 for details.
463  // 2. The executor has disk resources specified and the task's ContainerInfo
464  // has a `SANDBOX_PATH` volume with type `PARENT` specified to share the
465  // executor's disk volume. Similar to the first case, this `SANDBOX_PATH`
466  // volume gets translated into a bind mount which is not visible in Mesos
467  // UI. See MESOS-8565 for details.
468  //
469  // To make the task's volume directory visible in Mesos UI, here we attach the
470  // executor's volume directory to it so that it can be accessed via the /files
471  // endpoint. So when users browse task's volume directory in Mesos UI, what
472  // they actually browse is the executor's volume directory. Note when calling
473  // `Files::attach()`, the third argument `authorized` is not specified because
474  // it is already specified when we do the attach for the executor's sandbox
475  // and it also applies to the executor's tasks. Note that for the second case
476  // we can not do the attach when the task's ContainerInfo has a `SANDBOX_PATH`
477  // volume with type `PARENT` but the executor has NO disk resources, because
478  // in such case the attach will fail due to the executor's volume directory
479  // not existing which will actually be created by the `volume/sandbox_path`
480  // isolator when launching the nested container. But if the executor has disk
481  // resources, then we will not have this issue since the executor's volume
482  // directory will be created by the `filesystem/linux` isolator when launching
483  // the executor before we do the attach.
485  const ExecutorInfo& executorInfo,
486  const ContainerID& executorContainerId,
487  const Task& task);
488 
489  // TODO(qianzhang): Remove the task's volume directory from the /files
490  // endpoint. This is a workaround for MESOS-8279 and MESOS-8565.
492  const ExecutorInfo& executorInfo,
493  const ContainerID& executorContainerId,
494  const std::vector<Task>& tasks);
495 
496  // Triggers a re-detection of the master when the slave does
497  // not receive a ping.
499 
500  // Made virtual for testing purpose.
501  virtual void authenticate(Duration minTimeout, Duration maxTimeout);
502 
503  // Helper routines to lookup a framework/executor.
504  Framework* getFramework(const FrameworkID& frameworkId) const;
505 
507  const FrameworkID& frameworkId,
508  const ExecutorID& executorId) const;
509 
510  Executor* getExecutor(const ContainerID& containerId) const;
511 
512  // Returns the ExecutorInfo associated with a TaskInfo. If the task has no
513  // ExecutorInfo, then we generate an ExecutorInfo corresponding to the
514  // command executor.
515  ExecutorInfo getExecutorInfo(
516  const FrameworkInfo& frameworkInfo,
517  const TaskInfo& task) const;
518 
519  // Shuts down the executor if it did not register yet.
521  const FrameworkID& frameworkId,
522  const ExecutorID& executorId,
523  const ContainerID& containerId);
524 
525  // Cleans up all un-reregistered executors during recovery.
527 
528  // This function returns the max age of executor/slave directories allowed,
529  // given a disk usage. This value could be used to tune gc.
530  Duration age(double usage);
531 
532  // Checks the current disk usage and schedules for gc as necessary.
533  void checkDiskUsage();
534 
535 // Checks the current container image disk usage and triggers image
536 // gc if necessary.
537  void checkImageDiskUsage();
538 
539  // Recovers the slave, task status update manager and isolator.
541 
542  // This is called after 'recover()'. If 'flags.reconnect' is
543  // 'reconnect', the slave attempts to reconnect to any old live
544  // executors. Otherwise, the slave attempts to shutdown/kill them.
546 
547  // This is a helper to call recover() on the containerizer at the end of
548  // recover() and before __recover().
549  // TODO(idownes): Remove this when we support defers to objects.
551  const Option<state::SlaveState>& state);
552 
553  // This is called when recovery finishes.
554  // Made 'virtual' for Slave mocking.
555  virtual void __recover(const process::Future<Nothing>& future);
556 
557  // Helper to recover a framework from the specified state.
558  void recoverFramework(
559  const state::FrameworkState& state,
560  const hashset<ExecutorID>& executorsToRecheckpoint,
561  const hashmap<ExecutorID, hashset<TaskID>>& tasksToRecheckpoint);
562 
563  // Removes and garbage collects the executor.
564  void removeExecutor(Framework* framework, Executor* executor);
565 
566  // Removes and garbage collects the framework.
567  // Made 'virtual' for Slave mocking.
568  virtual void removeFramework(Framework* framework);
569 
570  // Schedules a 'path' for gc based on its modification time.
571  process::Future<Nothing> garbageCollect(const std::string& path);
572 
573  // Called when the slave was signaled from the specified user.
574  void signaled(int signal, int uid);
575 
576  // Made 'virtual' for Slave mocking.
577  virtual void qosCorrections();
578 
579  // Made 'virtual' for Slave mocking.
580  virtual void _qosCorrections(
582  mesos::slave::QoSCorrection>>& correction);
583 
584  // Returns the resource usage information for all executors.
586 
587  // Handle the second phase of shutting down an executor for those
588  // executors that have not properly shutdown within a timeout.
590  const FrameworkID& frameworkId,
591  const ExecutorID& executorId,
592  const ContainerID& containerId);
593 
594 private:
595  friend class Executor;
596  friend class Framework;
597  friend class Http;
598 
599  friend struct Metrics;
600 
601  Slave(const Slave&) = delete;
602  Slave& operator=(const Slave&) = delete;
603 
604  void _authenticate(Duration currentMinTimeout, Duration currentMaxTimeout);
605 
606  // Process creation of persistent volumes (for CREATE) and/or deletion
607  // of persistent volumes (for DESTROY) as a part of handling
608  // checkpointed resources, and commit the checkpointed resources on
609  // successful completion of all the operations.
610  Try<Nothing> syncCheckpointedResources(
611  const Resources& newCheckpointedResources);
612 
613  process::Future<bool> authorizeTask(
614  const TaskInfo& task,
615  const FrameworkInfo& frameworkInfo);
616 
617  process::Future<bool> authorizeSandboxAccess(
619  const FrameworkID& frameworkId,
620  const ExecutorID& executorId);
621 
622  void sendExecutorTerminatedStatusUpdate(
623  const TaskID& taskId,
624  const process::Future<Option<
625  mesos::slave::ContainerTermination>>& termination,
626  const FrameworkID& frameworkId,
627  const Executor* executor);
628 
629  void sendExitedExecutorMessage(
630  const FrameworkID& frameworkId,
631  const ExecutorID& executorId,
632  const Option<int>& status = None());
633 
634  // Forwards the current total of oversubscribed resources.
635  void forwardOversubscribed();
636  void _forwardOversubscribed(
637  const process::Future<Resources>& oversubscribable);
638 
639  // Helper functions to generate `UpdateSlaveMessage` for either
640  // just updates to resource provider-related information, or both
641  // resource provider-related information and oversubscribed
642  // resources.
643  UpdateSlaveMessage generateResourceProviderUpdate() const;
644  UpdateSlaveMessage generateUpdateSlaveMessage() const;
645 
646  void handleResourceProviderMessage(
648 
649  void addOperation(Operation* operation);
650 
651 // Transitions the operation, and recovers resources if the operation
652 // becomes terminal.
653  void updateOperation(
654  Operation* operation,
655  const UpdateOperationStatusMessage& update);
656 
657  // Update the `latest_status` of the operation if it is not terminal.
658  void updateOperationLatestStatus(
659  Operation* operation,
660  const OperationStatus& status);
661 
662  void removeOperation(Operation* operation);
663 
664  Operation* getOperation(const UUID& uuid) const;
665 
666  void addResourceProvider(ResourceProvider* resourceProvider);
667  ResourceProvider* getResourceProvider(const ResourceProviderID& id) const;
668 
669  void apply(Operation* operation);
670 
671  // Publish all resources that are needed to run the current set of
672  // tasks and executors on the agent.
673  // NOTE: The `additionalResources` parameter is for publishing
674  // additional task resources when launching executors. Consider
675  // removing this parameter once we revisited MESOS-600.
676  process::Future<Nothing> publishResources(
677  const Option<Resources>& additionalResources = None());
678 
679  // PullGauge methods.
680  double _frameworks_active()
681  {
682  return static_cast<double>(frameworks.size());
683  }
684 
685  double _uptime_secs()
686  {
687  return (process::Clock::now() - startTime).secs();
688  }
689 
690  double _registered()
691  {
692  return master.isSome() ? 1 : 0;
693  }
694 
695  double _tasks_staging();
696  double _tasks_starting();
697  double _tasks_running();
698  double _tasks_killing();
699 
700  double _executors_registering();
701  double _executors_running();
702  double _executors_terminating();
703 
704  double _executor_directory_max_allowed_age_secs();
705 
706  double _resources_total(const std::string& name);
707  double _resources_used(const std::string& name);
708  double _resources_percent(const std::string& name);
709 
710  double _resources_revocable_total(const std::string& name);
711  double _resources_revocable_used(const std::string& name);
712  double _resources_revocable_percent(const std::string& name);
713 
714  // Checks whether the two `SlaveInfo` objects are considered
715  // compatible based on the value of the `--configuration_policy`
716  // flag.
717  Try<Nothing> compatible(
718  const SlaveInfo& previous,
719  const SlaveInfo& current) const;
720 
721  void initializeResourceProviderManager(
722  const Flags& flags,
723  const SlaveID& slaveId);
724 
725  protobuf::master::Capabilities requiredMasterCapabilities;
726 
727  const Flags flags;
728 
729  const Http http;
730 
731  SlaveInfo info;
732 
733  protobuf::slave::Capabilities capabilities;
734 
735  // Resources that are checkpointed by the slave.
736  Resources checkpointedResources;
737 
738  // The current total resources of the agent, i.e., `info.resources()` with
739  // checkpointed resources applied and resource provider resources.
740  Resources totalResources;
741 
743 
745 
746  // Note that these frameworks are "completed" only in that
747  // they no longer have any active tasks or executors on this
748  // particular agent.
749  //
750  // TODO(bmahler): Implement a more accurate framework lifecycle
751  // in the agent code, ideally the master can inform the agent
752  // when a framework is actually completed, and the agent can
753  // perhaps store a cache of "idle" frameworks. See MESOS-7890.
755 
757 
758  Containerizer* containerizer;
759 
760  Files* files;
761 
763 
764  process::Time startTime;
765 
766  GarbageCollector* gc;
767 
768  TaskStatusUpdateManager* taskStatusUpdateManager;
769 
770  OperationStatusUpdateManager operationStatusUpdateManager;
771 
772  // Master detection future.
774 
775  // Master's ping timeout value, updated on reregistration.
776  Duration masterPingTimeout;
777 
778  // Timer for triggering re-detection when no ping is received from
779  // the master.
780  process::Timer pingTimer;
781 
782  // Timer for triggering agent (re)registration after detecting a new master.
783  process::Timer agentRegistrationTimer;
784 
785  // Root meta directory containing checkpointed data.
786  const std::string metaDir;
787 
788  // Indicates the number of errors ignored in "--no-strict" recovery mode.
789  unsigned int recoveryErrors;
790 
791  Option<Credential> credential;
792 
793  // Authenticatee name as supplied via flags.
794  std::string authenticateeName;
795 
796  Authenticatee* authenticatee;
797 
798  // Indicates if an authentication attempt is in progress.
799  Option<process::Future<bool>> authenticating;
800 
801  // Indicates if the authentication is successful.
802  bool authenticated;
803 
804  // Indicates if a new authentication attempt should be enforced.
805  bool reauthenticate;
806 
807  // Maximum age of executor directories. Will be recomputed
808  // periodically every flags.disk_watch_interval.
809  Duration executorDirectoryMaxAllowedAge;
810 
811  mesos::slave::ResourceEstimator* resourceEstimator;
812 
813  mesos::slave::QoSController* qosController;
814 
815  std::shared_ptr<DiskProfileAdaptor> diskProfileAdaptor;
816 
817  mesos::SecretGenerator* secretGenerator;
818 
819  const Option<Authorizer*> authorizer;
820 
821  // The most recent estimate of the total amount of oversubscribed
822  // (allocated and oversubscribable) resources.
823  Option<Resources> oversubscribedResources;
824 
825  process::Owned<ResourceProviderManager> resourceProviderManager;
826  process::Owned<LocalResourceProviderDaemon> localResourceProviderDaemon;
827 
828  // Local resource providers known by the agent.
830 
831  // Used to establish the relationship between the operation and the
832  // resources that the operation is operating on. Each resource
833  // provider will keep a resource version UUID, and change it when it
834  // believes that the resources from this resource provider are out
835  // of sync from the master's view. The master will keep track of
836  // the last known resource version UUID for each resource provider,
837  // and attach the resource version UUID in each operation it sends
838  // out. The resource provider should reject operations that have a
839  // different resource version UUID than that it maintains, because
840  // this means the operation is operating on resources that might
841  // have already been invalidated.
842  UUID resourceVersion;
843 
844  // Keeps track of the following:
845  // (1) Pending operations for resources from the agent.
846  // (2) Pending operations or terminal operations that have
847  // unacknowledged status updates for resource provider
848  // provided resources.
849  hashmap<UUID, Operation*> operations;
850 };
851 
852 
853 std::ostream& operator<<(std::ostream& stream, const Executor& executor);
854 
855 
856 // Information describing an executor.
857 class Executor
858 {
859 public:
860  Executor(
861  Slave* slave,
862  const FrameworkID& frameworkId,
863  const ExecutorInfo& info,
864  const ContainerID& containerId,
865  const std::string& directory,
866  const Option<std::string>& user,
867  bool checkpoint);
868 
869  ~Executor();
870 
871  // Note that these tasks will also be tracked within `queuedTasks`.
872  void enqueueTaskGroup(const TaskGroupInfo& taskGroup);
873 
874  void enqueueTask(const TaskInfo& task);
875  Option<TaskInfo> dequeueTask(const TaskID& taskId);
876  Task* addLaunchedTask(const TaskInfo& task);
877  void completeTask(const TaskID& taskId);
878  void checkpointExecutor();
879  void checkpointTask(const TaskInfo& task);
880  void checkpointTask(const Task& task);
881 
882  void recoverTask(const state::TaskState& state, bool recheckpointTask);
883 
884  Try<Nothing> updateTaskState(const TaskStatus& status);
885 
886  // Returns true if there are any queued/launched/terminated tasks.
887  bool incompleteTasks();
888 
889  // Returns true if the agent ever sent any tasks to this executor.
890  // More precisely, this function returns whether either:
891  //
892  // (1) There are terminated/completed tasks with a
893  // SOURCE_EXECUTOR status.
894  //
895  // (2) `launchedTasks` is not empty.
896  //
897  // If this function returns false and there are no queued tasks,
898  // we probably (see TODO below) have killed or dropped all of its
899  // initial tasks, in which case the agent will shut down the executor.
900  //
901  // TODO(mzhu): Note however, that since we look through the cache
902  // of completed tasks, we can have false positives when a task
903  // that was delivered to the executor has been evicted from the
904  // completed task cache by tasks that have been killed by the
905  // agent before delivery. This should be fixed.
906  //
907 // NOTE: This function checks whether any task has ever been sent;
908 // this does not necessarily mean the executor has ever received
909 // any tasks. Specifically, tasks in `launchedTasks` may be dropped
910 // before being received by the executor if the agent restarts.
911  bool everSentTask() const;
912 
913  // Sends `message` to this executor: over `http` if it is set, otherwise via `slave->send()` to `pid`; if neither is set, only a warning is logged.
914  template <typename Message>
915  void send(const Message& message)
916  {
917  if (state == REGISTERING || state == TERMINATED) { // Warn only: there is no early return, so the send below is still attempted.
918  LOG(WARNING) << "Attempting to send message to disconnected"
919  << " executor " << *this << " in state " << state;
920  }
921 
922  if (http.isSome()) { // The HTTP connection is checked first and takes precedence over `pid`.
923  if (!http->send(message)) { // A false return is reported as a closed connection.
924  LOG(WARNING) << "Unable to send event to executor " << *this
925  << ": connection closed";
926  }
927  } else if (pid.isSome()) {
928  slave->send(pid.get(), message);
929  } else { // Neither `http` nor `pid` is set: nothing is sent.
930  LOG(WARNING) << "Unable to send event to executor " << *this
931  << ": unknown connection type";
932  }
933  }
934 
935  // Returns true if this executor is generated by Mesos for a command
936  // task (either command executor for MesosContainerizer or docker
937  // executor for DockerContainerizer).
938  bool isGeneratedForCommandTask() const;
939 
940  // Closes the HTTP connection.
941  void closeHttpConnection();
942 
943  // Returns the task group associated with the task.
944  Option<TaskGroupInfo> getQueuedTaskGroup(const TaskID& taskId);
945 
947 
948  enum State
949  {
950  REGISTERING, // Executor is launched but not (re-)registered yet.
951  RUNNING, // Executor has (re-)registered.
952  TERMINATING, // Executor is being shutdown/killed.
953  TERMINATED, // Executor has terminated but there might be pending updates.
954  } state;
955 
956  // We store the pointer to 'Slave' to get access to its methods and
957  // variables. One could imagine 'Executor' as being an inner class
958  // of the 'Slave' class.
960 
961  const ExecutorID id;
962  const ExecutorInfo info;
963 
964  const FrameworkID frameworkId;
965 
966  const ContainerID containerId;
967 
968  const std::string directory;
969 
970  // The sandbox will be owned by this user and the executor will
971  // run as this user. This can be set to None when --switch_user
972  // is false or when compiled for Windows.
974 
975  const bool checkpoint;
976 
977  // An Executor can either be connected via HTTP or by libprocess
978  // message passing. The following are the possible states:
979  //
980  //     Agent State    Executor State    http    pid       Executor Type
981  //     -----------    --------------    ----    ----      -------------
982  //     RECOVERING     REGISTERING       None    UPID()    Unknown
983  //                    REGISTERING       None    Some      Libprocess
984  //                    REGISTERING       None    None      HTTP
985  //
986  //     *              REGISTERING       None    None      Not known yet
987  //     *              *                 None    Some      Libprocess
988  //     *              *                 Some    None      HTTP
992 
994 
995  // Tasks can be found in one of the following four data structures:
996  //
997  // TODO(bmahler): Make these private to enforce that the task
998  // lifecycle helper functions are not being bypassed, and provide
999  // public views into them.
1000 
1001  // Not yet launched tasks. This also includes tasks from `queuedTaskGroups`.
1003 
1004  // Not yet launched task groups. This is needed for correctly sending
1005  // TASK_KILLED status updates for all tasks in the group if any of the
1006  // tasks were killed before the executor could register with the agent.
1007  std::vector<TaskGroupInfo> queuedTaskGroups;
1008 
1009  // Running.
1011 
1012  // Terminated but pending updates.
1014 
1015  // Terminated and updates acked.
1016  // NOTE: We use a shared pointer for Task because clang doesn't like
1017  // stout's implementation of circular_buffer with Task (the Boost code
1018  // used internally by stout attempts to do some memset's which are unsafe).
1019  circular_buffer<std::shared_ptr<Task>> completedTasks;
1020 
1021  // When the slave initiates a destroy of the container, we expect a
1022  // termination to occur. The 'pendingTermination' indicates why the
1023  // slave initiated the destruction and will influence the
1024  // information sent in the status updates for any remaining
1025  // non-terminal tasks.
1027 
1028 private:
1029  Executor(const Executor&) = delete;
1030  Executor& operator=(const Executor&) = delete;
1031 
1032  bool isGeneratedForCommandTask_;
1033 };
1034 
1035 
1036 // Information about a framework.
1038 {
1039 public:
1040  Framework(
1041  Slave* slave,
1042  const Flags& slaveFlags,
1043  const FrameworkInfo& info,
1044  const Option<process::UPID>& pid);
1045 
1046  ~Framework();
1047 
1048  // Returns whether the framework is idle, where idle is
1049  // defined as having no activity:
1050  // (1) The framework has no non-terminal tasks and executors.
1051  // (2) All status updates have been acknowledged.
1052  //
1053  // TODO(bmahler): The framework should also not be considered
1054  // idle if there are unacknowledged updates for "pending" tasks.
1055  bool idle() const;
1056 
1057  void checkpointFramework() const;
1058 
1059  const FrameworkID id() const { return info.id(); }
1060 
1061  Try<Executor*> addExecutor(const ExecutorInfo& executorInfo);
1062  Executor* getExecutor(const ExecutorID& executorId) const;
1063  Executor* getExecutor(const TaskID& taskId) const;
1064 
1065  void destroyExecutor(const ExecutorID& executorId);
1066 
1067  void recoverExecutor(
1068  const state::ExecutorState& state,
1069  bool recheckpointExecutor,
1070  const hashset<TaskID>& tasksToRecheckpoint);
1071 
1072  void addPendingTask(
1073  const ExecutorID& executorId,
1074  const TaskInfo& task);
1075 
1076  // Note that these tasks will also be tracked within `pendingTasks`.
1077  void addPendingTaskGroup(
1078  const ExecutorID& executorId,
1079  const TaskGroupInfo& taskGroup);
1080 
1081  bool hasTask(const TaskID& taskId) const;
1082  bool isPending(const TaskID& taskId) const;
1083 
1084  // Returns the task group associated with a pending task.
1085  Option<TaskGroupInfo> getTaskGroupForPendingTask(const TaskID& taskId);
1086 
1087  // Returns whether the pending task was removed.
1088  bool removePendingTask(const TaskID& taskId);
1089 
1090  Option<ExecutorID> getExecutorIdForPendingTask(const TaskID& taskId) const;
1091 
1092  Resources allocatedResources() const;
1093 
1094  enum State
1095  {
1096  RUNNING, // First state of a newly created framework.
1097  TERMINATING, // Framework is shutting down in the cluster.
1098  } state;
1099 
1100  // We store the pointer to 'Slave' to get access to its methods and
1101  // variables. One could imagine 'Framework' being an inner class of
1102  // the 'Slave' class.
1104 
1105  FrameworkInfo info;
1106 
1108 
1109  // Frameworks using the scheduler driver will have a 'pid',
1110  // which allows us to send executor messages directly to the
1111  // driver. Frameworks using the HTTP API (in 0.24.0) will
1112  // not have a 'pid', in which case executor messages are
1113  // sent through the master.
1115 
1116  // Executors can be found in one of the following
1117  // data structures:
1118  //
1119  // TODO(bmahler): Make these private to enforce that
1120  // the executor lifecycle helper functions are not
1121  // being bypassed, and provide public views into them.
1122 
1123  // Executors with pending tasks.
1125 
1126  // Sequences in this map are used to enforce the order of tasks launched on
1127  // each executor.
1128  //
1129  // Note on the lifecycle of the sequence: if the corresponding executor struct
1130  // has not been created, we tie the lifecycle of the sequence to the first
1131  // task in the sequence (which must have the `launch_executor` flag set to
1132  // true modulo MESOS-3870). If the task fails to launch before creating the
1133  // executor struct, we will delete the sequence. Once the executor struct is
1134  // created, we tie the lifecycle of the sequence to the executor struct.
1135  //
1136  // TODO(mzhu): Create the executor struct early and put the sequence in it.
1138 
1139  // Pending task groups. This is needed for correctly sending
1140  // TASK_KILLED status updates for all tasks in the group if
1141  // any of the tasks are killed while pending.
1142  std::vector<TaskGroupInfo> pendingTaskGroups;
1143 
1144  // Current running executors.
1146 
1147  circular_buffer<process::Owned<Executor>> completedExecutors;
1148 
1149 private:
1150  Framework(const Framework&) = delete;
1151  Framework& operator=(const Framework&) = delete;
1152 };
1153 
1154 
1156 {
1158  const ResourceProviderInfo& _info,
1159  const Resources& _totalResources,
1160  const Option<UUID>& _resourceVersion)
1161  : info(_info),
1162  totalResources(_totalResources),
1163  resourceVersion(_resourceVersion) {}
1164 
1165  void addOperation(Operation* operation);
1166  void removeOperation(Operation* operation);
1167 
1168  ResourceProviderInfo info;
1170 
1171  // Used to establish the relationship between the operation and the
1172  // resources that the operation is operating on. Each resource
1173  // provider will keep a resource version UUID, and change it when it
1174  // believes that the resources from this resource provider are out
1175  // of sync from the master's view. The master will keep track of
1176  // the last known resource version UUID for each resource provider,
1177  // and attach the resource version UUID in each operation it sends
1178  // out. The resource provider should reject operations that have a
1179  // different resource version UUID than the one it maintains, because
1180  // this means the operation is operating on resources that might
1181  // have already been invalidated.
1183 
1184  // Pending operations or terminal operations that have
1185  // unacknowledged status updates.
1187 };
1188 
1189 
1203 std::map<std::string, std::string> executorEnvironment(
1204  const Flags& flags,
1205  const ExecutorInfo& executorInfo,
1206  const std::string& directory,
1207  const SlaveID& slaveId,
1208  const process::PID<Slave>& slavePid,
1209  const Option<Secret>& authenticationToken,
1210  bool checkpoint);
1211 
1212 
1213 std::ostream& operator<<(std::ostream& stream, Executor::State state);
1214 std::ostream& operator<<(std::ostream& stream, Framework::State state);
1215 std::ostream& operator<<(std::ostream& stream, Slave::State state);
1216 
1217 } // namespace slave {
1218 } // namespace internal {
1219 } // namespace mesos {
1220 
1221 #endif // __SLAVE_HPP__
void schedulerMessage(const SlaveID &slaveId, const FrameworkID &frameworkId, const ExecutorID &executorId, const std::string &data)
struct mesos::internal::slave::Slave::RecoveryInfo recoveryInfo
Definition: path.hpp:26
virtual process::Future< ResourceUsage > usage()
void executorMessage(const SlaveID &slaveId, const FrameworkID &frameworkId, const ExecutorID &executorId, const std::string &data)
circular_buffer< process::Owned< Executor > > completedExecutors
Definition: slave.hpp:1147
void subscribe(StreamingHttpConnection< v1::executor::Event > http, const executor::Call::Subscribe &subscribe, Framework *framework, Executor *executor)
void sendOperationStatusUpdate(const UpdateOperationStatusMessage &update)
Try< uid_t > uid(const std::string &path, const FollowSymlink follow=FollowSymlink::FOLLOW_SYMLINK)
Definition: stat.hpp:215
Definition: nothing.hpp:16
Definition: http.hpp:42
Definition: option.hpp:28
Try< bool > update(const std::string &link, const Handle &parent, uint16_t protocol, const action::Mirror &mirror)
State
Definition: slave.hpp:948
circular_buffer< std::shared_ptr< Task > > completedTasks
Definition: slave.hpp:1019
process::Owned< ResponseHeartbeater< executor::Event, v1::executor::Event > > heartbeater
Definition: slave.hpp:991
Try< Nothing > checkpoint(const std::string &path, const std::string &message, bool sync)
Definition: state.hpp:123
void _reregisterExecutor(const process::Future< Nothing > &future, const FrameworkID &frameworkId, const ExecutorID &executorId, const ContainerID &containerId)
Duration age(double usage)
Definition: master.hpp:27
const Option< std::string > user
Definition: slave.hpp:973
std::ostream & operator<<(std::ostream &stream, const MesosContainerizerProcess::State &state)
const ExecutorInfo info
Definition: slave.hpp:962
virtual void removeFramework(Framework *framework)
Definition: check.hpp:33
void send(const Message &message)
Definition: slave.hpp:915
virtual void shutdownExecutor(const process::UPID &from, const FrameworkID &frameworkId, const ExecutorID &executorId)
void detachTaskVolumeDirectories(const ExecutorInfo &executorInfo, const ContainerID &executorContainerId, const std::vector< Task > &tasks)
void run(const FrameworkInfo &frameworkInfo, ExecutorInfo executorInfo, Option< TaskInfo > task, Option< TaskGroupInfo > taskGroup, const std::vector< ResourceVersionUUID > &resourceVersionUuids, const process::UPID &pid, const Option< bool > &launchExecutor)
void _checkDiskUsage(const process::Future< double > &usage)
Framework * getFramework(const FrameworkID &frameworkId) const
Definition: hashset.hpp:53
virtual void ___run(const process::Future< Nothing > &future, const FrameworkID &frameworkId, const ExecutorID &executorId, const ContainerID &containerId, const std::vector< TaskInfo > &tasks, const std::vector< TaskGroupInfo > &taskGroups)
Definition: protobuf_utils.hpp:263
void _statusUpdateAcknowledgement(const process::Future< bool > &future, const TaskID &taskId, const FrameworkID &frameworkId, const UUID &uuid)
ResourceProvider(const ResourceProviderInfo &_info, const Resources &_totalResources, const Option< UUID > &_resourceVersion)
Definition: slave.hpp:1157
void removeExecutor(Framework *framework, Executor *executor)
process::Future< Nothing > _recoverContainerizer(const Option< state::SlaveState > &state)
Result< std::string > user(Option< uid_t > uid=None())
Definition: su.hpp:284
hashmap< ExecutorID, process::Sequence > taskLaunchSequences
Definition: slave.hpp:1137
Result< ProcessStatus > status(pid_t pid)
Definition: proc.hpp:166
Option< UUID > resourceVersion
Definition: slave.hpp:1182
Option< StreamingHttpConnection< v1::executor::Event > > http
Definition: slave.hpp:989
Definition: resources.hpp:83
friend class Executor
Definition: slave.hpp:595
void handleRunTaskGroupMessage(const process::UPID &from, RunTaskGroupMessage &&runTaskGroupMessage)
virtual void runTaskGroup(const process::UPID &from, const FrameworkInfo &frameworkInfo, const ExecutorInfo &executorInfo, const TaskGroupInfo &taskGroupInfo, const std::vector< ResourceVersionUUID > &resourceVersionUuids, const Option< bool > &launchExecutor)
bool reconnect
Definition: slave.hpp:422
virtual void executorTerminated(const FrameworkID &frameworkId, const ExecutorID &executorId, const process::Future< Option< mesos::slave::ContainerTermination >> &termination)
Option< process::UPID > pid
Definition: slave.hpp:1114
Definition: files.hpp:73
hashmap< ExecutorID, hashmap< TaskID, TaskInfo > > pendingTasks
Definition: slave.hpp:1124
enum mesos::internal::slave::Slave::State state
Operation
Definition: cgroups.hpp:458
void _checkImageDiskUsage(const process::Future< double > &usage)
void reregisterExecutor(const process::UPID &from, const FrameworkID &frameworkId, const ExecutorID &executorId, const std::vector< TaskInfo > &tasks, const std::vector< StatusUpdate > &updates)
Definition: flags.hpp:39
Slave * slave
Definition: slave.hpp:1103
Definition: duration.hpp:32
Option< mesos::slave::ContainerTermination > pendingTermination
Definition: slave.hpp:1026
std::map< std::string, std::string > executorEnvironment(const Flags &flags, const ExecutorInfo &executorInfo, const std::string &directory, const SlaveID &slaveId, const process::PID< Slave > &slavePid, const Option< Secret > &authenticationToken, bool checkpoint)
Returns a map of environment variables necessary in order to launch an executor.
void recoverFramework(const state::FrameworkState &state, const hashset< ExecutorID > &executorsToRecheckpoint, const hashmap< ExecutorID, hashset< TaskID >> &tasksToRecheckpoint)
Definition: protobuf_utils.hpp:407
void send(const process::UPID &to, const google::protobuf::Message &message)
Definition: protobuf.hpp:118
bool isSome() const
Definition: option.hpp:115
Definition: task_status_update_manager.hpp:58
LinkedHashMap< TaskID, TaskInfo > queuedTasks
Definition: slave.hpp:1002
void statusUpdateAcknowledgement(const process::UPID &from, const SlaveID &slaveId, const FrameworkID &frameworkId, const TaskID &taskId, const std::string &uuid)
const bool checkpoint
Definition: slave.hpp:975
void signaled(int signal, int uid)
virtual void _shutdownExecutor(Framework *framework, Executor *executor)
void checkpointResourcesMessage(const std::vector< Resource > &checkpointedResources)
Definition: hashmap.hpp:38
process::Future< Nothing > garbageCollect(const std::string &path)
void finalize() override
Invoked when a process is terminated.
void operationStatusAcknowledgement(const process::UPID &from, const AcknowledgeOperationStatusMessage &acknowledgement)
Definition: resource_estimator.hpp:37
process::Future< Secret > generateSecret(const FrameworkID &frameworkId, const ExecutorID &executorId, const ContainerID &containerId)
hashmap< std::string, MessageHandler > message
Definition: process.hpp:451
Resources allocatedResources(const Resources &resources, const std::string &role)
Definition: containerizer.hpp:59
void shutdownExecutorTimeout(const FrameworkID &frameworkId, const ExecutorID &executorId, const ContainerID &containerId)
void attachTaskVolumeDirectory(const ExecutorInfo &executorInfo, const ContainerID &executorContainerId, const Task &task)
Definition: slave.hpp:117
void registered(const process::UPID &from, const SlaveID &slaveId, const MasterSlaveConnection &connection)
An "untyped" PID, used to encapsulate the process ID for lower-layer abstractions (eg...
Definition: pid.hpp:39
void pingTimeout(process::Future< Option< MasterInfo >> future)
void handleRunTaskMessage(const process::UPID &from, RunTaskMessage &&runTaskMessage)
const ContainerID containerId
Definition: slave.hpp:966
Option< process::UPID > pid
Definition: slave.hpp:993
bool send(const Message &message)
Definition: http.hpp:162
ExecutorInfo getExecutorInfo(const FrameworkInfo &frameworkInfo, const TaskInfo &task) const
void forward(StatusUpdate update)
Definition: protobuf_utils.hpp:478
void reconcileOperations(const ReconcileOperationsMessage &message)
Definition: spec.hpp:26
const FrameworkID id() const
Definition: slave.hpp:1059
void launchExecutor(const Option< process::Future< Secret >> &future, const FrameworkID &frameworkId, const ExecutorID &executorId, const Option< TaskInfo > &taskInfo)
void initialize() override
Invoked when a process gets spawned.
const T & get() const &
Definition: option.hpp:118
protobuf::framework::Capabilities capabilities
Definition: slave.hpp:1107
process::Future< Nothing > _recover()
virtual process::Future< Nothing > _run(const FrameworkInfo &frameworkInfo, const ExecutorInfo &executorInfo, const Option< TaskInfo > &task, const Option< TaskGroupInfo > &taskGroup, const std::vector< ResourceVersionUUID > &resourceVersionUuids, const Option< bool > &launchExecutor)
void exited(const process::UPID &pid) override
Invoked when a linked process has exited.
ResourceProviderInfo info
Definition: slave.hpp:1168
The SecretGenerator interface represents a mechanism to create a secret from a principal.
Definition: secret_generator.hpp:34
Definition: protobuf.hpp:100
const ExecutorID id
Definition: slave.hpp:961
State
Definition: slave.hpp:1094
void executorLaunched(const FrameworkID &frameworkId, const ExecutorID &executorId, const ContainerID &containerId, const process::Future< Containerizer::LaunchResult > &future)
Definition: time.hpp:23
void reregistered(const process::UPID &from, const SlaveID &slaveId, const std::vector< ReconcileTasksMessage > &reconciliations, const MasterSlaveConnection &connection)
void fileAttached(const process::Future< Nothing > &result, const std::string &path, const std::string &virtualPath)
std::vector< TaskGroupInfo > pendingTaskGroups
Definition: slave.hpp:1142
Try< std::vector< Entry > > list(const std::string &hierarchy, const std::string &cgroup)
void _statusUpdate(StatusUpdate update, const Option< process::UPID > &pid, const ExecutorID &executorId, const Option< process::Future< ContainerStatus >> &containerStatus)
Definition: boundedhashmap.hpp:27
const FrameworkID frameworkId
Definition: slave.hpp:964
void detected(const process::Future< Option< MasterInfo >> &_master)
#define flags
Definition: decoder.hpp:18
A "process identifier" used to uniquely identify a process when dispatching messages.
Definition: pid.hpp:279
hashmap< ExecutorID, Executor * > executors
Definition: slave.hpp:1145
Definition: none.hpp:27
Definition: attributes.hpp:24
Slave * slave
Definition: slave.hpp:959
LinkedHashMap< TaskID, Task * > launchedTasks
Definition: slave.hpp:1010
process::Future< Nothing > recover(const Try< state::State > &state)
void ___statusUpdate(const process::Future< Nothing > &future, const StatusUpdate &update, const Option< process::UPID > &pid)
Definition: timer.hpp:30
void shutdownFramework(const process::UPID &from, const FrameworkID &frameworkId)
void applyOperation(const ApplyOperationMessage &message)
Nothing detachFile(const std::string &path)
virtual void killTask(const process::UPID &from, const KillTaskMessage &killTaskMessage)
friend class Framework
Definition: slave.hpp:596
virtual void __recover(const process::Future< Nothing > &future)
Slave(const std::string &id, const Flags &flags, mesos::master::detector::MasterDetector *detector, Containerizer *containerizer, Files *files, GarbageCollector *gc, TaskStatusUpdateManager *taskStatusUpdateManager, mesos::slave::ResourceEstimator *resourceEstimator, mesos::slave::QoSController *qosController, mesos::SecretGenerator *secretGenerator, const Option< Authorizer * > &authorizer)
Resources totalResources
Definition: slave.hpp:1169
std::vector< TaskGroupInfo > queuedTaskGroups
Definition: slave.hpp:1007
An abstraction of a Master detector which can be used to detect the leading master from a group...
Definition: detector.hpp:38
static Time now()
The current clock time for either the current process that makes this call or the global clock time i...
hashmap< UUID, Operation * > operations
Definition: slave.hpp:1186
virtual void __run(const FrameworkInfo &frameworkInfo, const ExecutorInfo &executorInfo, const Option< TaskInfo > &task, const Option< TaskGroupInfo > &taskGroup, const std::vector< ResourceVersionUUID > &resourceVersionUuids, const Option< bool > &launchExecutor)
void ping(const process::UPID &from, bool connected)
Definition: metrics.hpp:32
void checkpointResources(std::vector< Resource > checkpointedResources, bool changeTotal)
void __statusUpdate(const Option< process::Future< Nothing >> &future, const StatusUpdate &update, const Option< process::UPID > &pid, const ExecutorID &executorId, const ContainerID &containerId, bool checkpoint)
void doReliableRegistration(Duration maxBackoff)
process::Promise< Nothing > recovered
Definition: slave.hpp:417
Definition: qos_controller.hpp:44
const std::string directory
Definition: slave.hpp:968
void statusUpdate(StatusUpdate update, const Option< process::UPID > &pid)
URI http(const std::string &host, const std::string &path="/", const Option< int > &port=None(), const Option< std::string > &query=None(), const Option< std::string > &fragment=None(), const Option< std::string > &user=None(), const Option< std::string > &password=None())
Creates an http URI with the given parameters.
Definition: http.hpp:35
State
Definition: slave.hpp:404
virtual void authenticate(Duration minTimeout, Duration maxTimeout)
virtual void runTask(const process::UPID &from, const FrameworkInfo &frameworkInfo, const FrameworkID &frameworkId, const process::UPID &pid, const TaskInfo &task, const std::vector< ResourceVersionUUID > &resourceVersionUuids, const Option< bool > &launchExecutor)
FrameworkInfo info
Definition: slave.hpp:1105
Definition: owned.hpp:36
void shutdown(const process::UPID &from, const std::string &message)
Definition: slave.hpp:857
virtual void _qosCorrections(const process::Future< std::list< mesos::slave::QoSCorrection >> &correction)
Executor * getExecutor(const FrameworkID &frameworkId, const ExecutorID &executorId) const
Definition: parse.hpp:33
void registerExecutor(const process::UPID &from, const FrameworkID &frameworkId, const ExecutorID &executorId)
PID< MetricsProcess > metrics
void updateFramework(const UpdateFrameworkMessage &message)
LinkedHashMap< TaskID, Task * > terminatedTasks
Definition: slave.hpp:1013
constexpr const char * name
Definition: shell.hpp:43
void registerExecutorTimeout(const FrameworkID &frameworkId, const ExecutorID &executorId, const ContainerID &containerId)
Definition: slave.hpp:1037
Definition: authenticatee.hpp:29