Apache Mesos
slave.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __SLAVE_HPP__
18 #define __SLAVE_HPP__
19 
20 #include <stdint.h>
21 
22 #include <list>
23 #include <memory>
24 #include <string>
25 #include <vector>
26 
27 #include <boost/circular_buffer.hpp>
28 
29 #include <mesos/attributes.hpp>
30 #include <mesos/resources.hpp>
31 #include <mesos/type_utils.hpp>
32 
33 #include <mesos/agent/agent.hpp>
34 
36 
38 
40 
42 
46 
48 
49 #include <process/http.hpp>
50 #include <process/future.hpp>
51 #include <process/owned.hpp>
52 #include <process/limiter.hpp>
53 #include <process/process.hpp>
54 #include <process/protobuf.hpp>
55 #include <process/shared.hpp>
56 #include <process/sequence.hpp>
57 
58 #include <stout/boundedhashmap.hpp>
59 #include <stout/bytes.hpp>
60 #include <stout/linkedhashmap.hpp>
61 #include <stout/hashmap.hpp>
62 #include <stout/hashset.hpp>
63 #include <stout/option.hpp>
64 #include <stout/os.hpp>
65 #include <stout/path.hpp>
66 #include <stout/recordio.hpp>
67 #include <stout/uuid.hpp>
68 
69 #include "common/http.hpp"
71 #include "common/recordio.hpp"
72 
73 #include "files/files.hpp"
74 
75 #include "internal/evolve.hpp"
76 
77 #include "messages/messages.hpp"
78 
81 
82 #include "slave/constants.hpp"
84 #include "slave/flags.hpp"
85 #include "slave/gc.hpp"
86 #include "slave/http.hpp"
87 #include "slave/metrics.hpp"
88 #include "slave/paths.hpp"
89 #include "slave/state.hpp"
90 
91 // `REGISTERING` is used as an enum value, but it's actually defined as a
92 // constant in the Windows SDK, so the macro is removed here before any
   // enum in this header uses that name.
   // NOTE(review): `REGISTERED` is undefined as well, presumably for the
   // same reason -- confirm against the Windows SDK headers.
93 #ifdef __WINDOWS__
94 #undef REGISTERING
95 #undef REGISTERED
96 #endif // __WINDOWS__
97 
98 namespace mesos {
99 
100 // Forward declarations.
101 class Authorizer;
102 class DiskProfileAdaptor;
103 
104 namespace internal {
105 namespace slave {
106 
107 // Some forward declarations.
108 class TaskStatusUpdateManager;
109 class Executor;
110 class Framework;
111 
112 struct HttpConnection;
113 struct ResourceProvider;
114 
115 
116 class Slave : public ProtobufProcess<Slave>
117 {
118 public:
119  Slave(const std::string& id,
120  const Flags& flags,
122  Containerizer* containerizer,
123  Files* files,
124  GarbageCollector* gc,
125  TaskStatusUpdateManager* taskStatusUpdateManager,
126  mesos::slave::ResourceEstimator* resourceEstimator,
127  mesos::slave::QoSController* qosController,
128  mesos::SecretGenerator* secretGenerator,
129  const Option<Authorizer*>& authorizer);
130 
131  virtual ~Slave();
132 
133  void shutdown(const process::UPID& from, const std::string& message);
134 
135  void registered(
136  const process::UPID& from,
137  const SlaveID& slaveId,
138  const MasterSlaveConnection& connection);
139 
140  void reregistered(
141  const process::UPID& from,
142  const SlaveID& slaveId,
143  const std::vector<ReconcileTasksMessage>& reconciliations,
144  const MasterSlaveConnection& connection);
145 
146  void doReliableRegistration(Duration maxBackoff);
147 
148  // TODO(mzhu): Combine this with `runTask()` and replace all `runTask()`
149  // mocks with `run()` mocks.
151  const process::UPID& from,
152  RunTaskMessage&& runTaskMessage);
153 
154  // Made 'virtual' for Slave mocking.
155  virtual void runTask(
156  const process::UPID& from,
157  const FrameworkInfo& frameworkInfo,
158  const FrameworkID& frameworkId,
159  const process::UPID& pid,
160  const TaskInfo& task,
161  const std::vector<ResourceVersionUUID>& resourceVersionUuids,
163 
164  void run(
165  const FrameworkInfo& frameworkInfo,
166  ExecutorInfo executorInfo,
167  Option<TaskInfo> task,
168  Option<TaskGroupInfo> taskGroup,
169  const std::vector<ResourceVersionUUID>& resourceVersionUuids,
170  const process::UPID& pid,
171  const Option<bool>& launchExecutor);
172 
173  // Made 'virtual' for Slave mocking.
174  //
175  // This function returns a future so that we can encapsulate a task(group)
176  // launch operation (from agent receiving the run message to the completion
177  // of `_run()`) into a single future. This includes all the asynchronous
178  // steps (currently two: unschedule GC and task authorization) prior to the
179  // executor launch.
181  const FrameworkInfo& frameworkInfo,
182  const ExecutorInfo& executorInfo,
183  const Option<TaskInfo>& task,
184  const Option<TaskGroupInfo>& taskGroup,
185  const std::vector<ResourceVersionUUID>& resourceVersionUuids,
186  const Option<bool>& launchExecutor);
187 
188  // Made 'virtual' for Slave mocking.
189  virtual void __run(
190  const FrameworkInfo& frameworkInfo,
191  const ExecutorInfo& executorInfo,
192  const Option<TaskInfo>& task,
193  const Option<TaskGroupInfo>& taskGroup,
194  const std::vector<ResourceVersionUUID>& resourceVersionUuids,
195  const Option<bool>& launchExecutor);
196 
197  // This is called when the resource limits of the container have
198  // been updated for the given tasks and task groups. If the update is
199  // successful, we flush the given tasks to the executor by sending
200  // RunTaskMessages or `LAUNCH_GROUP` events.
201  //
202  // Made 'virtual' for Slave mocking.
203  virtual void ___run(
204  const process::Future<Nothing>& future,
205  const FrameworkID& frameworkId,
206  const ExecutorID& executorId,
207  const ContainerID& containerId,
208  const std::vector<TaskInfo>& tasks,
209  const std::vector<TaskGroupInfo>& taskGroups);
210 
211  // TODO(mzhu): Combine this with `runTaskGroup()` and replace all
212  // `runTaskGroup()` mocks with `run()` mocks.
214  const process::UPID& from,
215  RunTaskGroupMessage&& runTaskGroupMessage);
216 
217  // Made 'virtual' for Slave mocking.
218  virtual void runTaskGroup(
219  const process::UPID& from,
220  const FrameworkInfo& frameworkInfo,
221  const ExecutorInfo& executorInfo,
222  const TaskGroupInfo& taskGroupInfo,
223  const std::vector<ResourceVersionUUID>& resourceVersionUuids,
224  const Option<bool>& launchExecutor);
225 
226  // Made 'virtual' for Slave mocking.
227  virtual void killTask(
228  const process::UPID& from,
229  const KillTaskMessage& killTaskMessage);
230 
231  // Made 'virtual' for Slave mocking.
232  virtual void shutdownExecutor(
233  const process::UPID& from,
234  const FrameworkID& frameworkId,
235  const ExecutorID& executorId);
236 
237  // Shut down an executor. This is a two phase process. First, an
238  // executor receives a shut down message (shut down phase), then
239  // after a configurable timeout the slave actually forces a kill
240  // (kill phase, via the isolator) if the executor has not
241  // exited.
242  //
243  // Made 'virtual' for Slave mocking.
244  virtual void _shutdownExecutor(Framework* framework, Executor* executor);
245 
246  void shutdownFramework(
247  const process::UPID& from,
248  const FrameworkID& frameworkId);
249 
250  void schedulerMessage(
251  const SlaveID& slaveId,
252  const FrameworkID& frameworkId,
253  const ExecutorID& executorId,
254  const std::string& data);
255 
256  void updateFramework(
257  const UpdateFrameworkMessage& message);
258 
259  void checkpointResources(
260  std::vector<Resource> checkpointedResources,
261  bool changeTotal);
262 
264  const std::vector<Resource>& checkpointedResources);
265 
266  void applyOperation(const ApplyOperationMessage& message);
267 
268  // Reconciles pending operations with the master. This is necessary to handle
269  // cases in which operations were dropped in transit, or in which an agent's
270  // `UpdateSlaveMessage` was sent at the same time as an operation was en route
271  // from the master to the agent.
272  void reconcileOperations(const ReconcileOperationsMessage& message);
273 
274  void subscribe(
275  HttpConnection http,
276  const executor::Call::Subscribe& subscribe,
277  Framework* framework,
278  Executor* executor);
279 
280  void registerExecutor(
281  const process::UPID& from,
282  const FrameworkID& frameworkId,
283  const ExecutorID& executorId);
284 
285  // Called when an executor reregisters with a recovering slave.
286  // 'tasks' : Unacknowledged tasks (i.e., tasks that the executor
287  // driver never received an ACK for.)
288  // 'updates' : Unacknowledged updates.
289  void reregisterExecutor(
290  const process::UPID& from,
291  const FrameworkID& frameworkId,
292  const ExecutorID& executorId,
293  const std::vector<TaskInfo>& tasks,
294  const std::vector<StatusUpdate>& updates);
295 
296  void _reregisterExecutor(
297  const process::Future<Nothing>& future,
298  const FrameworkID& frameworkId,
299  const ExecutorID& executorId,
300  const ContainerID& containerId);
301 
302  void executorMessage(
303  const SlaveID& slaveId,
304  const FrameworkID& frameworkId,
305  const ExecutorID& executorId,
306  const std::string& data);
307 
308  void ping(const process::UPID& from, bool connected);
309 
310  // Handles the task status update.
311  // NOTE: If 'pid' is a valid UPID an ACK is sent to this pid
312  // after the update is successfully handled. If pid == UPID()
313  // no ACK is sent. The latter is used by the slave to send
314  // status updates it generated (e.g., TASK_LOST). If pid == None()
315  // an ACK is sent to the corresponding HTTP based executor.
316  // NOTE: StatusUpdate is passed by value because it is modified
317  // to ensure source field is set.
318  void statusUpdate(StatusUpdate update, const Option<process::UPID>& pid);
319 
320  // Called when the slave receives a `StatusUpdate` from an executor
321  // and the slave needs to retrieve the container status for the
322  // container associated with the executor.
323  void _statusUpdate(
324  StatusUpdate update,
325  const Option<process::UPID>& pid,
326  const ExecutorID& executorId,
327  const Option<process::Future<ContainerStatus>>& containerStatus);
328 
329  // Continue handling the status update after optionally updating the
330  // container's resources.
331  void __statusUpdate(
332  const Option<process::Future<Nothing>>& future,
333  const StatusUpdate& update,
334  const Option<process::UPID>& pid,
335  const ExecutorID& executorId,
336  const ContainerID& containerId,
337  bool checkpoint);
338 
339  // This is called when the task status update manager finishes
340  // handling the update. If the handling is successful, an
341  // acknowledgment is sent to the executor.
342  void ___statusUpdate(
343  const process::Future<Nothing>& future,
344  const StatusUpdate& update,
345  const Option<process::UPID>& pid);
346 
347  // This is called by task status update manager to forward a status
348  // update to the master. Note that the latest state of the task is
349  // added to the update before forwarding.
350  void forward(StatusUpdate update);
351 
353  const process::UPID& from,
354  const SlaveID& slaveId,
355  const FrameworkID& frameworkId,
356  const TaskID& taskId,
357  const std::string& uuid);
358 
360  const process::Future<bool>& future,
361  const TaskID& taskId,
362  const FrameworkID& frameworkId,
363  const UUID& uuid);
364 
366  const process::UPID& from,
367  const AcknowledgeOperationStatusMessage& acknowledgement);
368 
369  void executorLaunched(
370  const FrameworkID& frameworkId,
371  const ExecutorID& executorId,
372  const ContainerID& containerId,
374 
375  // Made 'virtual' for Slave mocking.
376  virtual void executorTerminated(
377  const FrameworkID& frameworkId,
378  const ExecutorID& executorId,
379  const process::Future<Option<
380  mesos::slave::ContainerTermination>>& termination);
381 
382  // NOTE: Pulled these to public to make them visible for testing.
383  // TODO(vinod): Make tests friends to this class instead.
384 
385  // Garbage collects the directories based on the current disk usage.
386  // TODO(vinod): Instead of making this function public, we need to
387  // mock both GarbageCollector (and pass it through slave's constructor)
388  // and os calls.
390 
391  // Garbage collect image layers based on the disk usage of image
392  // store.
394 
395  // Invoked whenever the detector detects a change in masters.
396  // Made public for testing purposes.
397  void detected(const process::Future<Option<MasterInfo>>& _master);
398 
399  enum State
400  {
     // Lifecycle states of the agent. The enum is declared together with
     // the public data member `state`, which holds the current value.
401  RECOVERING, // Slave is doing recovery.
402  DISCONNECTED, // Slave is not connected to the master.
403  RUNNING, // Slave has (re-)registered.
404  TERMINATING, // Slave is shutting down.
405  } state;
406 
407  // Describes information about agent recovery.
409  {
410  // Flag to indicate if recovery, including reconciling
411  // (i.e., reconnect/kill) with executors is finished.
413 
414  // Flag to indicate that HTTP based executors can
415  // subscribe with the agent. We allow them to subscribe
416  // after the agent recovers the containerizer.
417  bool reconnect = false;
418  } recoveryInfo;
419 
420  // TODO(benh): Clang requires members to be public in order to take
421  // their address which we do in tests (for things like
422  // FUTURE_DISPATCH).
423 // protected:
424  virtual void initialize();
425  virtual void finalize();
426  virtual void exited(const process::UPID& pid);
427 
429  const FrameworkID& frameworkId,
430  const ExecutorID& executorId,
431  const ContainerID& containerId);
432 
433  // If an executor is launched for a task group, `taskInfo` would not be set.
434  void launchExecutor(
435  const Option<process::Future<Secret>>& future,
436  const FrameworkID& frameworkId,
437  const ExecutorID& executorId,
438  const Option<TaskInfo>& taskInfo);
439 
440  void fileAttached(const process::Future<Nothing>& result,
441  const std::string& path,
442  const std::string& virtualPath);
443 
444  Nothing detachFile(const std::string& path);
445 
446  // TODO(qianzhang): This is a workaround to make the default executor task's
447  // volume directory visible in MESOS UI. It handles two cases:
448  // 1. The task has disk resources specified. In this case any disk resources
449  // specified for the task are mounted on the top level container since
450  // currently all resources of nested containers are merged in the top
451  // level executor container. To make sure the task can access any volumes
452  // specified in its disk resources from its sandbox, a workaround was
453  // introduced to the default executor in MESOS-7225, i.e., adding a
454  // `SANDBOX_PATH` volume with type `PARENT` to the corresponding nested
455  // container. This volume gets translated into a bind mount in the nested
456  // container's mount namespace, which is not visible in Mesos UI because
457  // it operates in the host namespace. See MESOS-8279 for details.
458  // 2. The executor has disk resources specified and the task's ContainerInfo
459  // has a `SANDBOX_PATH` volume with type `PARENT` specified to share the
460  // executor's disk volume. Similar to the first case, this `SANDBOX_PATH`
461  // volume gets translated into a bind mount which is not visible in Mesos
462  // UI. See MESOS-8565 for details.
463  //
464  // To make the task's volume directory visible in Mesos UI, here we attach the
465  // executor's volume directory to it so that it can be accessed via the /files
466  // endpoint. So when users browse task's volume directory in Mesos UI, what
467  // they actually browse is the executor's volume directory. Note when calling
468  // `Files::attach()`, the third argument `authorized` is not specified because
469  // it is already specified when we do the attach for the executor's sandbox
470  // and it also applies to the executor's tasks. Note that for the second case
471  // we can not do the attach when the task's ContainerInfo has a `SANDBOX_PATH`
472  // volume with type `PARENT` but the executor has NO disk resources, because
473  // in such case the attach will fail due to the executor's volume directory
474  // not existing which will actually be created by the `volume/sandbox_path`
475  // isolator when launching the nested container. But if the executor has disk
476  // resources, then we will not have this issue since the executor's volume
477  // directory will be created by the `filesystem/linux` isolator when launching
478  // the executor before we do the attach.
480  const ExecutorInfo& executorInfo,
481  const ContainerID& executorContainerId,
482  const Task& task);
483 
484  // TODO(qianzhang): Remove the task's volume directory from the /files
485  // endpoint. This is a workaround for MESOS-8279 and MESOS-8565.
487  const ExecutorInfo& executorInfo,
488  const ContainerID& executorContainerId,
489  const std::vector<Task>& tasks);
490 
491  // Triggers a re-detection of the master when the slave does
492  // not receive a ping.
494 
495  void authenticate();
496 
497  // Helper routines to lookup a framework/executor.
498  Framework* getFramework(const FrameworkID& frameworkId) const;
499 
501  const FrameworkID& frameworkId,
502  const ExecutorID& executorId) const;
503 
504  Executor* getExecutor(const ContainerID& containerId) const;
505 
506  // Returns the ExecutorInfo associated with a TaskInfo. If the task has no
507  // ExecutorInfo, then we generate an ExecutorInfo corresponding to the
508  // command executor.
509  ExecutorInfo getExecutorInfo(
510  const FrameworkInfo& frameworkInfo,
511  const TaskInfo& task) const;
512 
513  // Shuts down the executor if it did not register yet.
515  const FrameworkID& frameworkId,
516  const ExecutorID& executorId,
517  const ContainerID& containerId);
518 
519  // Cleans up all un-reregistered executors during recovery.
521 
522  // This function returns the max age of executor/slave directories allowed,
523  // given a disk usage. This value could be used to tune gc.
524  Duration age(double usage);
525 
526  // Checks the current disk usage and schedules for gc as necessary.
527  void checkDiskUsage();
528 
529  // Checks the current container image disk usage and triggers image
530  // gc if necessary.
531  void checkImageDiskUsage();
532 
533  // Recovers the slave, task status update manager and isolator.
535 
536  // This is called after 'recover()'. If 'flags.recover' is
537  // 'reconnect', the slave attempts to reconnect to any old live
538  // executors. Otherwise, the slave attempts to shutdown/kill them.
540 
541  // This is a helper to call recover() on the containerizer at the end of
542  // recover() and before __recover().
543  // TODO(idownes): Remove this when we support defers to objects.
545  const Option<state::SlaveState>& state);
546 
547  // This is called when recovery finishes.
548  // Made 'virtual' for Slave mocking.
549  virtual void __recover(const process::Future<Nothing>& future);
550 
551  // Helper to recover a framework from the specified state.
552  void recoverFramework(
553  const state::FrameworkState& state,
554  const hashset<ExecutorID>& executorsToRecheckpoint,
555  const hashmap<ExecutorID, hashset<TaskID>>& tasksToRecheckpoint);
556 
557  // Removes and garbage collects the executor.
558  void removeExecutor(Framework* framework, Executor* executor);
559 
560  // Removes and garbage collects the framework.
561  // Made 'virtual' for Slave mocking.
562  virtual void removeFramework(Framework* framework);
563 
564  // Schedules a 'path' for gc based on its modification time.
565  process::Future<Nothing> garbageCollect(const std::string& path);
566 
567  // Called when the slave was signaled from the specified user.
568  void signaled(int signal, int uid);
569 
570  // Made 'virtual' for Slave mocking.
571  virtual void qosCorrections();
572 
573  // Made 'virtual' for Slave mocking.
574  virtual void _qosCorrections(
576  mesos::slave::QoSCorrection>>& correction);
577 
578  // Returns the resource usage information for all executors.
580 
581  // Handle the second phase of shutting down an executor for those
582  // executors that have not properly shutdown within a timeout.
584  const FrameworkID& frameworkId,
585  const ExecutorID& executorId,
586  const ContainerID& containerId);
587 
588 private:
589  friend class Executor;
590  friend class Framework;
591  friend class Http;
592 
593  friend struct Metrics;
594 
595  Slave(const Slave&) = delete;
596  Slave& operator=(const Slave&) = delete;
597 
598  void _authenticate();
599  void authenticationTimeout(process::Future<bool> future);
600 
601  // Process creation of persistent volumes (for CREATE) and/or deletion
602  // of persistent volumes (for DESTROY) as a part of handling
603  // checkpointed resources, and commit the checkpointed resources on
604  // successful completion of all the operations.
605  Try<Nothing> syncCheckpointedResources(
606  const Resources& newCheckpointedResources);
607 
608  process::Future<bool> authorizeTask(
609  const TaskInfo& task,
610  const FrameworkInfo& frameworkInfo);
611 
612  process::Future<bool> authorizeLogAccess(
614 
615  process::Future<bool> authorizeSandboxAccess(
617  const FrameworkID& frameworkId,
618  const ExecutorID& executorId);
619 
620  void sendExecutorTerminatedStatusUpdate(
621  const TaskID& taskId,
622  const process::Future<Option<
623  mesos::slave::ContainerTermination>>& termination,
624  const FrameworkID& frameworkId,
625  const Executor* executor);
626 
627  void sendExitedExecutorMessage(
628  const FrameworkID& frameworkId,
629  const ExecutorID& executorId,
630  const Option<int>& status = None());
631 
632  // Forwards the current total of oversubscribed resources.
633  void forwardOversubscribed();
634  void _forwardOversubscribed(
635  const process::Future<Resources>& oversubscribable);
636 
637  // Helper functions to generate `UpdateSlaveMessage` for either
638  // just updates to resource provider-related information, or both
639  // resource provider-related information and oversubscribed
640  // resources.
641  UpdateSlaveMessage generateResourceProviderUpdate() const;
642  UpdateSlaveMessage generateUpdateSlaveMessage() const;
643 
644  void handleResourceProviderMessage(
646 
647  void addOperation(Operation* operation);
648 
649  // Transitions the operation, and recovers resource if the operation becomes
650  // terminal.
651  void updateOperation(
652  Operation* operation,
653  const UpdateOperationStatusMessage& update);
654 
655  // Update the `latest_status` of the operation if it is not terminal.
656  void updateOperationLatestStatus(
657  Operation* operation,
658  const OperationStatus& status);
659 
660  void removeOperation(Operation* operation);
661 
662  Operation* getOperation(const UUID& uuid) const;
663 
664  void addResourceProvider(ResourceProvider* resourceProvider);
665  ResourceProvider* getResourceProvider(const ResourceProviderID& id) const;
666 
667  void apply(Operation* operation);
668 
669  // Publish all resources that are needed to run the current set of
670  // tasks and executors on the agent.
671  // NOTE: The `additionalResources` parameter is for publishing
672  // additional task resources when launching executors. Consider
673  // removing this parameter once we have revisited MESOS-600.
674  process::Future<Nothing> publishResources(
675  const Option<Resources>& additionalResources = None());
676 
677  // PullGauge methods.
     //
     // Returns the number of entries currently held in `frameworks`,
     // converted to a double.
678  double _frameworks_active()
679  {
680  return static_cast<double>(frameworks.size());
681  }
682 
     // Returns the number of seconds between `startTime` and the current
     // `process::Clock` time.
683  double _uptime_secs()
684  {
685  return (process::Clock::now() - startTime).secs();
686  }
687 
     // Returns 1 when `master` is set (`isSome()`), and 0 otherwise.
688  double _registered()
689  {
690  return master.isSome() ? 1 : 0;
691  }
692 
693  double _tasks_staging();
694  double _tasks_starting();
695  double _tasks_running();
696  double _tasks_killing();
697 
698  double _executors_registering();
699  double _executors_running();
700  double _executors_terminating();
701 
702  double _executor_directory_max_allowed_age_secs();
703 
704  double _resources_total(const std::string& name);
705  double _resources_used(const std::string& name);
706  double _resources_percent(const std::string& name);
707 
708  double _resources_revocable_total(const std::string& name);
709  double _resources_revocable_used(const std::string& name);
710  double _resources_revocable_percent(const std::string& name);
711 
712  // Checks whether the two `SlaveInfo` objects are considered
713  // compatible based on the value of the `--configuration_policy`
714  // flag.
715  Try<Nothing> compatible(
716  const SlaveInfo& previous,
717  const SlaveInfo& current) const;
718 
719  void initializeResourceProviderManager(
720  const Flags& flags,
721  const SlaveID& slaveId);
722 
723  protobuf::master::Capabilities requiredMasterCapabilities;
724 
725  const Flags flags;
726 
727  const Http http;
728 
729  SlaveInfo info;
730 
731  protobuf::slave::Capabilities capabilities;
732 
733  // Resources that are checkpointed by the slave.
734  Resources checkpointedResources;
735 
736  // The current total resources of the agent, i.e., `info.resources()` with
737  // checkpointed resources applied and resource provider resources.
738  Resources totalResources;
739 
741 
743 
744  // Note that these frameworks are "completed" only in that
745  // they no longer have any active tasks or executors on this
746  // particular agent.
747  //
748  // TODO(bmahler): Implement a more accurate framework lifecycle
749  // in the agent code, ideally the master can inform the agent
750  // when a framework is actually completed, and the agent can
751  // perhaps store a cache of "idle" frameworks. See MESOS-7890.
753 
755 
756  Containerizer* containerizer;
757 
758  Files* files;
759 
761 
762  process::Time startTime;
763 
764  GarbageCollector* gc;
765 
766  TaskStatusUpdateManager* taskStatusUpdateManager;
767 
768  // Master detection future.
770 
771  // Master's ping timeout value, updated on reregistration.
772  Duration masterPingTimeout;
773 
774  // Timer for triggering re-detection when no ping is received from
775  // the master.
776  process::Timer pingTimer;
777 
778  // Timer for triggering agent (re)registration after detecting a new master.
779  process::Timer agentRegistrationTimer;
780 
781  // Root meta directory containing checkpointed data.
782  const std::string metaDir;
783 
784  // Indicates the number of errors ignored in "--no-strict" recovery mode.
785  unsigned int recoveryErrors;
786 
787  Option<Credential> credential;
788 
789  // Authenticatee name as supplied via flags.
790  std::string authenticateeName;
791 
792  Authenticatee* authenticatee;
793 
794  // Indicates if an authentication attempt is in progress.
795  Option<process::Future<bool>> authenticating;
796 
797  // Indicates if the authentication is successful.
798  bool authenticated;
799 
800  // Indicates if a new authentication attempt should be enforced.
801  bool reauthenticate;
802 
803  // Indicates the number of failed authentication attempts.
804  uint64_t failedAuthentications;
805 
806  // Maximum age of executor directories. Will be recomputed
807  // periodically every flags.disk_watch_interval.
808  Duration executorDirectoryMaxAllowedAge;
809 
810  mesos::slave::ResourceEstimator* resourceEstimator;
811 
812  mesos::slave::QoSController* qosController;
813 
814  std::shared_ptr<DiskProfileAdaptor> diskProfileAdaptor;
815 
816  mesos::SecretGenerator* secretGenerator;
817 
818  const Option<Authorizer*> authorizer;
819 
820  // The most recent estimate of the total amount of oversubscribed
821  // (allocated and oversubscribable) resources.
822  Option<Resources> oversubscribedResources;
823 
824  process::Owned<ResourceProviderManager> resourceProviderManager;
825  process::Owned<LocalResourceProviderDaemon> localResourceProviderDaemon;
826 
827  // Local resource providers known by the agent.
829 
830  // Used to establish the relationship between the operation and the
831  // resources that the operation is operating on. Each resource
832  // provider will keep a resource version UUID, and change it when it
833  // believes that the resources from this resource provider are out
834  // of sync from the master's view. The master will keep track of
835  // the last known resource version UUID for each resource provider,
836  // and attach the resource version UUID in each operation it sends
837  // out. The resource provider should reject operations that have a
838  // different resource version UUID than that it maintains, because
839  // this means the operation is operating on resources that might
840  // have already been invalidated.
841  UUID resourceVersion;
842 
843  // Keeps track of the following:
844  // (1) Pending operations for resources from the agent.
845  // (2) Pending operations or terminal operations that have
846  // unacknowledged status updates for resource provider
847  // provided resources.
848  hashmap<UUID, Operation*> operations;
849 };
850 
851 
852 // Represents the streaming HTTP connection to an executor.
    //
    // Messages handed to `send()` are evolved, encoded by an encoder that is
    // built from `serialize` bound to the connection's content type, and
    // then written to `writer`.
854 {
856  ContentType _contentType)
857  : writer(_writer),
858  contentType(_contentType),
859  encoder(lambda::bind(serialize, contentType, lambda::_1)) {}
860 
861  // Converts the message to an Event before sending.
     // Returns the result of the underlying `writer.write()` call.
862  template <typename Message>
863  bool send(const Message& message)
864  {
865  // We need to evolve the internal 'message' into a
866  // 'v1::executor::Event'.
867  return writer.write(encoder.encode(evolve(message)));
868  }
869 
     // Closes `writer` and returns the result of that call.
870  bool close()
871  {
872  return writer.close();
873  }
874 
876  {
     // Forwards to `writer.readerClosed()`.
877  return writer.readerClosed();
878  }
879 
883 };
884 
885 
886 std::ostream& operator<<(std::ostream& stream, const Executor& executor);
887 
888 
889 // Information describing an executor.
890 class Executor
891 {
892 public:
893  Executor(
894  Slave* slave,
895  const FrameworkID& frameworkId,
896  const ExecutorInfo& info,
897  const ContainerID& containerId,
898  const std::string& directory,
899  const Option<std::string>& user,
900  bool checkpoint);
901 
902  ~Executor();
903 
904  // Note that these tasks will also be tracked within `queuedTasks`.
905  void enqueueTaskGroup(const TaskGroupInfo& taskGroup);
906 
907  void enqueueTask(const TaskInfo& task);
908  Option<TaskInfo> dequeueTask(const TaskID& taskId);
909  Task* addLaunchedTask(const TaskInfo& task);
910  void completeTask(const TaskID& taskId);
911  void checkpointExecutor();
912  void checkpointTask(const TaskInfo& task);
913  void checkpointTask(const Task& task);
914 
915  void recoverTask(const state::TaskState& state, bool recheckpointTask);
916 
917  Try<Nothing> updateTaskState(const TaskStatus& status);
918 
919  // Returns true if there are any queued/launched/terminated tasks.
920  bool incompleteTasks();
921 
922  // Returns true if the agent ever sent any tasks to this executor.
923  // More precisely, this function returns whether either:
924  //
925  // (1) There are terminated/completed tasks with a
926  // SOURCE_EXECUTOR status.
927  //
928  // (2) `launchedTasks` is not empty.
929  //
930  // If this function returns false and there are no queued tasks,
931  // we probably (see TODO below) have killed or dropped all of its
932  // initial tasks, in which case the agent will shut down the executor.
933  //
934  // TODO(mzhu): Note however, that since we look through the cache
935  // of completed tasks, we can have false positives when a task
936  // that was delivered to the executor has been evicted from the
937  // completed task cache by tasks that have been killed by the
938  // agent before delivery. This should be fixed.
939  //
940  // NOTE: This function checks whether any task has ever been sent;
941  // this does not necessarily mean the executor has ever received
942  // any tasks. Specifically, tasks in `launchedTasks` may be dropped
943  // before being received by the executor if the agent restarts.
944  bool everSentTask() const;
945 
946  // Sends a message to the connected executor.
947  template <typename Message>
948  void send(const Message& message)
949  {
950  if (state == REGISTERING || state == TERMINATED) {
951  LOG(WARNING) << "Attempting to send message to disconnected"
952  << " executor " << *this << " in state " << state;
953  }
954 
955  if (http.isSome()) {
956  if (!http->send(message)) {
957  LOG(WARNING) << "Unable to send event to executor " << *this
958  << ": connection closed";
959  }
960  } else if (pid.isSome()) {
961  slave->send(pid.get(), message);
962  } else {
963  LOG(WARNING) << "Unable to send event to executor " << *this
964  << ": unknown connection type";
965  }
966  }
967 
968  // Returns true if this executor is generated by Mesos for a command
969  // task (either command executor for MesosContainerizer or docker
970  // executor for DockerContainerizer).
971  bool isGeneratedForCommandTask() const;
972 
973  // Closes the HTTP connection.
974  void closeHttpConnection();
975 
976  // Returns the task group associated with the task.
977  Option<TaskGroupInfo> getQueuedTaskGroup(const TaskID& taskId);
978 
980 
981  enum State
982  {
983  REGISTERING, // Executor is launched but not (re-)registered yet.
984  RUNNING, // Executor has (re-)registered.
985  TERMINATING, // Executor is being shutdown/killed.
986  TERMINATED, // Executor has terminated but there might be pending updates.
987  } state;
988 
989  // We store the pointer to 'Slave' to get access to its methods and
990  // variables. One could imagine 'Executor' as being an inner class
991  // of the 'Slave' class.
993 
994  const ExecutorID id;
995  const ExecutorInfo info;
996 
997  const FrameworkID frameworkId;
998 
999  const ContainerID containerId;
1000 
1001  const std::string directory;
1002 
1003  // The sandbox will be owned by this user and the executor will
1004  // run as this user. This can be set to None when --switch_user
1005  // is false or when compiled for Windows.
1007 
1008  const bool checkpoint;
1009 
1010  // An Executor can either be connected via HTTP or by libprocess
1011  // message passing. The following are the possible states:
1012  //
1013  // Agent State  Executor State  http  pid     Executor Type
1014  // -----------  --------------  ----  ----    -------------
1015  // RECOVERING   REGISTERING     None  UPID()  Unknown
1016  //              REGISTERING     None  Some    Libprocess
1017  //              REGISTERING     None  None    HTTP
1018  //
1019  // *            REGISTERING     None  None    Not known yet
1020  // *            *               None  Some    Libprocess
1021  // *            *               Some  None    HTTP
1024 
1025  // Tasks can be found in one of the following four data structures:
1026  //
1027  // TODO(bmahler): Make these private to enforce that the task
1028  // lifecycle helper functions are not being bypassed, and provide
1029  // public views into them.
1030 
1031  // Not yet launched tasks. This also includes tasks from `queuedTaskGroups`.
1033 
1034  // Not yet launched task groups. This is needed for correctly sending
1035  // TASK_KILLED status updates for all tasks in the group if any of the
1036  // tasks were killed before the executor could register with the agent.
1037  std::vector<TaskGroupInfo> queuedTaskGroups;
1038 
1039  // Running.
1041 
1042  // Terminated but pending updates.
1044 
1045  // Terminated and updates acked.
1046  // NOTE: We use a shared pointer for Task because clang doesn't like
1047  // Boost's implementation of circular_buffer with Task (Boost
1048  // attempts to do some memset's which are unsafe).
1049  boost::circular_buffer<std::shared_ptr<Task>> completedTasks;
1050 
1051  // When the slave initiates a destroy of the container, we expect a
1052  // termination to occur. The 'pendingTermination' indicates why the
1053  // slave initiated the destruction and will influence the
1054  // information sent in the status updates for any remaining
1055  // non-terminal tasks.
1057 
1058 private:
1059  Executor(const Executor&) = delete;
1060  Executor& operator=(const Executor&) = delete;
1061 
1062  bool isGeneratedForCommandTask_;
1063 };
1064 
1065 
1066 // Information about a framework.
1068 {
1069 public:
1070  Framework(
1071  Slave* slave,
1072  const Flags& slaveFlags,
1073  const FrameworkInfo& info,
1074  const Option<process::UPID>& pid);
1075 
1076  ~Framework();
1077 
1078  // Returns whether the framework is idle, where idle is
1079  // defined as having no activity:
1080  // (1) The framework has no non-terminal tasks and executors.
1081  // (2) All status updates have been acknowledged.
1082  //
1083  // TODO(bmahler): The framework should also not be considered
1084  // idle if there are unacknowledged updates for "pending" tasks.
1085  bool idle() const;
1086 
1087  void checkpointFramework() const;
1088 
1089  const FrameworkID id() const { return info.id(); }
1090 
1091  Try<Executor*> addExecutor(const ExecutorInfo& executorInfo);
1092  Executor* getExecutor(const ExecutorID& executorId) const;
1093  Executor* getExecutor(const TaskID& taskId) const;
1094 
1095  void destroyExecutor(const ExecutorID& executorId);
1096 
1097  void recoverExecutor(
1098  const state::ExecutorState& state,
1099  bool recheckpointExecutor,
1100  const hashset<TaskID>& tasksToRecheckpoint);
1101 
1102  void addPendingTask(
1103  const ExecutorID& executorId,
1104  const TaskInfo& task);
1105 
1106  // Note that these tasks will also be tracked within `pendingTasks`.
1107  void addPendingTaskGroup(
1108  const ExecutorID& executorId,
1109  const TaskGroupInfo& taskGroup);
1110 
1111  bool hasTask(const TaskID& taskId) const;
1112  bool isPending(const TaskID& taskId) const;
1113 
1114  // Returns the task group associated with a pending task.
1115  Option<TaskGroupInfo> getTaskGroupForPendingTask(const TaskID& taskId);
1116 
1117  // Returns whether the pending task was removed.
1118  bool removePendingTask(const TaskID& taskId);
1119 
1120  Option<ExecutorID> getExecutorIdForPendingTask(const TaskID& taskId) const;
1121 
1122  Resources allocatedResources() const;
1123 
1124  enum State
1125  {
1126  RUNNING, // First state of a newly created framework.
1127  TERMINATING, // Framework is shutting down in the cluster.
1128  } state;
1129 
1130  // We store the pointer to 'Slave' to get access to its methods and
1131  // variables. One could imagine 'Framework' being an inner class of
1132  // the 'Slave' class.
1134 
1135  FrameworkInfo info;
1136 
1138 
1139  // Frameworks using the scheduler driver will have a 'pid',
1140  // which allows us to send executor messages directly to the
1141  // driver. Frameworks using the HTTP API (in 0.24.0) will
1142  // not have a 'pid', in which case executor messages are
1143  // sent through the master.
1145 
1146  // Executors can be found in one of the following
1147  // data structures:
1148  //
1149  // TODO(bmahler): Make these private to enforce that
1150  // the executor lifecycle helper functions are not
1151  // being bypassed, and provide public views into them.
1152 
1153  // Executors with pending tasks.
1155 
1156  // Sequences in this map are used to enforce the order of tasks launched on
1157  // each executor.
1158  //
1159  // Note on the lifecycle of the sequence: if the corresponding executor struct
1160  // has not been created, we tie the lifecycle of the sequence to the first
1161  // task in the sequence (which must have the `launch_executor` flag set to
1162  // true modulo MESOS-3870). If the task fails to launch before creating the
1163  // executor struct, we will delete the sequence. Once the executor struct is
1164  // created, we tie the lifecycle of the sequence to the executor struct.
1165  //
1166  // TODO(mzhu): Create the executor struct early and put the sequence in it.
1168 
1169  // Pending task groups. This is needed for correctly sending
1170  // TASK_KILLED status updates for all tasks in the group if
1171  // any of the tasks are killed while pending.
1172  std::vector<TaskGroupInfo> pendingTaskGroups;
1173 
1174  // Current running executors.
1176 
1177  boost::circular_buffer<process::Owned<Executor>> completedExecutors;
1178 
1179 private:
1180  Framework(const Framework&) = delete;
1181  Framework& operator=(const Framework&) = delete;
1182 };
1183 
1184 
1186 {
1188  const ResourceProviderInfo& _info,
1189  const Resources& _totalResources,
1190  const UUID& _resourceVersion)
1191  : info(_info),
1192  totalResources(_totalResources),
1193  resourceVersion(_resourceVersion) {}
1194 
1195  void addOperation(Operation* operation);
1196  void removeOperation(Operation* operation);
1197 
1198  ResourceProviderInfo info;
1200 
1201  // Used to establish the relationship between the operation and the
1202  // resources that the operation is operating on. Each resource
1203  // provider will keep a resource version UUID, and change it when it
1204  // believes that the resources from this resource provider are out
1205  // of sync from the master's view. The master will keep track of
1206  // the last known resource version UUID for each resource provider,
1207  // and attach the resource version UUID in each operation it sends
1208  // out. The resource provider should reject operations that have a
1209  // different resource version UUID than that it maintains, because
1210  // this means the operation is operating on resources that might
1211  // have already been invalidated.
1213 
1214  // Pending operations or terminal operations that have
1215  // unacknowledged status updates.
1217 };
1218 
1219 
// Returns a map of environment variables necessary in order to launch an
// executor.
1233 std::map<std::string, std::string> executorEnvironment(
1234  const Flags& flags,
1235  const ExecutorInfo& executorInfo,
1236  const std::string& directory,
1237  const SlaveID& slaveId,
1238  const process::PID<Slave>& slavePid,
1239  const Option<Secret>& authenticationToken,
1240  bool checkpoint);
1241 
1242 
1243 std::ostream& operator<<(std::ostream& stream, Executor::State state);
1244 std::ostream& operator<<(std::ostream& stream, Framework::State state);
1245 std::ostream& operator<<(std::ostream& stream, Slave::State state);
1246 
1247 } // namespace slave {
1248 } // namespace internal {
1249 } // namespace mesos {
1250 
1251 #endif // __SLAVE_HPP__
void schedulerMessage(const SlaveID &slaveId, const FrameworkID &frameworkId, const ExecutorID &executorId, const std::string &data)
struct mesos::internal::slave::Slave::RecoveryInfo recoveryInfo
Definition: path.hpp:26
virtual process::Future< ResourceUsage > usage()
void executorMessage(const SlaveID &slaveId, const FrameworkID &frameworkId, const ExecutorID &executorId, const std::string &data)
ResourceProvider(const ResourceProviderInfo &_info, const Resources &_totalResources, const UUID &_resourceVersion)
Definition: slave.hpp:1187
Try< uid_t > uid(const std::string &path, const FollowSymlink follow=FollowSymlink::FOLLOW_SYMLINK)
Definition: stat.hpp:215
Definition: nothing.hpp:16
ContentType
Definition: http.hpp:43
Definition: http.hpp:42
Definition: option.hpp:28
Try< bool > update(const std::string &link, const Handle &parent, uint16_t protocol, const action::Mirror &mirror)
State
Definition: slave.hpp:981
virtual void exited(const process::UPID &pid)
Invoked when a linked process has exited.
void _reregisterExecutor(const process::Future< Nothing > &future, const FrameworkID &frameworkId, const ExecutorID &executorId, const ContainerID &containerId)
Duration age(double usage)
Definition: master.hpp:27
const Option< std::string > user
Definition: slave.hpp:1006
std::ostream & operator<<(std::ostream &stream, const MesosContainerizerProcess::State &state)
virtual void finalize()
Invoked when a process is terminated.
const ExecutorInfo info
Definition: slave.hpp:995
virtual void removeFramework(Framework *framework)
Definition: check.hpp:33
void send(const Message &message)
Definition: slave.hpp:948
virtual void shutdownExecutor(const process::UPID &from, const FrameworkID &frameworkId, const ExecutorID &executorId)
void detachTaskVolumeDirectories(const ExecutorInfo &executorInfo, const ContainerID &executorContainerId, const std::vector< Task > &tasks)
void run(const FrameworkInfo &frameworkInfo, ExecutorInfo executorInfo, Option< TaskInfo > task, Option< TaskGroupInfo > taskGroup, const std::vector< ResourceVersionUUID > &resourceVersionUuids, const process::UPID &pid, const Option< bool > &launchExecutor)
void _checkDiskUsage(const process::Future< double > &usage)
Framework * getFramework(const FrameworkID &frameworkId) const
Definition: hashset.hpp:53
virtual void ___run(const process::Future< Nothing > &future, const FrameworkID &frameworkId, const ExecutorID &executorId, const ContainerID &containerId, const std::vector< TaskInfo > &tasks, const std::vector< TaskGroupInfo > &taskGroups)
Definition: protobuf_utils.hpp:261
void _statusUpdateAcknowledgement(const process::Future< bool > &future, const TaskID &taskId, const FrameworkID &frameworkId, const UUID &uuid)
void removeExecutor(Framework *framework, Executor *executor)
process::Future< Nothing > _recoverContainerizer(const Option< state::SlaveState > &state)
process::Future< Nothing > closed() const
Definition: slave.hpp:875
Result< std::string > user(Option< uid_t > uid=None())
Definition: su.hpp:277
hashmap< ExecutorID, process::Sequence > taskLaunchSequences
Definition: slave.hpp:1167
v1::AgentID evolve(const SlaveID &slaveId)
Result< ProcessStatus > status(pid_t pid)
Definition: proc.hpp:166
Definition: resources.hpp:79
UUID resourceVersion
Definition: slave.hpp:1212
friend class Executor
Definition: slave.hpp:589
void handleRunTaskGroupMessage(const process::UPID &from, RunTaskGroupMessage &&runTaskGroupMessage)
virtual void runTaskGroup(const process::UPID &from, const FrameworkInfo &frameworkInfo, const ExecutorInfo &executorInfo, const TaskGroupInfo &taskGroupInfo, const std::vector< ResourceVersionUUID > &resourceVersionUuids, const Option< bool > &launchExecutor)
bool reconnect
Definition: slave.hpp:417
virtual void executorTerminated(const FrameworkID &frameworkId, const ExecutorID &executorId, const process::Future< Option< mesos::slave::ContainerTermination >> &termination)
Definition: lambda.hpp:30
Option< process::UPID > pid
Definition: slave.hpp:1144
Definition: files.hpp:73
void subscribe(HttpConnection http, const executor::Call::Subscribe &subscribe, Framework *framework, Executor *executor)
hashmap< ExecutorID, hashmap< TaskID, TaskInfo > > pendingTasks
Definition: slave.hpp:1154
enum mesos::internal::slave::Slave::State state
Operation
Definition: cgroups.hpp:441
void _checkImageDiskUsage(const process::Future< double > &usage)
void reregisterExecutor(const process::UPID &from, const FrameworkID &frameworkId, const ExecutorID &executorId, const std::vector< TaskInfo > &tasks, const std::vector< StatusUpdate > &updates)
Definition: flags.hpp:39
Slave * slave
Definition: slave.hpp:1133
Definition: duration.hpp:32
Option< mesos::slave::ContainerTermination > pendingTermination
Definition: slave.hpp:1056
std::map< std::string, std::string > executorEnvironment(const Flags &flags, const ExecutorInfo &executorInfo, const std::string &directory, const SlaveID &slaveId, const process::PID< Slave > &slavePid, const Option< Secret > &authenticationToken, bool checkpoint)
Returns a map of environment variables necessary in order to launch an executor.
void recoverFramework(const state::FrameworkState &state, const hashset< ExecutorID > &executorsToRecheckpoint, const hashmap< ExecutorID, hashset< TaskID >> &tasksToRecheckpoint)
Definition: protobuf_utils.hpp:384
void send(const process::UPID &to, const google::protobuf::Message &message)
Definition: protobuf.hpp:118
bool isSome() const
Definition: option.hpp:115
Definition: task_status_update_manager.hpp:58
LinkedHashMap< TaskID, TaskInfo > queuedTasks
Definition: slave.hpp:1032
void statusUpdateAcknowledgement(const process::UPID &from, const SlaveID &slaveId, const FrameworkID &frameworkId, const TaskID &taskId, const std::string &uuid)
const bool checkpoint
Definition: slave.hpp:1008
void signaled(int signal, int uid)
bool close()
Definition: slave.hpp:870
ContentType contentType
Definition: slave.hpp:881
virtual void _shutdownExecutor(Framework *framework, Executor *executor)
void checkpointResourcesMessage(const std::vector< Resource > &checkpointedResources)
Definition: hashmap.hpp:38
process::Future< Nothing > garbageCollect(const std::string &path)
void operationStatusAcknowledgement(const process::UPID &from, const AcknowledgeOperationStatusMessage &acknowledgement)
Definition: resource_estimator.hpp:37
boost::circular_buffer< std::shared_ptr< Task > > completedTasks
Definition: slave.hpp:1049
process::Future< Secret > generateSecret(const FrameworkID &frameworkId, const ExecutorID &executorId, const ContainerID &containerId)
hashmap< std::string, MessageHandler > message
Definition: process.hpp:451
Resources allocatedResources(const Resources &resources, const std::string &role)
Definition: containerizer.hpp:57
void shutdownExecutorTimeout(const FrameworkID &frameworkId, const ExecutorID &executorId, const ContainerID &containerId)
void attachTaskVolumeDirectory(const ExecutorInfo &executorInfo, const ContainerID &executorContainerId, const Task &task)
Definition: slave.hpp:116
void registered(const process::UPID &from, const SlaveID &slaveId, const MasterSlaveConnection &connection)
An "untyped" PID, used to encapsulate the process ID for lower-layer abstractions (eg...
Definition: pid.hpp:39
Option< HttpConnection > http
Definition: slave.hpp:1022
void pingTimeout(process::Future< Option< MasterInfo >> future)
virtual void initialize()
Invoked when a process gets spawned.
void handleRunTaskMessage(const process::UPID &from, RunTaskMessage &&runTaskMessage)
Definition: http.hpp:339
const ContainerID containerId
Definition: slave.hpp:999
Option< process::UPID > pid
Definition: slave.hpp:1023
ExecutorInfo getExecutorInfo(const FrameworkInfo &frameworkInfo, const TaskInfo &task) const
void forward(StatusUpdate update)
Definition: protobuf_utils.hpp:455
void reconcileOperations(const ReconcileOperationsMessage &message)
Definition: spec.hpp:30
const FrameworkID id() const
Definition: slave.hpp:1089
void launchExecutor(const Option< process::Future< Secret >> &future, const FrameworkID &frameworkId, const ExecutorID &executorId, const Option< TaskInfo > &taskInfo)
const T & get() const &
Definition: option.hpp:118
protobuf::framework::Capabilities capabilities
Definition: slave.hpp:1137
process::Future< Nothing > _recover()
virtual process::Future< Nothing > _run(const FrameworkInfo &frameworkInfo, const ExecutorInfo &executorInfo, const Option< TaskInfo > &task, const Option< TaskGroupInfo > &taskGroup, const std::vector< ResourceVersionUUID > &resourceVersionUuids, const Option< bool > &launchExecutor)
ResourceProviderInfo info
Definition: slave.hpp:1198
The SecretGenerator interface represents a mechanism to create a secret from a principal.
Definition: secret_generator.hpp:34
Definition: protobuf.hpp:100
const ExecutorID id
Definition: slave.hpp:994
HttpConnection(const process::http::Pipe::Writer &_writer, ContentType _contentType)
Definition: slave.hpp:855
State
Definition: slave.hpp:1124
::recordio::Encoder< v1::executor::Event > encoder
Definition: slave.hpp:882
void executorLaunched(const FrameworkID &frameworkId, const ExecutorID &executorId, const ContainerID &containerId, const process::Future< Containerizer::LaunchResult > &future)
Definition: time.hpp:23
void reregistered(const process::UPID &from, const SlaveID &slaveId, const std::vector< ReconcileTasksMessage > &reconciliations, const MasterSlaveConnection &connection)
void fileAttached(const process::Future< Nothing > &result, const std::string &path, const std::string &virtualPath)
std::vector< TaskGroupInfo > pendingTaskGroups
Definition: slave.hpp:1172
Try< std::vector< Entry > > list(const std::string &hierarchy, const std::string &cgroup)
void _statusUpdate(StatusUpdate update, const Option< process::UPID > &pid, const ExecutorID &executorId, const Option< process::Future< ContainerStatus >> &containerStatus)
Definition: boundedhashmap.hpp:27
const FrameworkID frameworkId
Definition: slave.hpp:997
void detected(const process::Future< Option< MasterInfo >> &_master)
Try< Nothing > checkpoint(const std::string &path, const std::string &message)
Definition: state.hpp:123
#define flags
Definition: decoder.hpp:18
A "process identifier" used to uniquely identify a process when dispatching messages.
Definition: pid.hpp:279
hashmap< ExecutorID, Executor * > executors
Definition: slave.hpp:1175
Definition: none.hpp:27
Definition: attributes.hpp:24
boost::circular_buffer< process::Owned< Executor > > completedExecutors
Definition: slave.hpp:1177
Slave * slave
Definition: slave.hpp:992
LinkedHashMap< TaskID, Task * > launchedTasks
Definition: slave.hpp:1040
process::Future< Nothing > recover(const Try< state::State > &state)
void ___statusUpdate(const process::Future< Nothing > &future, const StatusUpdate &update, const Option< process::UPID > &pid)
Definition: timer.hpp:30
void shutdownFramework(const process::UPID &from, const FrameworkID &frameworkId)
void applyOperation(const ApplyOperationMessage &message)
Nothing detachFile(const std::string &path)
virtual void killTask(const process::UPID &from, const KillTaskMessage &killTaskMessage)
bool send(const Message &message)
Definition: slave.hpp:863
friend class Framework
Definition: slave.hpp:590
virtual void __recover(const process::Future< Nothing > &future)
Slave(const std::string &id, const Flags &flags, mesos::master::detector::MasterDetector *detector, Containerizer *containerizer, Files *files, GarbageCollector *gc, TaskStatusUpdateManager *taskStatusUpdateManager, mesos::slave::ResourceEstimator *resourceEstimator, mesos::slave::QoSController *qosController, mesos::SecretGenerator *secretGenerator, const Option< Authorizer * > &authorizer)
Resources totalResources
Definition: slave.hpp:1199
std::vector< TaskGroupInfo > queuedTaskGroups
Definition: slave.hpp:1037
An abstraction of a Master detector which can be used to detect the leading master from a group...
Definition: detector.hpp:38
static Time now()
The current clock time for either the current process that makes this call or the global clock time i...
hashmap< UUID, Operation * > operations
Definition: slave.hpp:1216
virtual void __run(const FrameworkInfo &frameworkInfo, const ExecutorInfo &executorInfo, const Option< TaskInfo > &task, const Option< TaskGroupInfo > &taskGroup, const std::vector< ResourceVersionUUID > &resourceVersionUuids, const Option< bool > &launchExecutor)
void ping(const process::UPID &from, bool connected)
Definition: metrics.hpp:32
void checkpointResources(std::vector< Resource > checkpointedResources, bool changeTotal)
void __statusUpdate(const Option< process::Future< Nothing >> &future, const StatusUpdate &update, const Option< process::UPID > &pid, const ExecutorID &executorId, const ContainerID &containerId, bool checkpoint)
void doReliableRegistration(Duration maxBackoff)
Try< Nothing > bind(int_fd s, const Address &address)
Definition: network.hpp:46
process::Promise< Nothing > recovered
Definition: slave.hpp:412
Definition: qos_controller.hpp:44
std::string serialize(ContentType contentType, const google::protobuf::Message &message)
const std::string directory
Definition: slave.hpp:1001
void statusUpdate(StatusUpdate update, const Option< process::UPID > &pid)
URI http(const std::string &host, const std::string &path="/", const Option< int > &port=None(), const Option< std::string > &query=None(), const Option< std::string > &fragment=None(), const Option< std::string > &user=None(), const Option< std::string > &password=None())
Creates an http URI with the given parameters.
Definition: http.hpp:35
State
Definition: slave.hpp:399
virtual void runTask(const process::UPID &from, const FrameworkInfo &frameworkInfo, const FrameworkID &frameworkId, const process::UPID &pid, const TaskInfo &task, const std::vector< ResourceVersionUUID > &resourceVersionUuids, const Option< bool > &launchExecutor)
FrameworkInfo info
Definition: slave.hpp:1135
Definition: owned.hpp:36
process::http::Pipe::Writer writer
Definition: slave.hpp:880
void shutdown(const process::UPID &from, const std::string &message)
Definition: slave.hpp:890
virtual void _qosCorrections(const process::Future< std::list< mesos::slave::QoSCorrection >> &correction)
Executor * getExecutor(const FrameworkID &frameworkId, const ExecutorID &executorId) const
Definition: parse.hpp:33
void registerExecutor(const process::UPID &from, const FrameworkID &frameworkId, const ExecutorID &executorId)
PID< MetricsProcess > metrics
void updateFramework(const UpdateFrameworkMessage &message)
LinkedHashMap< TaskID, Task * > terminatedTasks
Definition: slave.hpp:1043
constexpr const char * name
Definition: shell.hpp:43
void registerExecutorTimeout(const FrameworkID &frameworkId, const ExecutorID &executorId, const ContainerID &containerId)
Definition: slave.hpp:1067
Definition: authenticatee.hpp:29