Apache Mesos
containerizer.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __MESOS_CONTAINERIZER_HPP__
18 #define __MESOS_CONTAINERIZER_HPP__
19 
20 #include <vector>
21 
23 
24 #include <mesos/slave/isolator.hpp>
25 
26 #include <process/clock.hpp>
27 #include <process/http.hpp>
28 #include <process/id.hpp>
29 #include <process/sequence.hpp>
30 #include <process/shared.hpp>
31 #include <process/time.hpp>
32 
34 
35 #include <stout/hashmap.hpp>
36 #include <stout/multihashmap.hpp>
37 #include <stout/os/int_fd.hpp>
38 
39 #include "slave/gc.hpp"
40 #include "slave/state.hpp"
41 
43 
45 
47 
49 
51 
52 namespace mesos {
53 namespace internal {
54 namespace slave {
55 
// Logs at the INFO level unless the container class is `DEBUG` (i.e., the
// class is not set or is `DEFAULT`). For a `DEBUG` container the line is only
// emitted when verbose logging at level 1 is enabled, which is the equivalent
// of using VLOG(1). The purpose of this macro is to avoid polluting agent logs
// with information related to `DEBUG` containers, as this type of container
// can run periodically.
//
// The argument is wrapped in parentheses so that an arbitrary expression
// (e.g., a conditional expression) is compared against `DEBUG` as a whole
// instead of being re-associated by operator precedence.
#define LOG_BASED_ON_CLASS(containerClass) \
  LOG_IF(INFO, ((containerClass) != ContainerClass::DEBUG) || VLOG_IS_ON(1))
62 
63 // Forward declaration.
64 class MesosContainerizerProcess;
65 
66 
68 {
69 public:
71  const Flags& flags,
72  bool local,
73  Fetcher* fetcher,
74  GarbageCollector* gc = nullptr,
75  SecretResolver* secretResolver = nullptr,
76  const Option<NvidiaComponents>& nvidia = None(),
77  VolumeGidManager* volumeGidManager = nullptr,
78  PendingFutureTracker* futureTracker = nullptr);
79 
81  const Flags& flags,
82  bool local,
83  Fetcher* fetcher,
84  GarbageCollector* gc,
85  const process::Owned<Launcher>& launcher,
86  const process::Shared<Provisioner>& provisioner,
87  const std::vector<process::Owned<mesos::slave::Isolator>>& isolators,
88  VolumeGidManager* volumeGidManager = nullptr);
89 
90  ~MesosContainerizer() override;
91 
93  const Option<state::SlaveState>& state) override;
94 
96  const ContainerID& containerId,
97  const mesos::slave::ContainerConfig& containerConfig,
98  const std::map<std::string, std::string>& environment,
99  const Option<std::string>& pidCheckpointPath) override;
100 
102  const ContainerID& containerId) override;
103 
105  const ContainerID& containerId,
106  const Resources& resources) override;
107 
109  const ContainerID& containerId) override;
110 
112  const ContainerID& containerId) override;
113 
115  const ContainerID& containerId) override;
116 
118  const ContainerID& containerId) override;
119 
121  const ContainerID& containerId,
122  int signal) override;
123 
125 
126  process::Future<Nothing> remove(const ContainerID& containerId) override;
127 
129  const std::vector<Image>& excludedImages) override;
130 
131 private:
132  explicit MesosContainerizer(
134 
136 };
137 
138 
140  : public process::Process<MesosContainerizerProcess>
141 {
142 public:
144  const Flags& _flags,
145  Fetcher* _fetcher,
146  GarbageCollector* _gc,
147  IOSwitchboard* _ioSwitchboard,
148  const process::Owned<Launcher>& _launcher,
149  const process::Shared<Provisioner>& _provisioner,
150  const std::vector<process::Owned<mesos::slave::Isolator>>& _isolators,
151  VolumeGidManager* _volumeGidManager,
152  const Option<int_fd>& _initMemFd,
153  const Option<int_fd>& _commandExecutorMemFd)
154  : ProcessBase(process::ID::generate("mesos-containerizer")),
155  flags(_flags),
156  fetcher(_fetcher),
157  gc(_gc),
158  ioSwitchboard(_ioSwitchboard),
159  launcher(_launcher),
160  provisioner(_provisioner),
161  isolators(_isolators),
162  volumeGidManager(_volumeGidManager),
163  initMemFd(_initMemFd),
164  commandExecutorMemFd(_commandExecutorMemFd) {}
165 
167  {
168  if (initMemFd.isSome()) {
169  Try<Nothing> close = os::close(initMemFd.get());
170  if (close.isError()) {
171  LOG(WARNING) << "Failed to close memfd '" << stringify(initMemFd.get())
172  << "': " << close.error();
173  }
174  }
175 
176  if (commandExecutorMemFd.isSome()) {
177  Try<Nothing> close = os::close(commandExecutorMemFd.get());
178  if (close.isError()) {
179  LOG(WARNING) << "Failed to close memfd '"
180  << stringify(commandExecutorMemFd.get())
181  << "': " << close.error();
182  }
183  }
184  }
185 
187  const Option<state::SlaveState>& state);
188 
190  const ContainerID& containerId,
191  const mesos::slave::ContainerConfig& containerConfig,
192  const std::map<std::string, std::string>& environment,
193  const Option<std::string>& pidCheckpointPath);
194 
196  const ContainerID& containerId);
197 
199  const ContainerID& containerId,
200  const Resources& resources);
201 
203  const ContainerID& containerId);
204 
206  const ContainerID& containerId);
207 
209  const ContainerID& containerId);
210 
212  const ContainerID& containerId,
213  int_fd pipeWrite);
214 
216  const ContainerID& containerId,
217  const Option<mesos::slave::ContainerTermination>& termination);
218 
219  virtual process::Future<bool> kill(
220  const ContainerID& containerId,
221  int signal);
222 
223  virtual process::Future<Nothing> remove(const ContainerID& containerId);
224 
226 
228  const std::vector<Image>& excludedImages);
229 
230 private:
// State of a container as tracked by this process. A newly constructed
// `Container` starts in `STARTING`, and `transition()` is the helper that
// changes a container's state.
// NOTE(review): the enumerators look like they are listed in launch order
// with teardown (`DESTROYING`) last -- confirm against the implementation.
231  enum State
232  {
233  STARTING,
234  PROVISIONING,
235  PREPARING,
236  ISOLATING,
237  FETCHING,
238  RUNNING,
239  DESTROYING
240  };
241 
242  friend std::ostream& operator<<(std::ostream& stream, const State& state);
243 
244  process::Future<Nothing> _recover(
245  const std::vector<mesos::slave::ContainerState>& recoverable,
246  const hashset<ContainerID>& orphans);
247 
248  process::Future<std::vector<Nothing>> recoverIsolators(
249  const std::vector<mesos::slave::ContainerState>& recoverable,
250  const hashset<ContainerID>& orphans);
251 
252  process::Future<Nothing> recoverProvisioner(
253  const std::vector<mesos::slave::ContainerState>& recoverable,
254  const hashset<ContainerID>& orphans);
255 
256  process::Future<Nothing> __recover(
257  const std::vector<mesos::slave::ContainerState>& recovered,
258  const hashset<ContainerID>& orphans);
259 
261  const ContainerID& containerId,
262  const Option<ProvisionInfo>& provisionInfo);
263 
265  const ContainerID& containerId);
266 
268  const ContainerID& containerId,
269  const Option<mesos::slave::ContainerIO>& containerIO,
270  const std::map<std::string, std::string>& environment,
271  const Option<std::string>& pidCheckpointPath);
272 
274  const ContainerID& containerId,
275  pid_t _pid);
276 
277  // Continues 'destroy()' once nested containers are handled.
278  void _destroy(
279  const ContainerID& containerId,
281  const State& previousState,
282  const std::vector<
284 
285  // Continues '_destroy()' once isolators have completed.
286  void __destroy(
287  const ContainerID& containerId,
288  const Option<mesos::slave::ContainerTermination>& termination);
289 
290  // Continues '__destroy()' once all processes have been killed
291  // by the launcher.
292  void ___destroy(
293  const ContainerID& containerId,
295  const process::Future<Nothing>& future);
296 
297  // Continues '___destroy()' once we get the exit status of the container.
298  void ____destroy(
299  const ContainerID& containerId,
300  const Option<mesos::slave::ContainerTermination>& termination);
301 
302  // Continues '____destroy()' once all isolators have completed
303  // cleanup.
304  void _____destroy(
305  const ContainerID& containerId,
307  const process::Future<std::vector<process::Future<Nothing>>>& cleanups);
308 
309  // Continues '_____destroy()' once the provisioner has completed destroy.
310  void ______destroy(
311  const ContainerID& containerId,
313  const process::Future<bool>& destroy);
314 
315  // Schedules a path for garbage collection based on its modification time.
316  // Equivalent to the `Slave::garbageCollect` method.
317  process::Future<Nothing> garbageCollect(const std::string& path);
318 
319  // Callback for when an isolator limits a container and impacts the
320  // processes. This will trigger container destruction.
321  void limited(
322  const ContainerID& containerId,
324 
325  // Helper for reaping the 'init' process of a container.
327  const ContainerID& containerId,
328  pid_t pid);
329 
330  // Callback for when the executor exits. This will trigger container
331  // destroy.
332  void reaped(const ContainerID& containerId);
333 
334  // TODO(jieyu): Consider introducing an Isolators struct and moving
335  // all isolator related operations to that struct.
337  const ContainerID& containerId);
338 
339  const Flags flags;
340  Fetcher* fetcher;
341 
342  // NOTE: This actor may be nullptr in tests, as not all tests need to
343  // share this actor with the agent.
344  GarbageCollector* gc;
345 
346  IOSwitchboard* ioSwitchboard;
347  const process::Owned<Launcher> launcher;
348  const process::Shared<Provisioner> provisioner;
349  const std::vector<process::Owned<mesos::slave::Isolator>> isolators;
350  VolumeGidManager* volumeGidManager;
351  const Option<int_fd> initMemFd;
352  const Option<int_fd> commandExecutorMemFd;
353 
354  struct Container
355  {
356  Container()
357  : state(STARTING),
358  lastStateTransition(process::Clock::now()),
359  sequence("mesos-container-status-updates") {}
360 
361  // Promise for futures returned from wait().
363 
364  // NOTE: this represents 'PID 1', i.e., the "init" of the
365  // container that we created (it may be for an executor, or any
366  // arbitrary process that has been launched in the event of nested
367  // containers).
368  Option<pid_t> pid;
369 
370  // Sandbox directory for the container. It is optional here because
371  // we don't keep track of sandbox directory for orphan containers.
372  // It is not checkpointed explicitly; on recovery, it is reconstructed
373  // from executor's directory and hierarchy of containers.
374  //
375  // NOTE: This holds the sandbox path in the host mount namespace,
376  // while MESOS_SANDBOX is the path in the container mount namespace.
377  Option<std::string> directory;
378 
379  // We keep track of the future exit status for the container if it
380  // has been launched. If the container has not been launched yet,
381  // 'status' will be set to None().
382  //
383  // NOTE: A container having an exit status does not mean that it
384  // has been properly destroyed. We still need to perform cleanup on
385  // isolators and provisioner after that.
387 
388  // We keep track of the future for 'provisioner->provision' so
389  // that we can discard the provisioning for the container which
390  // is destroyed when it is being provisioned.
391  process::Future<ProvisionInfo> provisioning;
392 
393  // We keep track of the future that is waiting for all the
394  // 'isolator->prepare' to finish so that destroy will only start
395  // calling cleanup after all isolators have finished preparing.
397  launchInfos;
398 
399  // We keep track of the future that is waiting for all the
400  // 'isolator->isolate' futures so that destroy will only start
401  // calling cleanup after all isolators have finished isolating.
403 
404  // We keep track of the resources for each container so we can set
405  // the ResourceStatistics limits in usage().
407 
408  // The configuration for the container to be launched.
409  // This can only be None if the underlying container is launched
410  // before we checkpoint `ContainerConfig` in MESOS-6894.
411  // TODO(zhitao): Drop the `Option` part at the end of deprecation
412  // cycle.
414 
415  // The container class that can be `DEFAULT` or `DEBUG`.
416  // Returns `DEFAULT` even if the container class is not defined.
417  mesos::slave::ContainerClass containerClass();
418 
419  // Container's information at the moment it was launched. For example,
420  // used to bootstrap the launch information of future child DEBUG
421  // containers. Checkpointed and restored on recovery. Optional because
422  // it is not set for orphan containers.
423  //
424  // NOTE: Some of this data may change during the container lifetime,
425  // e.g., the working directory. Such changes are not captured here,
426  // which might be problematic, e.g., for DEBUG containers relying on
427  // some data in the parent working directory.
429 
430  State state;
431  process::Time lastStateTransition;
432 
433  // Used when `status` needs to be collected from isolators
434  // associated with this container. `Sequence` allows us to
435  // maintain the order of `status` requests for a given container.
436  process::Sequence sequence;
437 
438  // Child containers nested under this container.
440  };
441 
443 
444  // Helper to transition container state.
445  void transition(const ContainerID& containerId, const State& state);
446 
447  // Helper to determine if a container is supported by an isolator.
448  bool isSupportedByIsolator(
449  const ContainerID& containerId,
450  bool isolatorSupportsNesting,
451  bool isolatorSupportsStandalone);
452 
// Metrics kept by this process; the struct is instantiated in place as the
// `metrics` member.
// NOTE(review): the constructor and destructor are only declared here;
// presumably they register and unregister the counter -- confirm in the
// implementation file.
453  struct Metrics
454  {
455  Metrics();
456  ~Metrics();
457 
// NOTE(review): by its name this counts container destroy errors --
// confirm where it is incremented.
458  process::metrics::Counter container_destroy_errors;
459  } metrics;
460 };
461 
462 
// Stream-insertion operator for `MesosContainerizerProcess::State`. It is
// also declared as a friend inside `MesosContainerizerProcess`, where the
// `State` enum sits under `private:`. Only the declaration is visible here;
// the definition is not part of this listing.
463 std::ostream& operator<<(
464  std::ostream& stream,
465  const MesosContainerizerProcess::State& state);
466 
467 } // namespace slave {
468 } // namespace internal {
469 } // namespace mesos {
470 
471 #endif // __MESOS_CONTAINERIZER_HPP__
process::Future< hashset< ContainerID > > containers() override
Definition: path.hpp:29
Try< Nothing > isolate(const std::string &hierarchy, const std::string &cgroup, pid_t pid)
std::string generate(const std::string &prefix="")
Returns 'prefix(N)' where N represents the number of instances where the same prefix (wrt...
process::Future< Containerizer::LaunchResult > launch(const ContainerID &containerId, const mesos::slave::ContainerConfig &containerConfig, const std::map< std::string, std::string > &environment, const Option< std::string > &pidCheckpointPath) override
~MesosContainerizerProcess() override
Definition: containerizer.hpp:166
Definition: option.hpp:29
std::ostream & operator<<(std::ostream &stream, const MesosContainerizerProcess::State &state)
Definition: fetcher.hpp:49
process::Future< Nothing > update(const ContainerID &containerId, const Resources &resources) override
Definition: check.hpp:33
process::Future< Nothing > pruneImages(const std::vector< Image > &excludedImages) override
process::Future< bool > kill(const ContainerID &containerId, int signal) override
Definition: resources.hpp:83
Definition: volume_gid_manager.hpp:42
Try< T > fetch(const std::string &value)
Definition: fetch.hpp:38
Future< Option< int > > reap(pid_t pid)
Definition: flags.hpp:39
process::Future< Option< mesos::slave::ContainerTermination > > wait(const ContainerID &containerId) override
Definition: counter.hpp:26
Definition: sequence.hpp:33
Definition: hashmap.hpp:38
Environment * environment
DWORD pid_t
Definition: windows.hpp:181
Definition: containerizer.hpp:63
Definition: owned.hpp:26
process::Future< Nothing > recover(const Option< state::SlaveState > &state) override
MesosContainerizerProcess(const Flags &_flags, Fetcher *_fetcher, GarbageCollector *_gc, IOSwitchboard *_ioSwitchboard, const process::Owned< Launcher > &_launcher, const process::Shared< Provisioner > &_provisioner, const std::vector< process::Owned< mesos::slave::Isolator >> &_isolators, VolumeGidManager *_volumeGidManager, const Option< int_fd > &_initMemFd, const Option< int_fd > &_commandExecutorMemFd)
Definition: containerizer.hpp:143
Definition: future_tracker.hpp:84
Try< Nothing > close(int fd)
Definition: close.hpp:24
process::Future< ResourceStatistics > usage(const ContainerID &containerId) override
Definition: agent.hpp:25
Definition: switchboard.hpp:53
static Try error(const E &e)
Definition: try.hpp:43
Definition: time.hpp:23
Definition: containerizer.hpp:139
process::Future< process::http::Connection > attach(const ContainerID &containerId) override
#define flags
Definition: decoder.hpp:18
Definition: none.hpp:27
Definition: attributes.hpp:24
bool isError() const
Definition: try.hpp:78
std::set< pid_t > children(pid_t, const std::list< Process > &, bool)
Definition: os.hpp:216
Definition: executor.hpp:48
static Try< MesosContainerizer * > create(const Flags &flags, bool local, Fetcher *fetcher, GarbageCollector *gc=nullptr, SecretResolver *secretResolver=nullptr, const Option< NvidiaComponents > &nvidia=None(), VolumeGidManager *volumeGidManager=nullptr, PendingFutureTracker *futureTracker=nullptr)
static Try< Resources > resources(const Flags &flags)
static Time now()
The current clock time for either the current process that makes this call or the global clock time i...
Definition: containerizer.hpp:67
Try< std::string > prepare(const std::string &baseHierarchy, const std::string &subsystem, const std::string &cgroup)
JSON::Object Metrics()
Definition: resolver.hpp:34
int int_fd
Definition: int_fd.hpp:35
std::string stringify(int flags)
Definition: owned.hpp:36
Definition: process.hpp:505
Definition: parse.hpp:33
PID< MetricsProcess > metrics
process::Future< Option< mesos::slave::ContainerTermination > > destroy(const ContainerID &containerId) override
process::Future< ContainerStatus > status(const ContainerID &containerId) override