Apache Mesos
containerizer.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __MESOS_CONTAINERIZER_HPP__
18 #define __MESOS_CONTAINERIZER_HPP__
19 
20 #include <vector>
21 
23 
24 #include <mesos/slave/isolator.hpp>
25 
26 #include <process/clock.hpp>
27 #include <process/http.hpp>
28 #include <process/id.hpp>
29 #include <process/sequence.hpp>
30 #include <process/shared.hpp>
31 #include <process/time.hpp>
32 
34 
35 #include <stout/hashmap.hpp>
36 #include <stout/multihashmap.hpp>
37 #include <stout/os/int_fd.hpp>
38 
39 #include "slave/gc.hpp"
40 #include "slave/state.hpp"
41 
43 
45 
47 
49 
51 
52 namespace mesos {
53 namespace internal {
54 namespace slave {
55 
56 // Expands to a `LOG_IF(INFO, ...)` statement whose condition depends on the
57 // container class: if the class is not `DEBUG` (i.e., it is not set or is
58 // `DEFAULT`), the line is always logged at the INFO level; for `DEBUG`
59 // containers it is logged only when `VLOG_IS_ON(1)` holds.
   //
   // The purpose of this macro is to avoid polluting agent logs with
   // information related to `DEBUG` containers, as this type of container can
   // run periodically.
   //
   // NOTE: `containerClass` is expanded without parentheses of its own, and
   // `ContainerClass` is referenced unqualified, so that name must be
   // resolvable wherever the macro is used.
60 #define LOG_BASED_ON_CLASS(containerClass) \
61  LOG_IF(INFO, (containerClass != ContainerClass::DEBUG) || VLOG_IS_ON(1))
62 
63 // Forward declaration.
64 class MesosContainerizerProcess;
65 
66 
68 {
69 public:
71  const Flags& flags,
72  bool local,
73  Fetcher* fetcher,
74  GarbageCollector* gc = nullptr,
75  SecretResolver* secretResolver = nullptr,
76  const Option<NvidiaComponents>& nvidia = None(),
77  VolumeGidManager* volumeGidManager = nullptr,
78  PendingFutureTracker* futureTracker = nullptr);
79 
81  const Flags& flags,
82  bool local,
83  Fetcher* fetcher,
84  GarbageCollector* gc,
85  const process::Owned<Launcher>& launcher,
86  const process::Shared<Provisioner>& provisioner,
87  const std::vector<process::Owned<mesos::slave::Isolator>>& isolators,
88  VolumeGidManager* volumeGidManager = nullptr);
89 
90  ~MesosContainerizer() override;
91 
93  const Option<state::SlaveState>& state) override;
94 
96  const ContainerID& containerId,
97  const mesos::slave::ContainerConfig& containerConfig,
98  const std::map<std::string, std::string>& environment,
99  const Option<std::string>& pidCheckpointPath) override;
100 
102  const ContainerID& containerId) override;
103 
105  const ContainerID& containerId,
106  const Resources& resourceRequests,
107  const google::protobuf::Map<
108  std::string, Value::Scalar>& resourceLimits = {}) override;
109 
111  const ContainerID& containerId) override;
112 
114  const ContainerID& containerId) override;
115 
117  const ContainerID& containerId) override;
118 
120  const ContainerID& containerId) override;
121 
123  const ContainerID& containerId,
124  int signal) override;
125 
127 
128  process::Future<Nothing> remove(const ContainerID& containerId) override;
129 
131  const std::vector<Image>& excludedImages) override;
132 
133 private:
134  explicit MesosContainerizer(
136 
138 };
139 
140 
142  : public process::Process<MesosContainerizerProcess>
143 {
144 public:
146  const Flags& _flags,
147  Fetcher* _fetcher,
148  GarbageCollector* _gc,
149  IOSwitchboard* _ioSwitchboard,
150  const process::Owned<Launcher>& _launcher,
151  const process::Shared<Provisioner>& _provisioner,
152  const std::vector<process::Owned<mesos::slave::Isolator>>& _isolators,
153  VolumeGidManager* _volumeGidManager,
154  const Option<int_fd>& _initMemFd,
155  const Option<int_fd>& _commandExecutorMemFd)
156  : ProcessBase(process::ID::generate("mesos-containerizer")),
157  flags(_flags),
158  fetcher(_fetcher),
159  gc(_gc),
160  ioSwitchboard(_ioSwitchboard),
161  launcher(_launcher),
162  provisioner(_provisioner),
163  isolators(_isolators),
164  volumeGidManager(_volumeGidManager),
165  initMemFd(_initMemFd),
166  commandExecutorMemFd(_commandExecutorMemFd) {}
167 
169  {
     // Close the file descriptors held in `initMemFd` and
     // `commandExecutorMemFd`, when they are set. A failed close is only
     // reported at the WARNING level; no other action is taken.
170  if (initMemFd.isSome()) {
171  Try<Nothing> close = os::close(initMemFd.get());
172  if (close.isError()) {
173  LOG(WARNING) << "Failed to close memfd '" << stringify(initMemFd.get())
174  << "': " << close.error();
175  }
176  }
177 
178  if (commandExecutorMemFd.isSome()) {
179  Try<Nothing> close = os::close(commandExecutorMemFd.get());
180  if (close.isError()) {
181  LOG(WARNING) << "Failed to close memfd '"
182  << stringify(commandExecutorMemFd.get())
183  << "': " << close.error();
184  }
185  }
186  }
187 
189  const Option<state::SlaveState>& state);
190 
192  const ContainerID& containerId,
193  const mesos::slave::ContainerConfig& containerConfig,
194  const std::map<std::string, std::string>& environment,
195  const Option<std::string>& pidCheckpointPath);
196 
198  const ContainerID& containerId);
199 
201  const ContainerID& containerId,
202  const Resources& resourceRequests,
203  const google::protobuf::Map<
204  std::string, Value::Scalar>& resourceLimits = {});
205 
207  const ContainerID& containerId);
208 
210  const ContainerID& containerId);
211 
213  const ContainerID& containerId);
214 
216  const ContainerID& containerId,
217  int_fd pipeWrite);
218 
220  const ContainerID& containerId,
221  const Option<mesos::slave::ContainerTermination>& termination);
222 
223  virtual process::Future<bool> kill(
224  const ContainerID& containerId,
225  int signal);
226 
227  virtual process::Future<Nothing> remove(const ContainerID& containerId);
228 
230 
232  const std::vector<Image>& excludedImages);
233 
234 private:
235  enum State
236  {
     // Values stored in `Container::state`. A `Container` is constructed in
     // STARTING, and `transition()` is the helper used to change the state.
     // NOTE(review): the declaration order looks like the launch sequence
     // followed by teardown -- confirm against the implementation before
     // relying on the numeric ordering.
237  STARTING,
238  PROVISIONING,
239  PREPARING,
240  ISOLATING,
241  FETCHING,
242  RUNNING,
243  DESTROYING
244  };
245 
246  friend std::ostream& operator<<(std::ostream& stream, const State& state);
247 
248  process::Future<Nothing> _recover(
249  const std::vector<mesos::slave::ContainerState>& recoverable,
250  const hashset<ContainerID>& orphans);
251 
252  process::Future<std::vector<Nothing>> recoverIsolators(
253  const std::vector<mesos::slave::ContainerState>& recoverable,
254  const hashset<ContainerID>& orphans);
255 
256  process::Future<Nothing> recoverProvisioner(
257  const std::vector<mesos::slave::ContainerState>& recoverable,
258  const hashset<ContainerID>& orphans);
259 
260  process::Future<Nothing> __recover(
261  const std::vector<mesos::slave::ContainerState>& recovered,
262  const hashset<ContainerID>& orphans);
263 
265  const ContainerID& containerId,
266  const Option<ProvisionInfo>& provisionInfo);
267 
269  const ContainerID& containerId);
270 
272  const ContainerID& containerId,
273  const Option<mesos::slave::ContainerIO>& containerIO,
274  const std::map<std::string, std::string>& environment,
275  const Option<std::string>& pidCheckpointPath);
276 
278  const ContainerID& containerId,
279  pid_t _pid);
280 
281  // Continues 'destroy()' once nested containers are handled.
282  void _destroy(
283  const ContainerID& containerId,
285  const State& previousState,
286  const std::vector<
288 
289  // Continues '_destroy()' once isolators have completed.
290  void __destroy(
291  const ContainerID& containerId,
292  const Option<mesos::slave::ContainerTermination>& termination);
293 
294  // Continues '__destroy()' once all processes have been killed
295  // by the launcher.
296  void ___destroy(
297  const ContainerID& containerId,
299  const process::Future<Nothing>& future);
300 
301  // Continues '___destroy()' once we get the exit status of the container.
302  void ____destroy(
303  const ContainerID& containerId,
304  const Option<mesos::slave::ContainerTermination>& termination);
305 
306  // Continues '____destroy()' once all isolators have completed
307  // cleanup.
308  void _____destroy(
309  const ContainerID& containerId,
311  const process::Future<std::vector<process::Future<Nothing>>>& cleanups);
312 
313  // Continues '_____destroy()' once the provisioner has completed destroy.
314  void ______destroy(
315  const ContainerID& containerId,
317  const process::Future<bool>& destroy);
318 
319  // Schedules a path for garbage collection based on its modification time.
320  // Equivalent to the `Slave::garbageCollect` method.
321  process::Future<Nothing> garbageCollect(const std::string& path);
322 
323  // Callback for when an isolator limits a container and impacts the
324  // processes. This will trigger container destruction.
325  void limited(
326  const ContainerID& containerId,
328 
329  // Helper for reaping the 'init' process of a container.
331  const ContainerID& containerId,
332  pid_t pid);
333 
334  // Callback for when the executor exits. This will trigger container
335  // destroy.
336  void reaped(const ContainerID& containerId);
337 
338  // TODO(jieyu): Consider introducing an Isolators struct and moving
339  // all isolator related operations to that struct.
341  const ContainerID& containerId);
342 
343  const Flags flags;
344  Fetcher* fetcher;
345 
346  // NOTE: This actor may be nullptr in tests, as not all tests need to
347  // share this actor with the agent.
348  GarbageCollector* gc;
349 
350  IOSwitchboard* ioSwitchboard;
351  const process::Owned<Launcher> launcher;
352  const process::Shared<Provisioner> provisioner;
353  const std::vector<process::Owned<mesos::slave::Isolator>> isolators;
354  VolumeGidManager* volumeGidManager;
355  const Option<int_fd> initMemFd;
356  const Option<int_fd> commandExecutorMemFd;
357 
358  struct Container
359  {
     // A new container starts in the STARTING state, records the current
     // clock time as its last state transition, and constructs `sequence`
     // with the string "mesos-container-status-updates".
360  Container()
361  : state(STARTING),
362  lastStateTransition(process::Clock::now()),
363  sequence("mesos-container-status-updates") {}
364 
365  // Promise for futures returned from wait().
367 
368  // NOTE: This represents 'PID 1', i.e., the "init" of the
369  // container that we created (it may be for an executor, or any
370  // arbitrary process that has been launched in the event of nested
371  // containers).
372  Option<pid_t> pid;
373 
374  // Sandbox directory for the container. It is optional here because
375  // we don't keep track of the sandbox directory for orphan containers.
376  // It is not checkpointed explicitly; on recovery, it is reconstructed
377  // from the executor's directory and the hierarchy of containers.
378  //
379  // NOTE: This holds the sandbox path in the host mount namespace,
380  // while MESOS_SANDBOX is the path in the container mount namespace.
381  Option<std::string> directory;
382 
383  // We keep track of the future exit status for the container if it
384  // has been launched. If the container has not been launched yet,
385  // 'status' will be set to None().
386  //
387  // NOTE: A container having an exit status does not mean that it
388  // has been properly destroyed. We still need to perform cleanup on
389  // the isolators and the provisioner after that.
391 
392  // We keep track of the future for 'provisioner->provision' so
393  // that we can discard the provisioning of a container that is
394  // destroyed while it is still being provisioned.
395  process::Future<ProvisionInfo> provisioning;
396 
397  // We keep track of the future that is waiting for all the
398  // 'isolator->prepare' calls to finish so that destroy will only start
399  // calling cleanup after all isolators have finished preparing.
401  launchInfos;
402 
403  // We keep track of the future that is waiting for all the
404  // 'isolator->isolate' futures so that destroy will only start
405  // calling cleanup after all isolators have finished isolating.
407 
408  // We keep track of the resources for each container so we can set
409  // the ResourceStatistics limits in usage().
411 
412  // The configuration for the container to be launched.
413  // This can only be None if the underlying container is launched
414  // before we checkpoint `ContainerConfig` in MESOS-6894.
415  // TODO(zhitao): Drop the `Option` part at the end of deprecation
416  // cycle.
418 
419  // The container class that can be `DEFAULT` or `DEBUG`.
420  // Returns `DEFAULT` even if the container class is not defined.
421  mesos::slave::ContainerClass containerClass();
422 
423  // Container's information at the moment it was launched. For example,
424  // used to bootstrap the launch information of future child DEBUG
425  // containers. Checkpointed and restored on recovery. Optional because
426  // it is not set for orphan containers.
427  //
428  // NOTE: Some of this data may change during the container lifetime,
429  // e.g., the working directory. Such changes are not captured here,
430  // which might be problematic, e.g., for DEBUG containers relying on
431  // some data in the parent's working directory.
433 
     // Current state of the container, and a timestamp that is initialized
     // to the construction time.
     // NOTE(review): presumably `transition()` refreshes the timestamp on
     // every state change -- confirm in the implementation.
434  State state;
435  process::Time lastStateTransition;
436 
437  // Used when `status` needs to be collected from isolators
438  // associated with this container. `Sequence` allows us to
439  // maintain the order of `status` requests for a given container.
440  process::Sequence sequence;
441 
442  // Child containers nested under this container.
444  };
445 
447 
448  // Helper to transition container state.
449  void transition(const ContainerID& containerId, const State& state);
450 
451  // Helper to determine if a container is supported by an isolator.
452  bool isSupportedByIsolator(
453  const ContainerID& containerId,
454  bool isolatorSupportsNesting,
455  bool isolatorSupportsStandalone);
456 
457  struct Metrics
458  {
     // Metrics kept by this process; the single instance is the `metrics`
     // member declared at the closing brace. The constructor and destructor
     // are only declared here.
     // NOTE(review): presumably they add/remove the counter with the metrics
     // library -- confirm in the implementation.
459  Metrics();
460  ~Metrics();
461 
     // NOTE(review): judging by its name, counts container destroys that
     // ended in an error -- confirm where it is incremented.
462  process::metrics::Counter container_destroy_errors;
463  } metrics;
464 };
465 
466 
467 std::ostream& operator<<(
468  std::ostream& stream,
469  const MesosContainerizerProcess::State& state);
470 
471 } // namespace slave {
472 } // namespace internal {
473 } // namespace mesos {
474 
475 #endif // __MESOS_CONTAINERIZER_HPP__
process::Future< hashset< ContainerID > > containers() override
Definition: path.hpp:29
Try< Nothing > isolate(const std::string &hierarchy, const std::string &cgroup, pid_t pid)
std::string generate(const std::string &prefix="")
Returns 'prefix(N)' where N represents the number of instances where the same prefix (wrt...
process::Future< Containerizer::LaunchResult > launch(const ContainerID &containerId, const mesos::slave::ContainerConfig &containerConfig, const std::map< std::string, std::string > &environment, const Option< std::string > &pidCheckpointPath) override
~MesosContainerizerProcess() override
Definition: containerizer.hpp:168
Definition: option.hpp:29
std::ostream & operator<<(std::ostream &stream, const MesosContainerizerProcess::State &state)
Definition: fetcher.hpp:49
Definition: check.hpp:33
process::Future< Nothing > update(const ContainerID &containerId, const Resources &resourceRequests, const google::protobuf::Map< std::string, Value::Scalar > &resourceLimits={}) override
process::Future< Nothing > pruneImages(const std::vector< Image > &excludedImages) override
process::Future< bool > kill(const ContainerID &containerId, int signal) override
Definition: resources.hpp:83
Definition: volume_gid_manager.hpp:42
Try< T > fetch(const std::string &value)
Definition: fetch.hpp:38
Future< Option< int > > reap(pid_t pid)
Definition: flags.hpp:39
process::Future< Option< mesos::slave::ContainerTermination > > wait(const ContainerID &containerId) override
Definition: counter.hpp:26
Definition: sequence.hpp:33
Definition: hashmap.hpp:38
Environment * environment
DWORD pid_t
Definition: windows.hpp:181
Definition: containerizer.hpp:63
Definition: owned.hpp:26
process::Future< Nothing > recover(const Option< state::SlaveState > &state) override
MesosContainerizerProcess(const Flags &_flags, Fetcher *_fetcher, GarbageCollector *_gc, IOSwitchboard *_ioSwitchboard, const process::Owned< Launcher > &_launcher, const process::Shared< Provisioner > &_provisioner, const std::vector< process::Owned< mesos::slave::Isolator >> &_isolators, VolumeGidManager *_volumeGidManager, const Option< int_fd > &_initMemFd, const Option< int_fd > &_commandExecutorMemFd)
Definition: containerizer.hpp:145
Definition: future_tracker.hpp:84
Try< Nothing > close(int fd)
Definition: close.hpp:24
process::Future< ResourceStatistics > usage(const ContainerID &containerId) override
Definition: agent.hpp:25
Definition: switchboard.hpp:53
static Try error(const E &e)
Definition: try.hpp:43
Definition: time.hpp:23
Definition: containerizer.hpp:141
process::Future< process::http::Connection > attach(const ContainerID &containerId) override
#define flags
Definition: decoder.hpp:18
Definition: none.hpp:27
Definition: attributes.hpp:24
bool isError() const
Definition: try.hpp:78
std::set< pid_t > children(pid_t, const std::list< Process > &, bool)
Definition: os.hpp:216
Definition: executor.hpp:48
static Try< MesosContainerizer * > create(const Flags &flags, bool local, Fetcher *fetcher, GarbageCollector *gc=nullptr, SecretResolver *secretResolver=nullptr, const Option< NvidiaComponents > &nvidia=None(), VolumeGidManager *volumeGidManager=nullptr, PendingFutureTracker *futureTracker=nullptr)
static Try< Resources > resources(const Flags &flags)
static Time now()
The current clock time for either the current process that makes this call or the global clock time i...
Definition: containerizer.hpp:67
Try< std::string > prepare(const std::string &baseHierarchy, const std::string &subsystem, const std::string &cgroup)
JSON::Object Metrics()
Definition: resolver.hpp:34
int int_fd
Definition: int_fd.hpp:35
std::string stringify(int flags)
Definition: owned.hpp:36
Definition: process.hpp:505
Definition: parse.hpp:33
PID< MetricsProcess > metrics
process::Future< Option< mesos::slave::ContainerTermination > > destroy(const ContainerID &containerId) override
process::Future< ContainerStatus > status(const ContainerID &containerId) override