Apache Mesos
containerizer.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __MESOS_CONTAINERIZER_HPP__
18 #define __MESOS_CONTAINERIZER_HPP__
19 
20 #include <vector>
21 
23 
24 #include <mesos/slave/isolator.hpp>
25 
26 #include <process/clock.hpp>
27 #include <process/http.hpp>
28 #include <process/id.hpp>
29 #include <process/sequence.hpp>
30 #include <process/shared.hpp>
31 #include <process/time.hpp>
32 
34 
35 #include <stout/hashmap.hpp>
36 #include <stout/multihashmap.hpp>
37 #include <stout/os/int_fd.hpp>
38 
39 #include "slave/csi_server.hpp"
40 #include "slave/gc.hpp"
41 #include "slave/state.hpp"
42 
44 
46 
48 
50 
52 
53 namespace mesos {
54 namespace internal {
55 namespace slave {
56 
57 // Logs at the INFO level when the container class is not `DEBUG` (i.e., it
58 // is not set or `DEFAULT`); otherwise the line is only emitted if VLOG(1) is
59 // on. This avoids polluting agent logs with `DEBUG` container information,
60 // as such containers can run periodically. The argument may be any expression.
61 #define LOG_BASED_ON_CLASS(containerClass) \
62  LOG_IF(INFO, ((containerClass) != ContainerClass::DEBUG) || VLOG_IS_ON(1))
63 
64 // Forward declaration.
65 class MesosContainerizerProcess;
66 
67 
69 {
70 public:
72  const Flags& flags,
73  bool local,
74  Fetcher* fetcher,
75  GarbageCollector* gc = nullptr,
76  SecretResolver* secretResolver = nullptr,
77  const Option<NvidiaComponents>& nvidia = None(),
78  VolumeGidManager* volumeGidManager = nullptr,
79  PendingFutureTracker* futureTracker = nullptr,
80  CSIServer* csiServer = nullptr);
81 
83  const Flags& flags,
84  bool local,
85  Fetcher* fetcher,
86  GarbageCollector* gc,
87  const process::Owned<Launcher>& launcher,
88  const process::Shared<Provisioner>& provisioner,
89  const std::vector<process::Owned<mesos::slave::Isolator>>& isolators,
90  VolumeGidManager* volumeGidManager = nullptr);
91 
92  ~MesosContainerizer() override;
93 
95  const Option<state::SlaveState>& state) override;
96 
98  const ContainerID& containerId,
99  const mesos::slave::ContainerConfig& containerConfig,
100  const std::map<std::string, std::string>& environment,
101  const Option<std::string>& pidCheckpointPath) override;
102 
104  const ContainerID& containerId) override;
105 
107  const ContainerID& containerId,
108  const Resources& resourceRequests,
109  const google::protobuf::Map<
110  std::string, Value::Scalar>& resourceLimits = {}) override;
111 
113  const ContainerID& containerId) override;
114 
116  const ContainerID& containerId) override;
117 
119  const ContainerID& containerId) override;
120 
122  const ContainerID& containerId) override;
123 
125  const ContainerID& containerId,
126  int signal) override;
127 
129 
130  process::Future<Nothing> remove(const ContainerID& containerId) override;
131 
133  const std::vector<Image>& excludedImages) override;
134 
135 private:
136  explicit MesosContainerizer(
138 
140 };
141 
142 
144  : public process::Process<MesosContainerizerProcess>
145 {
146 public:
148  const Flags& _flags,
149  Fetcher* _fetcher,
150  GarbageCollector* _gc,
151  IOSwitchboard* _ioSwitchboard,
152  const process::Owned<Launcher>& _launcher,
153  const process::Shared<Provisioner>& _provisioner,
154  const std::vector<process::Owned<mesos::slave::Isolator>>& _isolators,
155  VolumeGidManager* _volumeGidManager,
156  const Option<int_fd>& _initMemFd,
157  const Option<int_fd>& _commandExecutorMemFd)
158  : ProcessBase(process::ID::generate("mesos-containerizer")),
159  flags(_flags),
160  fetcher(_fetcher),
161  gc(_gc),
162  ioSwitchboard(_ioSwitchboard),
163  launcher(_launcher),
164  provisioner(_provisioner),
165  isolators(_isolators),
166  volumeGidManager(_volumeGidManager),
167  initMemFd(_initMemFd),
168  commandExecutorMemFd(_commandExecutorMemFd) {}
169 
171  {
172  if (initMemFd.isSome()) {
173  Try<Nothing> close = os::close(initMemFd.get());
174  if (close.isError()) {
175  LOG(WARNING) << "Failed to close memfd '" << stringify(initMemFd.get())
176  << "': " << close.error();
177  }
178  }
179 
180  if (commandExecutorMemFd.isSome()) {
181  Try<Nothing> close = os::close(commandExecutorMemFd.get());
182  if (close.isError()) {
183  LOG(WARNING) << "Failed to close memfd '"
184  << stringify(commandExecutorMemFd.get())
185  << "': " << close.error();
186  }
187  }
188  }
189 
191  const Option<state::SlaveState>& state);
192 
194  const ContainerID& containerId,
195  const mesos::slave::ContainerConfig& containerConfig,
196  const std::map<std::string, std::string>& environment,
197  const Option<std::string>& pidCheckpointPath);
198 
200  const ContainerID& containerId);
201 
203  const ContainerID& containerId,
204  const Resources& resourceRequests,
205  const google::protobuf::Map<
206  std::string, Value::Scalar>& resourceLimits = {});
207 
209  const ContainerID& containerId);
210 
212  const ContainerID& containerId);
213 
215  const ContainerID& containerId);
216 
218  const ContainerID& containerId,
219  int_fd pipeWrite);
220 
222  const ContainerID& containerId,
223  const Option<mesos::slave::ContainerTermination>& termination);
224 
225  virtual process::Future<bool> kill(
226  const ContainerID& containerId,
227  int signal);
228 
229  virtual process::Future<Nothing> remove(const ContainerID& containerId);
230 
232 
234  const std::vector<Image>& excludedImages);
235 
236 private:
237  enum State // Per-container lifecycle state (stored in `Container::state`).
238  {
239  STARTING, // Initial state assigned by the `Container` constructor.
240  PROVISIONING,
241  PREPARING,
242  ISOLATING,
243  FETCHING,
244  RUNNING,
245  DESTROYING
246  };
247 
248  friend std::ostream& operator<<(std::ostream& stream, const State& state);
249 
250  process::Future<Nothing> _recover(
251  const std::vector<mesos::slave::ContainerState>& recoverable,
252  const hashset<ContainerID>& orphans);
253 
254  process::Future<std::vector<Nothing>> recoverIsolators(
255  const std::vector<mesos::slave::ContainerState>& recoverable,
256  const hashset<ContainerID>& orphans);
257 
258  process::Future<Nothing> recoverProvisioner(
259  const std::vector<mesos::slave::ContainerState>& recoverable,
260  const hashset<ContainerID>& orphans);
261 
262  process::Future<Nothing> __recover(
263  const std::vector<mesos::slave::ContainerState>& recovered,
264  const hashset<ContainerID>& orphans);
265 
267  const ContainerID& containerId,
268  const Option<ProvisionInfo>& provisionInfo);
269 
271  const ContainerID& containerId);
272 
274  const ContainerID& containerId,
275  const Option<mesos::slave::ContainerIO>& containerIO,
276  const std::map<std::string, std::string>& environment,
277  const Option<std::string>& pidCheckpointPath);
278 
280  const ContainerID& containerId,
281  pid_t _pid);
282 
283  // Continues 'destroy()' once nested containers are handled.
284  void _destroy(
285  const ContainerID& containerId,
287  const State& previousState,
288  const std::vector<
290 
291  // Continues '_destroy()' once isolators have completed.
292  void __destroy(
293  const ContainerID& containerId,
294  const Option<mesos::slave::ContainerTermination>& termination);
295 
296  // Continues '__destroy()' once all processes have been killed
297  // by the launcher.
298  void ___destroy(
299  const ContainerID& containerId,
301  const process::Future<Nothing>& future);
302 
303  // Continues '___destroy()' once we get the exit status of the container.
304  void ____destroy(
305  const ContainerID& containerId,
306  const Option<mesos::slave::ContainerTermination>& termination);
307 
308  // Continues '____destroy()' once all isolators have completed
309  // cleanup.
310  void _____destroy(
311  const ContainerID& containerId,
313  const process::Future<std::vector<process::Future<Nothing>>>& cleanups);
314 
315  // Continues '_____destroy()' once the provisioner has completed destroy.
316  void ______destroy(
317  const ContainerID& containerId,
319  const process::Future<bool>& destroy);
320 
321  // Schedules a path for garbage collection based on its modification time.
322  // Equivalent to the `Slave::garbageCollect` method.
323  process::Future<Nothing> garbageCollect(const std::string& path);
324 
325  // Call back for when an isolator limits a container and impacts the
326  // processes. This will trigger container destruction.
327  void limited(
328  const ContainerID& containerId,
330 
331  // Helper for reaping the 'init' process of a container.
333  const ContainerID& containerId,
334  pid_t pid);
335 
336  // Call back for when the executor exits. This will trigger container
337  // destroy.
338  void reaped(const ContainerID& containerId);
339 
340  // TODO(jieyu): Consider introducing an Isolators struct and moving
341  // all isolator related operations to that struct.
343  const ContainerID& containerId);
344 
345  const Flags flags;
346  Fetcher* fetcher;
347 
348  // NOTE: This actor may be nullptr in tests, as not all tests need to
349  // share this actor with the agent.
350  GarbageCollector* gc;
351 
352  IOSwitchboard* ioSwitchboard;
353  const process::Owned<Launcher> launcher;
354  const process::Shared<Provisioner> provisioner;
355  const std::vector<process::Owned<mesos::slave::Isolator>> isolators;
356  VolumeGidManager* volumeGidManager;
357  const Option<int_fd> initMemFd;
358  const Option<int_fd> commandExecutorMemFd;
359 
360  struct Container
361  {
362  Container()
363  : state(STARTING),
364  lastStateTransition(process::Clock::now()),
365  sequence("mesos-container-status-updates") {}
366 
367  // Promise for futures returned from wait().
369 
370  // NOTE: this represents 'PID 1', i.e., the "init" of the
371  // container that we created (it may be for an executor, or any
372  // arbitrary process that has been launched in the event of nested
373  // containers).
374  Option<pid_t> pid;
375 
376  // Sandbox directory for the container. It is optional here because
377  // we don't keep track of sandbox directory for orphan containers.
378  // It is not checkpointed explicitly; on recovery, it is reconstructed
379  // from executor's directory and hierarchy of containers.
380  //
381  // NOTE: This holds the sandbox path in the host mount namespace,
382  // while MESOS_SANDBOX is the path in the container mount namespace.
383  Option<std::string> directory;
384 
385  // We keep track of the future exit status for the container if it
386  // has been launched. If the container has not been launched yet,
387  // 'status' will be set to None().
388  //
389  // NOTE: A container having an exit status does not mean that it has
390  // been properly destroyed. We need to perform cleanup on
391  // isolators and provisioner after that.
393 
394  // We keep track of the future for 'provisioner->provision' so
395  // that we can discard the provisioning for the container which
396  // is destroyed when it is being provisioned.
397  process::Future<ProvisionInfo> provisioning;
398 
399  // We keep track of the future that is waiting for all the
400  // 'isolator->prepare' to finish so that destroy will only start
401  // calling cleanup after all isolators have finished preparing.
403  launchInfos;
404 
405  // We keep track of the future that is waiting for all the
406  // 'isolator->isolate' futures so that destroy will only start
407  // calling cleanup after all isolators have finished isolating.
409 
410  // We keep track of the resource requests and limits for each container so
411  // we can set the ResourceStatistics limits in usage().
412  Resources resourceRequests;
413  google::protobuf::Map<std::string, Value::Scalar> resourceLimits;
414 
415  // The configuration for the container to be launched.
416  // This can only be None if the underlying container is launched
417  // before we checkpoint `ContainerConfig` in MESOS-6894.
418  // TODO(zhitao): Drop the `Option` part at the end of deprecation
419  // cycle.
421 
422  // The container class that can be `DEFAULT` or `DEBUG`.
423  // Returns `DEFAULT` even if the container class is not defined.
424  mesos::slave::ContainerClass containerClass();
425 
426  // Container's information at the moment it was launched. For example,
427  // used to bootstrap the launch information of future child DEBUG
428  // containers. Checkpointed and restored on recovery. Optional because
429  // it is not set for orphan containers.
430  //
431  // NOTE: Some of this data may change during the container lifetime,
432  // e.g., the working directory. Such changes are not captured here,
433  // which might be problematic, e.g., for DEBUG containers relying on
434  // some data in parent working directory.
436 
437  State state;
438  process::Time lastStateTransition;
439 
440  // Used when `status` needs to be collected from isolators
441  // associated with this container. `Sequence` allows us to
442  // maintain the order of `status` requests for a given container.
443  process::Sequence sequence;
444 
445  // Child containers nested under this container.
447  };
448 
450 
451  // Helper to transition container state.
452  void transition(const ContainerID& containerId, const State& state);
453 
454  // Helper to determine if a container is supported by an isolator.
455  bool isSupportedByIsolator(
456  const ContainerID& containerId,
457  bool isolatorSupportsNesting,
458  bool isolatorSupportsStandalone);
459 
460  struct Metrics // Metrics of this process, held in the `metrics` member.
461  {
462  Metrics();
463  ~Metrics();
464 
465  process::metrics::Counter container_destroy_errors; // NOTE(review): presumably counts failed container destroys - confirm.
466  } metrics;
467 };
468 
469 
470 std::ostream& operator<<( // Stream insertion for the process's private `State` enum;
471  std::ostream& stream, // declared a friend inside `MesosContainerizerProcess`.
472  const MesosContainerizerProcess::State& state);
473 
474 } // namespace slave {
475 } // namespace internal {
476 } // namespace mesos {
477 
478 #endif // __MESOS_CONTAINERIZER_HPP__
process::Future< hashset< ContainerID > > containers() override
Definition: path.hpp:29
Try< Nothing > isolate(const std::string &hierarchy, const std::string &cgroup, pid_t pid)
std::string generate(const std::string &prefix="")
Returns 'prefix(N)' where N represents the number of instances where the same prefix (wrt...
process::Future< Containerizer::LaunchResult > launch(const ContainerID &containerId, const mesos::slave::ContainerConfig &containerConfig, const std::map< std::string, std::string > &environment, const Option< std::string > &pidCheckpointPath) override
~MesosContainerizerProcess() override
Definition: containerizer.hpp:170
Definition: option.hpp:29
std::ostream & operator<<(std::ostream &stream, const MesosContainerizerProcess::State &state)
Definition: fetcher.hpp:49
Definition: check.hpp:33
process::Future< Nothing > update(const ContainerID &containerId, const Resources &resourceRequests, const google::protobuf::Map< std::string, Value::Scalar > &resourceLimits={}) override
Definition: csi_server.hpp:49
process::Future< Nothing > pruneImages(const std::vector< Image > &excludedImages) override
process::Future< bool > kill(const ContainerID &containerId, int signal) override
Definition: resources.hpp:83
Definition: volume_gid_manager.hpp:42
Try< T > fetch(const std::string &value)
Definition: fetch.hpp:38
Future< Option< int > > reap(pid_t pid)
Definition: flags.hpp:39
process::Future< Option< mesos::slave::ContainerTermination > > wait(const ContainerID &containerId) override
Definition: counter.hpp:26
Definition: sequence.hpp:33
Definition: hashmap.hpp:38
Environment * environment
DWORD pid_t
Definition: windows.hpp:181
Definition: containerizer.hpp:64
Definition: owned.hpp:26
static Try< MesosContainerizer * > create(const Flags &flags, bool local, Fetcher *fetcher, GarbageCollector *gc=nullptr, SecretResolver *secretResolver=nullptr, const Option< NvidiaComponents > &nvidia=None(), VolumeGidManager *volumeGidManager=nullptr, PendingFutureTracker *futureTracker=nullptr, CSIServer *csiServer=nullptr)
process::Future< Nothing > recover(const Option< state::SlaveState > &state) override
MesosContainerizerProcess(const Flags &_flags, Fetcher *_fetcher, GarbageCollector *_gc, IOSwitchboard *_ioSwitchboard, const process::Owned< Launcher > &_launcher, const process::Shared< Provisioner > &_provisioner, const std::vector< process::Owned< mesos::slave::Isolator >> &_isolators, VolumeGidManager *_volumeGidManager, const Option< int_fd > &_initMemFd, const Option< int_fd > &_commandExecutorMemFd)
Definition: containerizer.hpp:147
Definition: future_tracker.hpp:84
Try< Nothing > close(int fd)
Definition: close.hpp:24
process::Future< ResourceStatistics > usage(const ContainerID &containerId) override
Definition: agent.hpp:25
Definition: switchboard.hpp:53
static Try error(const E &e)
Definition: try.hpp:43
Definition: time.hpp:23
Definition: containerizer.hpp:143
process::Future< process::http::Connection > attach(const ContainerID &containerId) override
#define flags
Definition: decoder.hpp:18
Definition: none.hpp:27
Definition: attributes.hpp:24
bool isError() const
Definition: try.hpp:78
std::set< pid_t > children(pid_t, const std::list< Process > &, bool)
Definition: os.hpp:217
Definition: executor.hpp:48
static Time now()
The current clock time for either the current process that makes this call or the global clock time i...
Definition: containerizer.hpp:68
Try< std::string > prepare(const std::string &baseHierarchy, const std::string &subsystem, const std::string &cgroup)
JSON::Object Metrics()
Definition: resolver.hpp:34
int int_fd
Definition: int_fd.hpp:35
std::string stringify(int flags)
Definition: owned.hpp:36
Definition: process.hpp:505
Definition: parse.hpp:33
PID< MetricsProcess > metrics
process::Future< Option< mesos::slave::ContainerTermination > > destroy(const ContainerID &containerId) override
process::Future< ContainerStatus > status(const ContainerID &containerId) override