Apache Mesos
docker.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __DOCKER_CONTAINERIZER_HPP__
18 #define __DOCKER_CONTAINERIZER_HPP__
19 
20 #include <map>
21 #include <set>
22 #include <string>
23 
25 
26 #include <process/owned.hpp>
27 #include <process/shared.hpp>
28 
31 
32 #include <stout/duration.hpp>
33 #include <stout/flags.hpp>
34 #include <stout/hashset.hpp>
35 
36 #include "docker/docker.hpp"
37 #include "docker/executor.hpp"
38 
40 
42 
43 namespace mesos {
44 namespace internal {
45 namespace slave {
46 
47 // Prefix used to name Docker containers in order to distinguish those
48 // created by Mesos from those created manually.
49 extern const std::string DOCKER_NAME_PREFIX;
50 
51 // Separator used to compose docker container name, which consists
52 // of the name prefix, ContainerID, and possibly the SlaveID depending
53 // on the version of Mesos used to create the container.
54 extern const std::string DOCKER_NAME_SEPERATOR;
55 
56 // Directory that stores all the symlinked sandboxes that are mapped
57 // into Docker containers. This is a relative directory that will be
58 // joined with the slave path. Only sandbox paths that contain a
59 // colon will be symlinked due to the limitation of the Docker CLI.
60 extern const std::string DOCKER_SYMLINK_DIRECTORY;
61 
62 
63 // Forward declaration.
64 class DockerContainerizerProcess;
65 
66 
68 {
69 public:
71  const Flags& flags,
72  Fetcher* fetcher,
73  const Option<NvidiaComponents>& nvidia = None());
74 
75  // This is only public for tests.
77  const Flags& flags,
78  Fetcher* fetcher,
81  const Option<NvidiaComponents>& nvidia = None());
82 
83  // This is only public for tests.
86 
87  ~DockerContainerizer() override;
88 
90  const Option<state::SlaveState>& state) override;
91 
93  const ContainerID& containerId,
94  const mesos::slave::ContainerConfig& containerConfig,
95  const std::map<std::string, std::string>& environment,
96  const Option<std::string>& pidCheckpointPath) override;
97 
99  const ContainerID& containerId,
100  const Resources& resources) override;
101 
103  const ContainerID& containerId) override;
104 
106  const ContainerID& containerId) override;
107 
109  const ContainerID& containerId) override;
110 
112  const ContainerID& containerId) override;
113 
115 
117  const std::vector<Image>& excludedImages) override;
118 
119 private:
121 };
122 
123 
124 
126  : public process::Process<DockerContainerizerProcess>
127 {
128 public:
130  const Flags& _flags,
131  Fetcher* _fetcher,
133  process::Shared<Docker> _docker,
134  const Option<NvidiaComponents>& _nvidia)
135  : flags(_flags),
136  fetcher(_fetcher),
137  logger(_logger),
138  docker(_docker),
139  nvidia(_nvidia) {}
140 
142  const Option<state::SlaveState>& state);
143 
145  const ContainerID& containerId,
146  const mesos::slave::ContainerConfig& containerConfig,
147  const std::map<std::string, std::string>& environment,
148  const Option<std::string>& pidCheckpointPath);
149 
150  // force = true causes the containerizer to update the resources
151  // for the container, even if they match what it has cached.
153  const ContainerID& containerId,
154  const Resources& resources,
155  bool force);
156 
158  const ContainerID& containerId);
159 
161  const ContainerID& containerId);
162 
164  const ContainerID& containerId);
165 
167  const ContainerID& containerId,
168  bool killed = true); // process is either killed or reaped.
169 
170  virtual process::Future<Nothing> fetch(const ContainerID& containerId);
171 
172  virtual process::Future<Nothing> pull(const ContainerID& containerId);
173 
175 
176 private:
177  struct Metrics
178  {
179  Metrics() : image_pull("containerizer/docker/image_pull", Hours(1))
180  {
181  process::metrics::add(image_pull);
182  }
183 
184  ~Metrics()
185  {
186  process::metrics::remove(image_pull);
187  }
188 
190  };
191 
192  // Continuations and helpers.
194  const ContainerID& containerId,
195  const Option<int>& status);
196 
198  const ContainerID& containerId,
199  pid_t pid);
200 
202  const ContainerID& containerId,
203  const mesos::slave::ContainerConfig& containerConfig);
204 
205  process::Future<Nothing> _recover(
206  const Option<state::SlaveState>& state,
207  const std::vector<Docker::Container>& containers);
208 
209  process::Future<Nothing> __recover(
210  const std::vector<Docker::Container>& containers);
211 
212  // Starts the executor in a Docker container.
213  process::Future<Docker::Container> launchExecutorContainer(
214  const ContainerID& containerId,
215  const std::string& containerName);
216 
217  // Starts the docker executor with a subprocess.
218  process::Future<pid_t> launchExecutorProcess(
219  const ContainerID& containerId);
220 
221  process::Future<pid_t> checkpointExecutor(
222  const ContainerID& containerId,
223  const Docker::Container& dockerContainer);
224 
225  // Reaps on the executor pid.
226  process::Future<Nothing> reapExecutor(
227  const ContainerID& containerId,
228  pid_t pid);
229 
230  void _destroy(
231  const ContainerID& containerId,
232  bool killed);
233 
234  void __destroy(
235  const ContainerID& containerId,
236  bool killed,
237  const process::Future<Nothing>& future);
238 
239  void ___destroy(
240  const ContainerID& containerId,
241  bool killed,
242  const process::Future<Option<int>>& status);
243 
244  void ____destroy(
245  const ContainerID& containerId,
246  bool killed,
247  const process::Future<Option<int>>& status);
248 
249  process::Future<Nothing> destroyTimeout(
250  const ContainerID& containerId,
251  process::Future<Nothing> future);
252 
253  process::Future<Nothing> _update(
254  const ContainerID& containerId,
255  const Resources& resources,
256  const Docker::Container& container);
257 
258  process::Future<Nothing> __update(
259  const ContainerID& containerId,
260  const Resources& resources,
261  pid_t pid);
262 
263  process::Future<Nothing> mountPersistentVolumes(
264  const ContainerID& containerId);
265 
266  Try<Nothing> unmountPersistentVolumes(
267  const ContainerID& containerId);
268 
269  Try<Nothing> updatePersistentVolumes(
270  const ContainerID& containerId,
271  const std::string& directory,
272  const Resources& current,
273  const Resources& updated);
274 
275 #ifdef __linux__
276  // Allocate GPU resources for a specified container.
277  process::Future<Nothing> allocateNvidiaGpus(
278  const ContainerID& containerId,
279  const size_t count);
280 
281  process::Future<Nothing> _allocateNvidiaGpus(
282  const ContainerID& containerId,
283  const std::set<Gpu>& allocated);
284 
285  // Deallocate GPU resources for a specified container.
286  process::Future<Nothing> deallocateNvidiaGpus(
287  const ContainerID& containerId);
288 
289  process::Future<Nothing> _deallocateNvidiaGpus(
290  const ContainerID& containerId,
291  const std::set<Gpu>& deallocated);
292 #endif // __linux__
293 
294  Try<ResourceStatistics> cgroupsStatistics(pid_t pid) const;
295 
296  // Callback invoked when the executor exits. This will trigger
297  // container destroy.
298  void reaped(const ContainerID& containerId);
299 
300  // Removes the docker container.
301  void remove(
302  const std::string& containerName,
303  const Option<std::string>& executor);
304 
305  const Flags flags;
306 
307  Fetcher* fetcher;
308 
310 
312 
314 
316 
317  struct Container
318  {
319  static Try<Container*> create(
320  const ContainerID& id,
321  const mesos::slave::ContainerConfig& containerConfig,
322  const std::map<std::string, std::string>& environment,
323  const Option<std::string>& pidCheckpointPath,
324  const Flags& flags);
325 
326  static std::string name(const ContainerID& id)
327  {
328  return DOCKER_NAME_PREFIX + stringify(id);
329  }
330 
331  Container(const ContainerID& id)
332  : state(FETCHING), id(id) {}
333 
334  Container(
335  const ContainerID& _id,
336  const mesos::slave::ContainerConfig& _containerConfig,
337  const std::map<std::string, std::string>& _environment,
338  const Option<std::string>& _pidCheckpointPath,
339  bool symlinked,
340  const std::string& containerWorkDir,
341  const Option<CommandInfo>& _command,
342  const Option<ContainerInfo>& _container,
343  bool launchesExecutorContainer)
344  : state(FETCHING),
345  id(_id),
346  containerConfig(_containerConfig),
347  pidCheckpointPath(_pidCheckpointPath),
348  environment(_environment),
349  symlinked(symlinked),
350  containerWorkDir(containerWorkDir),
351  containerName(name(id)),
352  launchesExecutorContainer(launchesExecutorContainer)
353  {
354  // NOTE: The task's resources are included in the executor's
355  // resources in order to make sure when launching the executor
356  // that it has non-zero resources in the event the executor was
357  // not actually given any resources by the framework
358  // originally. See Framework::launchExecutor in slave.cpp. We
359  // check that this is indeed the case here to protect ourselves
360  // from when/if this changes in the future (but it's not a
361  // perfect check because an executor might always have a subset
362  // of it's resources that match a task, nevertheless, it's
363  // better than nothing).
364  resources = containerConfig.resources();
365 
366  if (containerConfig.has_task_info()) {
367  CHECK(resources.contains(containerConfig.task_info().resources()));
368  }
369 
370  if (_command.isSome()) {
371  command = _command.get();
372  } else {
373  command = containerConfig.command_info();
374  }
375 
376  if (_container.isSome()) {
377  container = _container.get();
378  } else {
379  // NOTE: The existence of this field is checked in
380  // DockerContainerizerProcess::launch.
381  container = containerConfig.container_info();
382  }
383  }
384 
385  ~Container()
386  {
387  if (symlinked) {
388  // The sandbox directory is a symlink, remove it at container
389  // destroy.
390  os::rm(containerWorkDir);
391  }
392  }
393 
394  Option<std::string> executorName()
395  {
396  if (launchesExecutorContainer) {
397  return containerName + DOCKER_NAME_SEPERATOR + "executor";
398  } else {
399  return None();
400  }
401  }
402 
403  std::string image() const
404  {
405  if (containerConfig.has_task_info()) {
406  return containerConfig.task_info().container().docker().image();
407  }
408 
409  return containerConfig.executor_info().container().docker().image();
410  }
411 
412  bool forcePullImage() const
413  {
414  if (containerConfig.has_task_info()) {
415  return containerConfig.task_info()
416  .container().docker().force_pull_image();
417  }
418 
419  return containerConfig.executor_info()
420  .container().docker().force_pull_image();
421  }
422 
423  // The DockerContainerizer needs to be able to properly clean up
424  // Docker containers, regardless of when they are destroyed. For
425  // example, if a container gets destroyed while we are fetching,
426  // we need to not keep running the fetch, nor should we try and
427  // start the Docker container. For this reason, we've split out
428  // the states into:
429  //
430  // FETCHING
431  // PULLING
432  // MOUNTING
433  // RUNNING
434  // DESTROYING
435  //
436  // In particular, we made 'PULLING' be its own state so that we
437  // can easily destroy and cleanup when a user initiated pulling
438  // a really big image but we timeout due to the executor
439  // registration timeout. Since we currently have no way to discard
440  // a Docker::run, we needed to explicitly do the pull (which is
441  // the part that takes the longest) so that we can also explicitly
442  // kill it when asked. Once the functions at Docker::* get support
443  // for discarding, then we won't need to make pull be its own
444  // state anymore, although it doesn't hurt since it gives us
445  // better error messages.
446  enum State
447  {
448  FETCHING = 1,
449  PULLING = 2,
450  MOUNTING = 3,
451  RUNNING = 4,
452  DESTROYING = 5
453  } state;
454 
455  // Copies of the parameters sent to `Container::create`.
456  const ContainerID id;
457  const mesos::slave::ContainerConfig containerConfig;
458  const Option<std::string> pidCheckpointPath;
459 
460  // A copy of the parameter sent to `Container::create`.
461  // NOTE: This may be modified further by hooks.
462  std::map<std::string, std::string> environment;
463 
464  // The sandbox directory for the container. This holds the
465  // symlinked path if symlinked boolean is true.
466  // TODO(josephw): The symlink path does not persist across failovers,
467  // so we will not delete the symlink if the agent restarts. This results
468  // in gradually leaking hanging symlinks.
469  bool symlinked;
470  std::string containerWorkDir;
471 
472  // Copies of the fields in `containerConfig`, except when the
473  // container is a command task and the agent is launched with
474  // the --docker_mesos_image flag.
475  ContainerInfo container;
476  CommandInfo command;
477 
478  // Environment variables that the command executor should pass
479  // onto a docker-ized task. This is set by a hook.
481 
482  // The string used to refer to this container via the Docker CLI.
483  // This name is either computed by concatenating the DOCKER_NAME_PREFIX
484  // and the ContainerID; or during recovery, by taking the recovered
485  // container's name.
486  std::string containerName;
487 
488  // Promise for future returned from wait().
490 
491  // Exit status of executor or container (depending on whether or
492  // not we used the command executor). Represented as a promise so
493  // that destroying can chain with it being set.
495 
496  // Future that tells us the return value of last launch stage (fetch, pull,
497  // run, etc).
499 
500  // We keep track of the resources for each container so we can set
501  // the ResourceStatistics limits in usage(). Note that this is
502  // different than just what we might get from TaskInfo::resources
503  // or ExecutorInfo::resources because they can change dynamically.
505 
506  // The docker pull future is stored so we can discard when
507  // destroy is called while docker is pulling the image.
509 
510  // Once the container is running, this saves the pid of the
511  // running container.
512  Option<pid_t> pid;
513 
514  // The executor pid that was forked to wait on the running
515  // container. This is stored so we can clean up the executor
516  // on destroy.
517  Option<pid_t> executorPid;
518 
519 #ifdef __linux__
520  // GPU resources allocated to the container.
521  std::set<Gpu> gpus;
522 #endif // __linux__
523 
524  // Marks if this container launches an executor in a docker
525  // container.
526  bool launchesExecutorContainer;
527  };
528 
530 };
531 
532 
533 } // namespace slave {
534 } // namespace internal {
535 } // namespace mesos {
536 
537 #endif // __DOCKER_CONTAINERIZER_HPP__
Definition: option.hpp:28
Try< Nothing > rm(const std::string &path)
Definition: rm.hpp:26
Future< Nothing > remove(const Metric &metric)
Definition: metrics.hpp:109
Definition: fetcher.hpp:49
process::Future< Nothing > recover(const Option< state::SlaveState > &state) override
Definition: check.hpp:33
process::Future< ContainerStatus > status(const ContainerID &containerId) override
Definition: resources.hpp:81
process::Future< hashset< ContainerID > > containers() override
Try< T > fetch(const std::string &value)
Definition: fetch.hpp:38
Definition: flags.hpp:39
process::Future< Option< mesos::slave::ContainerTermination > > destroy(const ContainerID &containerId) override
Future< Nothing > add(const T &metric)
Definition: metrics.hpp:95
bool isSome() const
Definition: option.hpp:115
Definition: docker.hpp:91
Environment * environment
DWORD pid_t
Definition: windows.hpp:181
process::Future< Nothing > update(const ContainerID &containerId, const Resources &resources) override
process::Future< Containerizer::LaunchResult > launch(const ContainerID &containerId, const mesos::slave::ContainerConfig &containerConfig, const std::map< std::string, std::string > &environment, const Option< std::string > &pidCheckpointPath) override
Definition: containerizer.hpp:59
DockerContainerizer(const Flags &flags, Fetcher *fetcher, const process::Owned< mesos::slave::ContainerLogger > &logger, process::Shared< Docker > docker, const Option< NvidiaComponents > &nvidia=None())
Definition: spec.hpp:26
process::Future< ResourceStatistics > usage(const ContainerID &containerId) override
const T & get() const &
Definition: option.hpp:118
const std::string DOCKER_NAME_PREFIX
process::Future< Nothing > pruneImages(const std::vector< Image > &excludedImages) override
Definition: duration.hpp:235
Try< Nothing > checkpoint(const std::string &path, const std::string &message)
Definition: state.hpp:123
#define flags
Definition: decoder.hpp:18
URI image(const std::string &repository, const std::string &reference, const std::string &registry, const Option< std::string > &scheme=None(), const Option< int > &port=None())
Definition: docker.hpp:30
Definition: none.hpp:27
Definition: attributes.hpp:24
const std::string DOCKER_SYMLINK_DIRECTORY
Definition: executor.hpp:48
static Try< Resources > resources(const Flags &flags)
JSON::Object Metrics()
std::string stringify(int flags)
const std::string DOCKER_NAME_SEPERATOR
Definition: spec.hpp:35
bool contains(const Resources &that) const
Definition: process.hpp:501
DockerContainerizerProcess(const Flags &_flags, Fetcher *_fetcher, const process::Owned< mesos::slave::ContainerLogger > &_logger, process::Shared< Docker > _docker, const Option< NvidiaComponents > &_nvidia)
Definition: docker.hpp:129
Definition: parse.hpp:33
PID< MetricsProcess > metrics
constexpr const char * name
Definition: shell.hpp:43
static Try< DockerContainerizer * > create(const Flags &flags, Fetcher *fetcher, const Option< NvidiaComponents > &nvidia=None())
process::Future< Option< mesos::slave::ContainerTermination > > wait(const ContainerID &containerId) override