Apache Mesos
docker.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __DOCKER_CONTAINERIZER_HPP__
18 #define __DOCKER_CONTAINERIZER_HPP__
19 
20 #include <list>
21 #include <map>
22 #include <set>
23 #include <string>
24 
26 
27 #include <process/owned.hpp>
28 #include <process/shared.hpp>
29 
30 #include <stout/flags.hpp>
31 #include <stout/hashset.hpp>
32 
33 #include "docker/docker.hpp"
34 #include "docker/executor.hpp"
35 
37 
39 
40 namespace mesos {
41 namespace internal {
42 namespace slave {
43 
44 // Prefix used to name Docker containers in order to distinguish those
45 // created by Mesos from those created manually.
46 extern const std::string DOCKER_NAME_PREFIX;
47 
48 // Separator used to compose docker container name, which consists
49 // of the name prefix, ContainerID, and possibly the SlaveID depending
50 // on the version of Mesos used to create the container.
51 extern const std::string DOCKER_NAME_SEPERATOR;
52 
53 // Directory that stores all the symlinked sandboxes that are mapped
54 // into Docker containers. This is a relative directory that will be
55 // joined with the slave path. Only sandbox paths that contain a
56 // colon will be symlinked due to the limitation of the Docker CLI.
57 extern const std::string DOCKER_SYMLINK_DIRECTORY;
58 
59 
60 // Forward declaration.
61 class DockerContainerizerProcess;
62 
63 
65 {
66 public:
68  const Flags& flags,
69  Fetcher* fetcher,
70  const Option<NvidiaComponents>& nvidia = None());
71 
72  // This is only public for tests.
74  const Flags& flags,
75  Fetcher* fetcher,
78  const Option<NvidiaComponents>& nvidia = None());
79 
80  // This is only public for tests.
83 
84  virtual ~DockerContainerizer();
85 
87  const Option<state::SlaveState>& state);
88 
90  const ContainerID& containerId,
91  const mesos::slave::ContainerConfig& containerConfig,
92  const std::map<std::string, std::string>& environment,
93  const Option<std::string>& pidCheckpointPath);
94 
96  const ContainerID& containerId,
97  const Resources& resources);
98 
100  const ContainerID& containerId);
101 
103  const ContainerID& containerId);
104 
106  const ContainerID& containerId);
107 
109  const ContainerID& containerId);
110 
112 
114  const std::vector<Image>& excludedImages);
115 
116 private:
118 };
119 
120 
121 
123  : public process::Process<DockerContainerizerProcess>
124 {
125 public:
127  const Flags& _flags,
128  Fetcher* _fetcher,
130  process::Shared<Docker> _docker,
131  const Option<NvidiaComponents>& _nvidia)
132  : flags(_flags),
133  fetcher(_fetcher),
134  logger(_logger),
135  docker(_docker),
136  nvidia(_nvidia) {}
137 
139  const Option<state::SlaveState>& state);
140 
142  const ContainerID& containerId,
143  const mesos::slave::ContainerConfig& containerConfig,
144  const std::map<std::string, std::string>& environment,
145  const Option<std::string>& pidCheckpointPath);
146 
147  // force = true causes the containerizer to update the resources
148  // for the container, even if they match what it has cached.
150  const ContainerID& containerId,
151  const Resources& resources,
152  bool force);
153 
155  const ContainerID& containerId);
156 
158  const ContainerID& containerId);
159 
161  const ContainerID& containerId);
162 
164  const ContainerID& containerId,
165  bool killed = true); // process is either killed or reaped.
166 
167  virtual process::Future<Nothing> fetch(const ContainerID& containerId);
168 
169  virtual process::Future<Nothing> pull(const ContainerID& containerId);
170 
172 
173 private:
174  // Continuations and helpers.
176  const ContainerID& containerId,
177  const Option<int>& status);
178 
180  const ContainerID& containerId,
181  pid_t pid);
182 
184  const ContainerID& containerId,
185  const mesos::slave::ContainerConfig& containerConfig);
186 
187  process::Future<Nothing> _recover(
188  const Option<state::SlaveState>& state,
189  const std::list<Docker::Container>& containers);
190 
191  process::Future<Nothing> __recover(
192  const std::list<Docker::Container>& containers);
193 
194  // Starts the executor in a Docker container.
195  process::Future<Docker::Container> launchExecutorContainer(
196  const ContainerID& containerId,
197  const std::string& containerName);
198 
199  // Starts the docker executor with a subprocess.
200  process::Future<pid_t> launchExecutorProcess(
201  const ContainerID& containerId);
202 
203  process::Future<pid_t> checkpointExecutor(
204  const ContainerID& containerId,
205  const Docker::Container& dockerContainer);
206 
207  // Reaps on the executor pid.
208  process::Future<Nothing> reapExecutor(
209  const ContainerID& containerId,
210  pid_t pid);
211 
212  void _destroy(
213  const ContainerID& containerId,
214  bool killed);
215 
216  void __destroy(
217  const ContainerID& containerId,
218  bool killed,
219  const process::Future<Nothing>& future);
220 
221  void ___destroy(
222  const ContainerID& containerId,
223  bool killed,
224  const process::Future<Option<int>>& status);
225 
226  void ____destroy(
227  const ContainerID& containerId,
228  bool killed,
229  const process::Future<Option<int>>& status);
230 
231  process::Future<Nothing> destroyTimeout(
232  const ContainerID& containerId,
233  process::Future<Nothing> future);
234 
235  process::Future<Nothing> _update(
236  const ContainerID& containerId,
237  const Resources& resources,
238  const Docker::Container& container);
239 
240  process::Future<Nothing> __update(
241  const ContainerID& containerId,
242  const Resources& resources,
243  pid_t pid);
244 
245  process::Future<Nothing> mountPersistentVolumes(
246  const ContainerID& containerId);
247 
248  Try<Nothing> unmountPersistentVolumes(
249  const ContainerID& containerId);
250 
251  Try<Nothing> updatePersistentVolumes(
252  const ContainerID& containerId,
253  const std::string& directory,
254  const Resources& current,
255  const Resources& updated);
256 
257 #ifdef __linux__
258  // Allocate GPU resources for a specified container.
259  process::Future<Nothing> allocateNvidiaGpus(
260  const ContainerID& containerId,
261  const size_t count);
262 
263  process::Future<Nothing> _allocateNvidiaGpus(
264  const ContainerID& containerId,
265  const std::set<Gpu>& allocated);
266 
267  // Deallocate GPU resources for a specified container.
268  process::Future<Nothing> deallocateNvidiaGpus(
269  const ContainerID& containerId);
270 
271  process::Future<Nothing> _deallocateNvidiaGpus(
272  const ContainerID& containerId,
273  const std::set<Gpu>& deallocated);
274 #endif // __linux__
275 
276  Try<ResourceStatistics> cgroupsStatistics(pid_t pid) const;
277 
278  // Callback invoked when the executor exits. This will trigger
279  // destruction of the container.
280  void reaped(const ContainerID& containerId);
281 
282  // Removes the docker container.
283  void remove(
284  const std::string& containerName,
285  const Option<std::string>& executor);
286 
287  const Flags flags;
288 
289  Fetcher* fetcher;
290 
292 
294 
296 
297  struct Container
298  {
299  static Try<Container*> create(
300  const ContainerID& id,
301  const mesos::slave::ContainerConfig& containerConfig,
302  const std::map<std::string, std::string>& environment,
303  const Option<std::string>& pidCheckpointPath,
304  const Flags& flags);
305 
306  static std::string name(const ContainerID& id)
307  {
308  return DOCKER_NAME_PREFIX + stringify(id);
309  }
310 
311  Container(const ContainerID& id)
312  : state(FETCHING), id(id) {}
313 
314  Container(
315  const ContainerID& _id,
316  const mesos::slave::ContainerConfig& _containerConfig,
317  const std::map<std::string, std::string>& _environment,
318  const Option<std::string>& _pidCheckpointPath,
319  bool symlinked,
320  const std::string& containerWorkDir,
321  const Option<CommandInfo>& _command,
322  const Option<ContainerInfo>& _container,
323  bool launchesExecutorContainer)
324  : state(FETCHING),
325  id(_id),
326  containerConfig(_containerConfig),
327  pidCheckpointPath(_pidCheckpointPath),
328  environment(_environment),
329  symlinked(symlinked),
330  containerWorkDir(containerWorkDir),
331  containerName(name(id)),
332  launchesExecutorContainer(launchesExecutorContainer)
333  {
334  // NOTE: The task's resources are included in the executor's
335  // resources in order to make sure when launching the executor
336  // that it has non-zero resources in the event the executor was
337  // not actually given any resources by the framework
338  // originally. See Framework::launchExecutor in slave.cpp. We
339  // check that this is indeed the case here to protect ourselves
340  // from when/if this changes in the future (but it's not a
341  // perfect check because an executor might always have a subset
342  // of it's resources that match a task, nevertheless, it's
343  // better than nothing).
344  resources = containerConfig.resources();
345 
346  if (containerConfig.has_task_info()) {
347  CHECK(resources.contains(containerConfig.task_info().resources()));
348  }
349 
350  if (_command.isSome()) {
351  command = _command.get();
352  } else {
353  command = containerConfig.command_info();
354  }
355 
356  if (_container.isSome()) {
357  container = _container.get();
358  } else {
359  // NOTE: The existence of this field is checked in
360  // DockerContainerizerProcess::launch.
361  container = containerConfig.container_info();
362  }
363  }
364 
365  ~Container()
366  {
367  if (symlinked) {
368  // The sandbox directory is a symlink, remove it at container
369  // destroy.
370  os::rm(containerWorkDir);
371  }
372  }
373 
374  Option<std::string> executorName()
375  {
376  if (launchesExecutorContainer) {
377  return containerName + DOCKER_NAME_SEPERATOR + "executor";
378  } else {
379  return None();
380  }
381  }
382 
383  std::string image() const
384  {
385  if (containerConfig.has_task_info()) {
386  return containerConfig.task_info().container().docker().image();
387  }
388 
389  return containerConfig.executor_info().container().docker().image();
390  }
391 
392  bool forcePullImage() const
393  {
394  if (containerConfig.has_task_info()) {
395  return containerConfig.task_info()
396  .container().docker().force_pull_image();
397  }
398 
399  return containerConfig.executor_info()
400  .container().docker().force_pull_image();
401  }
402 
403  // The DockerContainerizer needs to be able to properly clean up
404  // Docker containers, regardless of when they are destroyed. For
405  // example, if a container gets destroyed while we are fetching,
406  // we need to not keep running the fetch, nor should we try and
407  // start the Docker container. For this reason, we've split out
408  // the states into:
409  //
410  // FETCHING
411  // PULLING
412  // MOUNTING
413  // RUNNING
414  // DESTROYING
415  //
416  // In particular, we made 'PULLING' be its own state so that we
417  // can easily destroy and clean up when a user initiated pulling
418  // a really big image but we time out due to the executor
419  // registration timeout. Since we currently have no way to discard
420  // a Docker::run, we needed to explicitly do the pull (which is
421  // the part that takes the longest) so that we can also explicitly
422  // kill it when asked. Once the functions at Docker::* get support
423  // for discarding, then we won't need to make pull be its own
424  // state anymore, although it doesn't hurt since it gives us
425  // better error messages.
426  enum State
427  {
428  FETCHING = 1,
429  PULLING = 2,
430  MOUNTING = 3,
431  RUNNING = 4,
432  DESTROYING = 5
433  } state; // Current state of this container; both constructors start it at FETCHING.
434 
435  // Copies of the parameters sent to `Container::create`.
436  const ContainerID id;
437  const mesos::slave::ContainerConfig containerConfig;
438  const Option<std::string> pidCheckpointPath;
439 
440  // A copy of the parameter sent to `Container::create`.
441  // NOTE: This may be modified further by hooks.
442  std::map<std::string, std::string> environment;
443 
444  // The sandbox directory for the container. This holds the
445  // symlinked path if symlinked boolean is true.
446  // TODO(josephw): The symlink path does not persist across failovers,
447  // so we will not delete the symlink if the agent restarts. This results
448  // in gradually leaking hanging symlinks.
449  bool symlinked;
450  std::string containerWorkDir;
451 
452  // Copies of the fields in `containerConfig`, except when the
453  // container is a command task and the agent is launched with
454  // the --docker_mesos_image flag.
455  ContainerInfo container;
456  CommandInfo command;
457 
458  // Environment variables that the command executor should pass
459  // onto a docker-ized task. This is set by a hook.
461 
462  // The string used to refer to this container via the Docker CLI.
463  // This name is either computed by concatenating the DOCKER_NAME_PREFIX
464  // and the ContainerID; or during recovery, by taking the recovered
465  // container's name.
466  std::string containerName;
467 
468  // Promise for future returned from wait().
470 
471  // Exit status of executor or container (depending on whether or
472  // not we used the command executor). Represented as a promise so
473  // that destroying can chain with it being set.
475 
476  // Future that tells us the return value of last launch stage (fetch, pull,
477  // run, etc).
479 
480  // We keep track of the resources for each container so we can set
481  // the ResourceStatistics limits in usage(). Note that this is
482  // different than just what we might get from TaskInfo::resources
483  // or ExecutorInfo::resources because they can change dynamically.
485 
486  // The docker pull future is stored so we can discard when
487  // destroy is called while docker is pulling the image.
489 
490  // Once the container is running, this saves the pid of the
491  // running container.
492  Option<pid_t> pid;
493 
494  // The executor pid that was forked to wait on the running
495  // container. This is stored so we can clean up the executor
496  // on destroy.
497  Option<pid_t> executorPid;
498 
499 #ifdef __linux__
500  // GPU resources allocated to the container.
501  std::set<Gpu> gpus;
502 #endif // __linux__
503 
504  // Marks if this container launches an executor in a docker
505  // container.
506  bool launchesExecutorContainer;
507  };
508 
510 };
511 
512 
513 } // namespace slave {
514 } // namespace internal {
515 } // namespace mesos {
516 
517 #endif // __DOCKER_CONTAINERIZER_HPP__
Definition: option.hpp:28
virtual process::Future< Nothing > recover(const Option< state::SlaveState > &state)
Try< Nothing > rm(const std::string &path)
Definition: rm.hpp:26
Definition: fetcher.hpp:49
Definition: check.hpp:33
virtual process::Future< hashset< ContainerID > > containers()
virtual process::Future< Containerizer::LaunchResult > launch(const ContainerID &containerId, const mesos::slave::ContainerConfig &containerConfig, const std::map< std::string, std::string > &environment, const Option< std::string > &pidCheckpointPath)
Definition: resources.hpp:79
Try< T > fetch(const std::string &value)
Definition: fetch.hpp:38
Definition: flags.hpp:39
bool isSome() const
Definition: option.hpp:115
Definition: docker.hpp:91
virtual process::Future< Nothing > update(const ContainerID &containerId, const Resources &resources)
virtual process::Future< Option< mesos::slave::ContainerTermination > > wait(const ContainerID &containerId)
virtual process::Future< ResourceStatistics > usage(const ContainerID &containerId)
Environment * environment
DWORD pid_t
Definition: windows.hpp:187
Definition: containerizer.hpp:57
DockerContainerizer(const Flags &flags, Fetcher *fetcher, const process::Owned< mesos::slave::ContainerLogger > &logger, process::Shared< Docker > docker, const Option< NvidiaComponents > &nvidia=None())
virtual process::Future< Option< mesos::slave::ContainerTermination > > destroy(const ContainerID &containerId)
Definition: spec.hpp:30
const T & get() const &
Definition: option.hpp:118
const std::string DOCKER_NAME_PREFIX
Try< Nothing > checkpoint(const std::string &path, const std::string &message)
Definition: state.hpp:123
#define flags
Definition: decoder.hpp:18
URI image(const std::string &repository, const std::string &reference, const std::string &registry, const Option< std::string > &scheme=None(), const Option< int > &port=None())
Definition: docker.hpp:30
Definition: none.hpp:27
Definition: attributes.hpp:24
virtual process::Future< ContainerStatus > status(const ContainerID &containerId)
const std::string DOCKER_SYMLINK_DIRECTORY
Definition: executor.hpp:47
static Try< Resources > resources(const Flags &flags)
std::string stringify(int flags)
const std::string DOCKER_NAME_SEPERATOR
Definition: spec.hpp:34
bool contains(const Resources &that) const
Definition: process.hpp:501
DockerContainerizerProcess(const Flags &_flags, Fetcher *_fetcher, const process::Owned< mesos::slave::ContainerLogger > &_logger, process::Shared< Docker > _docker, const Option< NvidiaComponents > &_nvidia)
Definition: docker.hpp:126
Definition: parse.hpp:33
constexpr const char * name
Definition: shell.hpp:43
virtual process::Future< Nothing > pruneImages(const std::vector< Image > &excludedImages)
static Try< DockerContainerizer * > create(const Flags &flags, Fetcher *fetcher, const Option< NvidiaComponents > &nvidia=None())