Apache Mesos
docker.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __DOCKER_CONTAINERIZER_HPP__
18 #define __DOCKER_CONTAINERIZER_HPP__
19 
20 #include <list>
21 #include <map>
22 #include <set>
23 #include <string>
24 
26 
27 #include <process/owned.hpp>
28 #include <process/shared.hpp>
29 
30 #include <stout/flags.hpp>
31 #include <stout/hashset.hpp>
32 
33 #include "docker/docker.hpp"
34 #include "docker/executor.hpp"
35 
37 
39 
40 namespace mesos {
41 namespace internal {
42 namespace slave {
43 
44 // Prefix used to name Docker containers in order to distinguish those
45 // created by Mesos from those created manually.
46 extern const std::string DOCKER_NAME_PREFIX;
47 
48 // Separator used to compose docker container name, which consists
49 // of the name prefix, ContainerID, and possibly the SlaveID depending
50 // on the version of Mesos used to create the container.
51 extern const std::string DOCKER_NAME_SEPERATOR;
52 
53 // Directory that stores all the symlinked sandboxes that are mapped
54 // into Docker containers. This is a relative directory that will be
55 // joined with the slave path. Only sandbox paths that contain a
56 // colon will be symlinked due to a limitation of the Docker CLI.
57 extern const std::string DOCKER_SYMLINK_DIRECTORY;
58 
59 
60 // Forward declaration.
61 class DockerContainerizerProcess;
62 
63 
65 {
66 public:
68  const Flags& flags,
69  Fetcher* fetcher,
70  const Option<NvidiaComponents>& nvidia = None());
71 
72  // This is only public for tests.
74  const Flags& flags,
75  Fetcher* fetcher,
78  const Option<NvidiaComponents>& nvidia = None());
79 
80  // This is only public for tests.
83 
84  virtual ~DockerContainerizer();
85 
87  const Option<state::SlaveState>& state);
88 
90  const ContainerID& containerId,
91  const mesos::slave::ContainerConfig& containerConfig,
92  const std::map<std::string, std::string>& environment,
93  const Option<std::string>& pidCheckpointPath);
94 
96  const ContainerID& containerId,
97  const Resources& resources);
98 
100  const ContainerID& containerId);
101 
103  const ContainerID& containerId);
104 
106  const ContainerID& containerId);
107 
108  virtual process::Future<bool> destroy(const ContainerID& containerId);
109 
111 
113  const std::vector<Image>& excludedImages);
114 
115 private:
117 };
118 
119 
120 
122  : public process::Process<DockerContainerizerProcess>
123 {
124 public:
126  const Flags& _flags,
127  Fetcher* _fetcher,
129  process::Shared<Docker> _docker,
130  const Option<NvidiaComponents>& _nvidia)
131  : flags(_flags),
132  fetcher(_fetcher),
133  logger(_logger),
134  docker(_docker),
135  nvidia(_nvidia) {}
136 
138  const Option<state::SlaveState>& state);
139 
141  const ContainerID& containerId,
142  const mesos::slave::ContainerConfig& containerConfig,
143  const std::map<std::string, std::string>& environment,
144  const Option<std::string>& pidCheckpointPath);
145 
146  // force = true causes the containerizer to update the resources
147  // for the container, even if they match what it has cached.
149  const ContainerID& containerId,
150  const Resources& resources,
151  bool force);
152 
154  const ContainerID& containerId);
155 
157  const ContainerID& containerId);
158 
160  const ContainerID& containerId);
161 
163  const ContainerID& containerId,
164  bool killed = true); // process is either killed or reaped.
165 
166  virtual process::Future<Nothing> fetch(const ContainerID& containerId);
167 
168  virtual process::Future<Nothing> pull(const ContainerID& containerId);
169 
171 
172 private:
173  // Continuations and helpers.
175  const ContainerID& containerId,
176  const Option<int>& status);
177 
178  Try<Nothing> checkpoint(
179  const ContainerID& containerId,
180  pid_t pid);
181 
183  const ContainerID& containerId,
184  const mesos::slave::ContainerConfig& containerConfig);
185 
186  process::Future<Nothing> _recover(
187  const Option<state::SlaveState>& state,
188  const std::list<Docker::Container>& containers);
189 
190  process::Future<Nothing> __recover(
191  const std::list<Docker::Container>& containers);
192 
193  // Starts the executor in a Docker container.
194  process::Future<Docker::Container> launchExecutorContainer(
195  const ContainerID& containerId,
196  const std::string& containerName);
197 
198  // Starts the docker executor with a subprocess.
199  process::Future<pid_t> launchExecutorProcess(
200  const ContainerID& containerId);
201 
202  process::Future<pid_t> checkpointExecutor(
203  const ContainerID& containerId,
204  const Docker::Container& dockerContainer);
205 
206  // Reaps on the executor pid.
207  process::Future<Nothing> reapExecutor(
208  const ContainerID& containerId,
209  pid_t pid);
210 
211  void _destroy(
212  const ContainerID& containerId,
213  bool killed);
214 
215  void __destroy(
216  const ContainerID& containerId,
217  bool killed,
218  const process::Future<Nothing>& future);
219 
220  void ___destroy(
221  const ContainerID& containerId,
222  bool killed,
224 
225  void ____destroy(
226  const ContainerID& containerId,
227  bool killed,
229 
230  process::Future<Nothing> destroyTimeout(
231  const ContainerID& containerId,
232  process::Future<Nothing> future);
233 
234  process::Future<Nothing> _update(
235  const ContainerID& containerId,
236  const Resources& resources,
237  const Docker::Container& container);
238 
239  process::Future<Nothing> __update(
240  const ContainerID& containerId,
241  const Resources& resources,
242  pid_t pid);
243 
244  process::Future<Nothing> mountPersistentVolumes(
245  const ContainerID& containerId);
246 
247  Try<Nothing> unmountPersistentVolumes(
248  const ContainerID& containerId);
249 
250  Try<Nothing> updatePersistentVolumes(
251  const ContainerID& containerId,
252  const std::string& directory,
253  const Resources& current,
254  const Resources& updated);
255 
256 #ifdef __linux__
257  // Allocate GPU resources for a specified container.
258  process::Future<Nothing> allocateNvidiaGpus(
259  const ContainerID& containerId,
260  const size_t count);
261 
262  process::Future<Nothing> _allocateNvidiaGpus(
263  const ContainerID& containerId,
264  const std::set<Gpu>& allocated);
265 
266  // Deallocate GPU resources for a specified container.
267  process::Future<Nothing> deallocateNvidiaGpus(
268  const ContainerID& containerId);
269 
270  process::Future<Nothing> _deallocateNvidiaGpus(
271  const ContainerID& containerId,
272  const std::set<Gpu>& deallocated);
273 #endif // __linux__
274 
275  Try<ResourceStatistics> cgroupsStatistics(pid_t pid) const;
276 
277 // Callback invoked when the executor exits. This will trigger
278 // container destroy.
279  void reaped(const ContainerID& containerId);
280 
281  // Removes the docker container.
282  void remove(
283  const std::string& containerName,
284  const Option<std::string>& executor);
285 
286  const Flags flags;
287 
288  Fetcher* fetcher;
289 
291 
293 
295 
296  struct Container
297  {
298  static Try<Container*> create(
299  const ContainerID& id,
300  const mesos::slave::ContainerConfig& containerConfig,
301  const std::map<std::string, std::string>& environment,
302  const Option<std::string>& pidCheckpointPath,
303  const Flags& flags);
304 
305  static std::string name(const ContainerID& id)
306  {
307  return DOCKER_NAME_PREFIX + stringify(id);
308  }
309 
310  Container(const ContainerID& id)
311  : state(FETCHING), id(id) {}
312 
313  Container(
314  const ContainerID& _id,
315  const mesos::slave::ContainerConfig& _containerConfig,
316  const std::map<std::string, std::string>& _environment,
317  const Option<std::string>& _pidCheckpointPath,
318  bool symlinked,
319  const std::string& containerWorkDir,
320  const Option<CommandInfo>& _command,
321  const Option<ContainerInfo>& _container,
322  bool launchesExecutorContainer)
323  : state(FETCHING),
324  id(_id),
325  containerConfig(_containerConfig),
326  pidCheckpointPath(_pidCheckpointPath),
327  environment(_environment),
328  symlinked(symlinked),
329  containerWorkDir(containerWorkDir),
330  containerName(name(id)),
331  launchesExecutorContainer(launchesExecutorContainer)
332  {
333  // NOTE: The task's resources are included in the executor's
334  // resources in order to make sure when launching the executor
335  // that it has non-zero resources in the event the executor was
336  // not actually given any resources by the framework
337  // originally. See Framework::launchExecutor in slave.cpp. We
338  // check that this is indeed the case here to protect ourselves
339  // from when/if this changes in the future (but it's not a
340  // perfect check because an executor might always have a subset
341  // of it's resources that match a task, nevertheless, it's
342  // better than nothing).
343  resources = containerConfig.resources();
344 
345  if (containerConfig.has_task_info()) {
346  CHECK(resources.contains(containerConfig.task_info().resources()));
347  }
348 
349  if (_command.isSome()) {
350  command = _command.get();
351  } else {
352  command = containerConfig.command_info();
353  }
354 
355  if (_container.isSome()) {
356  container = _container.get();
357  } else {
358  // NOTE: The existence of this field is checked in
359  // DockerContainerizerProcess::launch.
360  container = containerConfig.container_info();
361  }
362  }
363 
364  ~Container()
365  {
366  if (symlinked) {
367  // The sandbox directory is a symlink, remove it at container
368  // destroy.
369  os::rm(containerWorkDir);
370  }
371  }
372 
373  Option<std::string> executorName()
374  {
375  if (launchesExecutorContainer) {
376  return containerName + DOCKER_NAME_SEPERATOR + "executor";
377  } else {
378  return None();
379  }
380  }
381 
382  std::string image() const
383  {
384  if (containerConfig.has_task_info()) {
385  return containerConfig.task_info().container().docker().image();
386  }
387 
388  return containerConfig.executor_info().container().docker().image();
389  }
390 
391  bool forcePullImage() const
392  {
393  if (containerConfig.has_task_info()) {
394  return containerConfig.task_info()
395  .container().docker().force_pull_image();
396  }
397 
398  return containerConfig.executor_info()
399  .container().docker().force_pull_image();
400  }
401 
// Launch/teardown stage of a container.
//
// The DockerContainerizer must be able to clean up Docker containers
// properly no matter when they are destroyed. For example, if a
// container is destroyed while we are fetching, we must neither keep
// running the fetch nor try to start the Docker container. For this
// reason the stages are tracked as separate states:
//
//   FETCHING
//   PULLING
//   MOUNTING
//   RUNNING
//   DESTROYING
//
// 'PULLING' is its own state so that we can easily destroy and clean
// up when a user started pulling a really big image but we time out
// due to the executor registration timeout. Since there is currently
// no way to discard a Docker::run, the pull (the part that takes the
// longest) is done explicitly so that it can also be killed
// explicitly when asked. Once the Docker::* functions support
// discarding, pull no longer needs to be its own state, although
// keeping it does not hurt since it gives better error messages.
enum State
{
  FETCHING = 1,
  PULLING = 2,
  MOUNTING = 3,
  RUNNING = 4,
  DESTROYING = 5
};

// Current stage of this container.
State state;
433 
434  // Copies of the parameters sent to `Container::create`.
435  const ContainerID id;
436  const mesos::slave::ContainerConfig containerConfig;
437  const Option<std::string> pidCheckpointPath;
438 
439  // A copy of the parameter sent to `Container::create`.
440  // NOTE: This may be modified further by hooks.
441  std::map<std::string, std::string> environment;
442 
443  // The sandbox directory for the container. This holds the
444  // symlinked path if symlinked boolean is true.
445  // TODO(josephw): The symlink path does not persist across failovers,
446  // so we will not delete the symlink if the agent restarts. This results
447  // in gradually leaking hanging symlinks.
448  bool symlinked;
449  std::string containerWorkDir;
450 
451  // Copies of the fields in `containerConfig`, except when the
452  // container is a command task and the agent is launched with
453  // the --docker_mesos_image flag.
454  ContainerInfo container;
455  CommandInfo command;
456 
457  // Environment variables that the command executor should pass
458  // onto a docker-ized task. This is set by a hook.
460 
461  // The string used to refer to this container via the Docker CLI.
462  // This name is either computed by concatenating the DOCKER_NAME_PREFIX
463  // and the ContainerID; or during recovery, by taking the recovered
464  // container's name.
465  std::string containerName;
466 
467  // Promise for future returned from wait().
469 
470  // Exit status of executor or container (depending on whether or
471  // not we used the command executor). Represented as a promise so
472  // that destroying can chain with it being set.
474 
475  // Future that tells us the return value of last launch stage (fetch, pull,
476  // run, etc).
478 
479  // We keep track of the resources for each container so we can set
480  // the ResourceStatistics limits in usage(). Note that this is
481  // different than just what we might get from TaskInfo::resources
482  // or ExecutorInfo::resources because they can change dynamically.
483  Resources resources;
484 
485  // The docker pull future is stored so we can discard when
486  // destroy is called while docker is pulling the image.
488 
489  // Once the container is running, this saves the pid of the
490  // running container.
491  Option<pid_t> pid;
492 
493  // The executor pid that was forked to wait on the running
494  // container. This is stored so we can clean up the executor
495  // on destroy.
496  Option<pid_t> executorPid;
497 
498 #ifdef __linux__
499  // GPU resources allocated to the container.
500  std::set<Gpu> gpus;
501 #endif // __linux__
502 
503  // Marks if this container launches an executor in a docker
504  // container.
505  bool launchesExecutorContainer;
506  };
507 
509 };
510 
511 
512 } // namespace slave {
513 } // namespace internal {
514 } // namespace mesos {
515 
516 #endif // __DOCKER_CONTAINERIZER_HPP__
virtual process::Future< Nothing > pull(const ContainerID &containerId)
Definition: option.hpp:28
virtual process::Future< Nothing > recover(const Option< state::SlaveState > &state)
Try< Nothing > rm(const std::string &path)
Definition: rm.hpp:26
virtual process::Future< ContainerStatus > status(const ContainerID &containerId)
Definition: fetcher.hpp:49
Definition: try.hpp:34
virtual process::Future< hashset< ContainerID > > containers()
virtual process::Future< Containerizer::LaunchResult > launch(const ContainerID &containerId, const mesos::slave::ContainerConfig &containerConfig, const std::map< std::string, std::string > &environment, const Option< std::string > &pidCheckpointPath)
Definition: resources.hpp:79
virtual process::Future< bool > destroy(const ContainerID &containerId, bool killed=true)
Definition: flags.hpp:39
virtual process::Future< Option< mesos::slave::ContainerTermination > > wait(const ContainerID &containerId)
bool isSome() const
Definition: option.hpp:115
virtual process::Future< ResourceStatistics > usage(const ContainerID &containerId)
Definition: docker.hpp:89
virtual process::Future< hashset< ContainerID > > containers()
virtual process::Future< Nothing > update(const ContainerID &containerId, const Resources &resources)
virtual process::Future< Option< mesos::slave::ContainerTermination > > wait(const ContainerID &containerId)
virtual process::Future< ResourceStatistics > usage(const ContainerID &containerId)
Environment * environment
DWORD pid_t
Definition: windows.hpp:187
Definition: containerizer.hpp:57
DockerContainerizer(const Flags &flags, Fetcher *fetcher, const process::Owned< mesos::slave::ContainerLogger > &logger, process::Shared< Docker > docker, const Option< NvidiaComponents > &nvidia=None())
virtual process::Future< Nothing > update(const ContainerID &containerId, const Resources &resources, bool force)
virtual process::Future< Containerizer::LaunchResult > launch(const ContainerID &containerId, const mesos::slave::ContainerConfig &containerConfig, const std::map< std::string, std::string > &environment, const Option< std::string > &pidCheckpointPath)
const T & get() const &
Definition: option.hpp:118
const std::string DOCKER_NAME_PREFIX
virtual process::Future< Nothing > fetch(const ContainerID &containerId)
virtual process::Future< bool > destroy(const ContainerID &containerId)
#define flags
Definition: decoder.hpp:18
URI image(const std::string &repository, const std::string &reference, const std::string &registry, const Option< std::string > &scheme=None(), const Option< int > &port=None())
Definition: docker.hpp:30
Definition: none.hpp:27
virtual process::Future< ContainerStatus > status(const ContainerID &containerId)
const std::string DOCKER_SYMLINK_DIRECTORY
static Try< Resources > resources(const Flags &flags)
virtual process::Future< Nothing > recover(const Option< state::SlaveState > &state)
Try< Nothing > create(const std::string &hierarchy, const std::string &cgroup, bool recursive=false)
std::string stringify(int flags)
const std::string DOCKER_NAME_SEPERATOR
Definition: process.hpp:493
DockerContainerizerProcess(const Flags &_flags, Fetcher *_fetcher, const process::Owned< mesos::slave::ContainerLogger > &_logger, process::Shared< Docker > _docker, const Option< NvidiaComponents > &_nvidia)
Definition: docker.hpp:125
constexpr const char * name
Definition: shell.hpp:41
virtual process::Future< Nothing > pruneImages(const std::vector< Image > &excludedImages)
static Try< DockerContainerizer * > create(const Flags &flags, Fetcher *fetcher, const Option< NvidiaComponents > &nvidia=None())