Apache Mesos
docker.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __DOCKER_CONTAINERIZER_HPP__
18 #define __DOCKER_CONTAINERIZER_HPP__
19 
20 #include <map>
21 #include <set>
22 #include <string>
23 
25 
26 #include <process/owned.hpp>
27 #include <process/shared.hpp>
28 
31 
32 #include <stout/duration.hpp>
33 #include <stout/flags.hpp>
34 #include <stout/hashset.hpp>
35 
36 #include "docker/docker.hpp"
37 #include "docker/executor.hpp"
38 
40 
42 
43 namespace mesos {
44 namespace internal {
45 namespace slave {
46 
47 // Prefix used to name Docker containers in order to distinguish those
48 // created by Mesos from those created manually.
49 extern const std::string DOCKER_NAME_PREFIX;
50 
51 // Separator used to compose docker container name, which consists
52 // of the name prefix, ContainerID, and possibly the SlaveID depending
53 // on the version of Mesos used to create the container.
54 extern const std::string DOCKER_NAME_SEPERATOR;
55 
56 // Directory that stores all the symlinked sandboxes that are mapped
57 // into Docker containers. This is a relative directory that will be
58 // joined with the slave path. Only sandbox paths that contain a
59 // colon will be symlinked due to the limitation of the Docker CLI.
60 extern const std::string DOCKER_SYMLINK_DIRECTORY;
61 
62 
63 // Forward declaration.
64 class DockerContainerizerProcess;
65 
66 
68 {
69 public:
71  const Flags& flags,
72  Fetcher* fetcher,
73  const Option<NvidiaComponents>& nvidia = None());
74 
75  // This is only public for tests.
77  const Flags& flags,
78  Fetcher* fetcher,
81  const Option<NvidiaComponents>& nvidia = None());
82 
83  // This is only public for tests.
86 
87  ~DockerContainerizer() override;
88 
90  const Option<state::SlaveState>& state) override;
91 
93  const ContainerID& containerId,
94  const mesos::slave::ContainerConfig& containerConfig,
95  const std::map<std::string, std::string>& environment,
96  const Option<std::string>& pidCheckpointPath) override;
97 
99  const ContainerID& containerId,
100  const Resources& resources) override;
101 
103  const ContainerID& containerId) override;
104 
106  const ContainerID& containerId) override;
107 
109  const ContainerID& containerId) override;
110 
112  const ContainerID& containerId) override;
113 
115 
117  const std::vector<Image>& excludedImages) override;
118 
119 private:
121 };
122 
123 
124 
126  : public process::Process<DockerContainerizerProcess>
127 {
128 public:
130  const Flags& _flags,
131  Fetcher* _fetcher,
133  process::Shared<Docker> _docker,
134  const Option<NvidiaComponents>& _nvidia)
135  : flags(_flags),
136  fetcher(_fetcher),
137  logger(_logger),
138  docker(_docker),
139  nvidia(_nvidia) {}
140 
142  const Option<state::SlaveState>& state);
143 
145  const ContainerID& containerId,
146  const mesos::slave::ContainerConfig& containerConfig,
147  const std::map<std::string, std::string>& environment,
148  const Option<std::string>& pidCheckpointPath);
149 
150  // force = true causes the containerizer to update the resources
151  // for the container, even if they match what it has cached.
153  const ContainerID& containerId,
154  const Resources& resources,
155  bool force);
156 
158  const ContainerID& containerId);
159 
161  const ContainerID& containerId);
162 
164  const ContainerID& containerId);
165 
167  const ContainerID& containerId,
168  bool killed = true); // process is either killed or reaped.
169 
170  virtual process::Future<Nothing> fetch(const ContainerID& containerId);
171 
172  virtual process::Future<Nothing> pull(const ContainerID& containerId);
173 
175 
176 private:
177  struct Metrics
178  {
179  Metrics() : image_pull("containerizer/docker/image_pull", Hours(1))
180  {
181  process::metrics::add(image_pull);
182  }
183 
184  ~Metrics()
185  {
186  process::metrics::remove(image_pull);
187  }
188 
190  };
191 
192  // Continuations and helpers.
194  const ContainerID& containerId,
195  const Option<int>& status);
196 
198  const ContainerID& containerId,
199  pid_t pid);
200 
202  const ContainerID& containerId,
203  const mesos::slave::ContainerConfig& containerConfig);
204 
205  process::Future<Nothing> _recover(
206  const Option<state::SlaveState>& state,
207  const std::vector<Docker::Container>& containers);
208 
209  process::Future<Nothing> __recover(
210  const std::vector<Docker::Container>& containers);
211 
212  // Starts the executor in a Docker container.
213  process::Future<Docker::Container> launchExecutorContainer(
214  const ContainerID& containerId,
215  const std::string& containerName);
216 
217  // Starts the docker executor with a subprocess.
218  process::Future<pid_t> launchExecutorProcess(
219  const ContainerID& containerId);
220 
221  process::Future<pid_t> checkpointExecutor(
222  const ContainerID& containerId,
223  const Docker::Container& dockerContainer);
224 
225  // Reaps on the executor pid.
226  process::Future<Nothing> reapExecutor(
227  const ContainerID& containerId,
228  pid_t pid);
229 
230  void _destroy(
231  const ContainerID& containerId,
232  bool killed);
233 
234  void __destroy(
235  const ContainerID& containerId,
236  bool killed,
237  const process::Future<Nothing>& future);
238 
239  void ___destroy(
240  const ContainerID& containerId,
241  bool killed,
242  const process::Future<Option<int>>& status);
243 
244  void ____destroy(
245  const ContainerID& containerId,
246  bool killed,
247  const process::Future<Option<int>>& status);
248 
249  process::Future<Nothing> destroyTimeout(
250  const ContainerID& containerId,
251  process::Future<Nothing> future);
252 
253 #ifdef __linux__
254  process::Future<Nothing> _update(
255  const ContainerID& containerId,
256  const Resources& resources,
257  const Docker::Container& container);
258 
259  process::Future<Nothing> __update(
260  const ContainerID& containerId,
261  const Resources& resources);
262 #endif // __linux__
263 
264  process::Future<Nothing> mountPersistentVolumes(
265  const ContainerID& containerId);
266 
267  Try<Nothing> unmountPersistentVolumes(
268  const ContainerID& containerId);
269 
270  Try<Nothing> updatePersistentVolumes(
271  const ContainerID& containerId,
272  const std::string& directory,
273  const Resources& current,
274  const Resources& updated);
275 
276 #ifdef __linux__
277  // Allocate GPU resources for a specified container.
278  process::Future<Nothing> allocateNvidiaGpus(
279  const ContainerID& containerId,
280  const size_t count);
281 
282  process::Future<Nothing> _allocateNvidiaGpus(
283  const ContainerID& containerId,
284  const std::set<Gpu>& allocated);
285 
286  // Deallocate GPU resources for a specified container.
287  process::Future<Nothing> deallocateNvidiaGpus(
288  const ContainerID& containerId);
289 
290  process::Future<Nothing> _deallocateNvidiaGpus(
291  const ContainerID& containerId,
292  const std::set<Gpu>& deallocated);
293 #endif // __linux__
294 
295  Try<ResourceStatistics> cgroupsStatistics(pid_t pid) const;
296 
297  // Call back for when the executor exits. This will trigger
298  // container destroy.
299  void reaped(const ContainerID& containerId);
300 
301  // Removes the docker container.
302  void remove(
303  const std::string& containerName,
304  const Option<std::string>& executor);
305 
306  const Flags flags;
307 
308  Fetcher* fetcher;
309 
311 
313 
315 
317 
318  struct Container
319  {
320  static Try<Container*> create(
321  const ContainerID& id,
322  const mesos::slave::ContainerConfig& containerConfig,
323  const std::map<std::string, std::string>& environment,
324  const Option<std::string>& pidCheckpointPath,
325  const Flags& flags);
326 
327  static std::string name(const ContainerID& id)
328  {
329  return DOCKER_NAME_PREFIX + stringify(id);
330  }
331 
332  Container(const ContainerID& id)
333  : state(FETCHING), id(id) {}
334 
335  Container(
336  const ContainerID& _id,
337  const mesos::slave::ContainerConfig& _containerConfig,
338  const std::map<std::string, std::string>& _environment,
339  const Option<std::string>& _pidCheckpointPath,
340  bool symlinked,
341  const std::string& containerWorkDir,
342  const Option<CommandInfo>& _command,
343  const Option<ContainerInfo>& _container,
344  bool launchesExecutorContainer)
345  : state(FETCHING),
346  id(_id),
347  containerConfig(_containerConfig),
348  pidCheckpointPath(_pidCheckpointPath),
349  environment(_environment),
350  symlinked(symlinked),
351  containerWorkDir(containerWorkDir),
352  containerName(name(id)),
353  launchesExecutorContainer(launchesExecutorContainer)
354  {
355  // NOTE: The task's resources are included in the executor's
356  // resources in order to make sure when launching the executor
357  // that it has non-zero resources in the event the executor was
358  // not actually given any resources by the framework
359  // originally. See Framework::launchExecutor in slave.cpp. We
360  // check that this is indeed the case here to protect ourselves
361  // from when/if this changes in the future (but it's not a
362  // perfect check because an executor might always have a subset
363  // of it's resources that match a task, nevertheless, it's
364  // better than nothing).
365  resources = containerConfig.resources();
366 
367  if (containerConfig.has_task_info()) {
368  CHECK(resources.contains(containerConfig.task_info().resources()));
369  }
370 
371  if (_command.isSome()) {
372  command = _command.get();
373  } else {
374  command = containerConfig.command_info();
375  }
376 
377  if (_container.isSome()) {
378  container = _container.get();
379  } else {
380  // NOTE: The existence of this field is checked in
381  // DockerContainerizerProcess::launch.
382  container = containerConfig.container_info();
383  }
384  }
385 
386  ~Container()
387  {
388  if (symlinked) {
389  // The sandbox directory is a symlink, remove it at container
390  // destroy.
391  os::rm(containerWorkDir);
392  }
393  }
394 
395  Option<std::string> executorName()
396  {
397  if (launchesExecutorContainer) {
398  return containerName + DOCKER_NAME_SEPERATOR + "executor";
399  } else {
400  return None();
401  }
402  }
403 
404  std::string image() const
405  {
406  if (containerConfig.has_task_info()) {
407  return containerConfig.task_info().container().docker().image();
408  }
409 
410  return containerConfig.executor_info().container().docker().image();
411  }
412 
413  bool forcePullImage() const
414  {
415  if (containerConfig.has_task_info()) {
416  return containerConfig.task_info()
417  .container().docker().force_pull_image();
418  }
419 
420  return containerConfig.executor_info()
421  .container().docker().force_pull_image();
422  }
423 
424  // The DockerContainerizer needs to be able to properly clean up
425  // Docker containers, regardless of when they are destroyed. For
426  // example, if a container gets destroyed while we are fetching,
427  // we need to not keep running the fetch, nor should we try and
428  // start the Docker container. For this reason, we've split out
429  // the states into:
430  //
431  // FETCHING
432  // PULLING
433  // MOUNTING
434  // RUNNING
435  // DESTROYING
436  //
437  // In particular, we made 'PULLING' be its own state so that we
438  // can easily destroy and cleanup when a user initiated pulling
439  // a really big image but we timeout due to the executor
440  // registration timeout. Since we currently have no way to discard
441  // a Docker::run, we needed to explicitly do the pull (which is
442  // the part that takes the longest) so that we can also explicitly
443  // kill it when asked. Once the functions at Docker::* get support
444  // for discarding, then we won't need to make pull be its own
445  // state anymore, although it doesn't hurt since it gives us
446  // better error messages.
447  enum State
448  {
449  FETCHING = 1,
450  PULLING = 2,
451  MOUNTING = 3,
452  RUNNING = 4,
453  DESTROYING = 5
454  } state;
455 
456  // Copies of the parameters sent to `Container::create`.
457  const ContainerID id;
458  const mesos::slave::ContainerConfig containerConfig;
459  const Option<std::string> pidCheckpointPath;
460 
461  // A copy of the parameter sent to `Container::create`.
462  // NOTE: This may be modified further by hooks.
463  std::map<std::string, std::string> environment;
464 
465  // The sandbox directory for the container. This holds the
466  // symlinked path if symlinked boolean is true.
467  // TODO(josephw): The symlink path does not persist across failovers,
468  // so we will not delete the symlink if the agent restarts. This results
469  // in gradually leaking hanging symlinks.
470  bool symlinked;
471  std::string containerWorkDir;
472 
473  // Copies of the fields in `containerConfig`, except when the
474  // container is a command task and the agent is launched with
475  // the --docker_mesos_image flag.
476  ContainerInfo container;
477  CommandInfo command;
478 
479  // Environment variables that the command executor should pass
480  // onto a docker-ized task. This is set by a hook.
482 
483  // The string used to refer to this container via the Docker CLI.
484  // This name is either computed by concatenating the DOCKER_NAME_PREFIX
485  // and the ContainerID; or during recovery, by taking the recovered
486  // container's name.
487  std::string containerName;
488 
489  // Promise for future returned from wait().
491 
492  // Exit status of executor or container (depending on whether or
493  // not we used the command executor). Represented as a promise so
494  // that destroying can chain with it being set.
496 
497  // Future that tells us the return value of last launch stage (fetch, pull,
498  // run, etc).
500 
501  // We keep track of the resources for each container so we can set
502  // the ResourceStatistics limits in usage(). Note that this is
503  // different than just what we might get from TaskInfo::resources
504  // or ExecutorInfo::resources because they can change dynamically.
506 
507  // The docker pull future is stored so we can discard when
508  // destroy is called while docker is pulling the image.
510 
511  // Once the container is running, this saves the pid of the
512  // running container.
513  Option<pid_t> pid;
514 
515  // The executor pid that was forked to wait on the running
516  // container. This is stored so we can clean up the executor
517  // on destroy.
518  Option<pid_t> executorPid;
519 
520 #ifdef __linux__
521  // GPU resources allocated to the container.
522  std::set<Gpu> gpus;
523 
524  Option<std::string> cpuCgroup;
525  Option<std::string> memoryCgroup;
526 #endif // __linux__
527 
528  // Marks if this container launches an executor in a docker
529  // container.
530  bool launchesExecutorContainer;
531  };
532 
534 };
535 
536 
537 } // namespace slave {
538 } // namespace internal {
539 } // namespace mesos {
540 
541 #endif // __DOCKER_CONTAINERIZER_HPP__
Try< Nothing > checkpoint(const std::string &path, const std::string &message, bool sync, bool downgradeResources)
Definition: state.hpp:123
Definition: option.hpp:29
Try< Nothing > rm(const std::string &path)
Definition: rm.hpp:26
Future< Nothing > remove(const Metric &metric)
Definition: metrics.hpp:109
Definition: fetcher.hpp:49
process::Future< Nothing > recover(const Option< state::SlaveState > &state) override
Definition: check.hpp:33
process::Future< ContainerStatus > status(const ContainerID &containerId) override
Definition: resources.hpp:83
process::Future< hashset< ContainerID > > containers() override
Try< T > fetch(const std::string &value)
Definition: fetch.hpp:38
Definition: flags.hpp:39
process::Future< Option< mesos::slave::ContainerTermination > > destroy(const ContainerID &containerId) override
Future< Nothing > add(const T &metric)
Definition: metrics.hpp:95
bool isSome() const
Definition: option.hpp:116
Definition: docker.hpp:91
Environment * environment
DWORD pid_t
Definition: windows.hpp:181
process::Future< Nothing > update(const ContainerID &containerId, const Resources &resources) override
process::Future< Containerizer::LaunchResult > launch(const ContainerID &containerId, const mesos::slave::ContainerConfig &containerConfig, const std::map< std::string, std::string > &environment, const Option< std::string > &pidCheckpointPath) override
Definition: containerizer.hpp:63
DockerContainerizer(const Flags &flags, Fetcher *fetcher, const process::Owned< mesos::slave::ContainerLogger > &logger, process::Shared< Docker > docker, const Option< NvidiaComponents > &nvidia=None())
Definition: agent.hpp:25
process::Future< ResourceStatistics > usage(const ContainerID &containerId) override
const T & get() const &
Definition: option.hpp:119
const std::string DOCKER_NAME_PREFIX
process::Future< Nothing > pruneImages(const std::vector< Image > &excludedImages) override
Definition: duration.hpp:235
#define flags
Definition: decoder.hpp:18
URI image(const std::string &repository, const std::string &reference, const std::string &registry, const Option< std::string > &scheme=None(), const Option< int > &port=None())
Definition: docker.hpp:30
Definition: none.hpp:27
Definition: attributes.hpp:24
const std::string DOCKER_SYMLINK_DIRECTORY
Definition: executor.hpp:48
static Try< Resources > resources(const Flags &flags)
JSON::Object Metrics()
std::string stringify(int flags)
const std::string DOCKER_NAME_SEPERATOR
Definition: spec.hpp:35
bool contains(const Resources &that) const
Definition: process.hpp:505
DockerContainerizerProcess(const Flags &_flags, Fetcher *_fetcher, const process::Owned< mesos::slave::ContainerLogger > &_logger, process::Shared< Docker > _docker, const Option< NvidiaComponents > &_nvidia)
Definition: docker.hpp:129
Definition: parse.hpp:33
PID< MetricsProcess > metrics
constexpr const char * name
Definition: shell.hpp:43
static Try< DockerContainerizer * > create(const Flags &flags, Fetcher *fetcher, const Option< NvidiaComponents > &nvidia=None())
process::Future< Option< mesos::slave::ContainerTermination > > wait(const ContainerID &containerId) override