Apache Mesos
docker.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __DOCKER_CONTAINERIZER_HPP__
18 #define __DOCKER_CONTAINERIZER_HPP__
19 
20 #include <map>
21 #include <set>
22 #include <string>
23 
25 
26 #include <process/owned.hpp>
27 #include <process/shared.hpp>
28 
31 
32 #include <stout/duration.hpp>
33 #include <stout/flags.hpp>
34 #include <stout/hashset.hpp>
35 
36 #include "docker/docker.hpp"
37 #include "docker/executor.hpp"
38 
40 
42 
43 namespace mesos {
44 namespace internal {
45 namespace slave {
46 
47 // Prefix used to name Docker containers in order to distinguish those
48 // created by Mesos from those created manually.
49 extern const std::string DOCKER_NAME_PREFIX;
50 
51 // Separator used to compose docker container name, which consists
52 // of the name prefix, ContainerID, and possibly the SlaveID depending
53 // on the version of Mesos used to create the container.
54 extern const std::string DOCKER_NAME_SEPERATOR;
55 
56 // Directory that stores all the symlinked sandboxes that are mapped
57 // into Docker containers. This is a relative directory that will be
58 // joined with the slave path. Only sandbox paths that contain a
59 // colon will be symlinked due to the limitation of the Docker CLI.
60 extern const std::string DOCKER_SYMLINK_DIRECTORY;
61 
62 
63 // Forward declaration.
64 class DockerContainerizerProcess;
65 
66 
68 {
69 public:
71  const Flags& flags,
72  Fetcher* fetcher,
73  const Option<NvidiaComponents>& nvidia = None());
74 
75  // This is only public for tests.
77  const Flags& flags,
78  Fetcher* fetcher,
81  const Option<NvidiaComponents>& nvidia = None());
82 
83  // This is only public for tests.
86 
87  ~DockerContainerizer() override;
88 
90  const Option<state::SlaveState>& state) override;
91 
93  const ContainerID& containerId,
94  const mesos::slave::ContainerConfig& containerConfig,
95  const std::map<std::string, std::string>& environment,
96  const Option<std::string>& pidCheckpointPath) override;
97 
99  const ContainerID& containerId,
100  const Resources& resourceRequests,
101  const google::protobuf::Map<
102  std::string, Value::Scalar>& resourceLimits = {}) override;
103 
105  const ContainerID& containerId) override;
106 
108  const ContainerID& containerId) override;
109 
111  const ContainerID& containerId) override;
112 
114  const ContainerID& containerId) override;
115 
117 
119  const std::vector<Image>& excludedImages) override;
120 
121 private:
123 };
124 
125 
126 
128  : public process::Process<DockerContainerizerProcess>
129 {
130 public:
132  const Flags& _flags,
133  Fetcher* _fetcher,
135  process::Shared<Docker> _docker,
136  const Option<NvidiaComponents>& _nvidia)
137  : flags(_flags),
138  fetcher(_fetcher),
139  logger(_logger),
140  docker(_docker),
141  nvidia(_nvidia) {}
142 
144  const Option<state::SlaveState>& state);
145 
147  const ContainerID& containerId,
148  const mesos::slave::ContainerConfig& containerConfig,
149  const std::map<std::string, std::string>& environment,
150  const Option<std::string>& pidCheckpointPath);
151 
152  // force = true causes the containerizer to update the resources
153  // for the container, even if they match what it has cached.
155  const ContainerID& containerId,
156  const Resources& resourceRequests,
157  const google::protobuf::Map<std::string, Value::Scalar>& resourceLimits,
158  bool force);
159 
161  const ContainerID& containerId);
162 
164  const ContainerID& containerId);
165 
167  const ContainerID& containerId);
168 
170  const ContainerID& containerId,
171  bool killed = true); // process is either killed or reaped.
172 
173  virtual process::Future<Nothing> fetch(const ContainerID& containerId);
174 
175  virtual process::Future<Nothing> pull(const ContainerID& containerId);
176 
178 
179 private:
180  struct Metrics
181  {
182  Metrics() : image_pull("containerizer/docker/image_pull", Hours(1))
183  {
184  process::metrics::add(image_pull);
185  }
186 
187  ~Metrics()
188  {
189  process::metrics::remove(image_pull);
190  }
191 
193  };
194 
195  // Continuations and helpers.
197  const ContainerID& containerId,
198  const Option<int>& status);
199 
201  const ContainerID& containerId,
202  pid_t pid);
203 
205  const ContainerID& containerId,
206  const mesos::slave::ContainerConfig& containerConfig);
207 
208  process::Future<Nothing> _recover(
209  const Option<state::SlaveState>& state,
210  const std::vector<Docker::Container>& containers);
211 
212  process::Future<Nothing> __recover(
213  const std::vector<Docker::Container>& containers);
214 
215  // Starts the executor in a Docker container.
216  process::Future<Docker::Container> launchExecutorContainer(
217  const ContainerID& containerId,
218  const std::string& containerName);
219 
220  // Starts the docker executor with a subprocess.
221  process::Future<pid_t> launchExecutorProcess(
222  const ContainerID& containerId);
223 
224  process::Future<pid_t> checkpointExecutor(
225  const ContainerID& containerId,
226  const Docker::Container& dockerContainer);
227 
228  // Reaps on the executor pid.
229  process::Future<Nothing> reapExecutor(
230  const ContainerID& containerId,
231  pid_t pid);
232 
233  void _destroy(
234  const ContainerID& containerId,
235  bool killed);
236 
237  void __destroy(
238  const ContainerID& containerId,
239  bool killed,
240  const process::Future<Nothing>& future);
241 
242  void ___destroy(
243  const ContainerID& containerId,
244  bool killed,
245  const process::Future<Option<int>>& status);
246 
247  void ____destroy(
248  const ContainerID& containerId,
249  bool killed,
250  const process::Future<Option<int>>& status);
251 
252  process::Future<Nothing> destroyTimeout(
253  const ContainerID& containerId,
254  process::Future<Nothing> future);
255 
256 #ifdef __linux__
257  process::Future<Nothing> _update(
258  const ContainerID& containerId,
259  const Resources& resourceRequests,
260  const google::protobuf::Map<std::string, Value::Scalar>& resourceLimits,
261  const Docker::Container& container);
262 
263  process::Future<Nothing> __update(
264  const ContainerID& containerId,
265  const Resources& resourceRequests,
266  const google::protobuf::Map<std::string, Value::Scalar>& resourceLimits);
267 #endif // __linux__
268 
269  process::Future<Nothing> mountPersistentVolumes(
270  const ContainerID& containerId);
271 
272  Try<Nothing> unmountPersistentVolumes(
273  const ContainerID& containerId);
274 
275  Try<Nothing> updatePersistentVolumes(
276  const ContainerID& containerId,
277  const std::string& directory,
278  const Resources& current,
279  const Resources& updated);
280 
281 #ifdef __linux__
282  // Allocate GPU resources for a specified container.
283  process::Future<Nothing> allocateNvidiaGpus(
284  const ContainerID& containerId,
285  const size_t count);
286 
287  process::Future<Nothing> _allocateNvidiaGpus(
288  const ContainerID& containerId,
289  const std::set<Gpu>& allocated);
290 
291  // Deallocate GPU resources for a specified container.
292  process::Future<Nothing> deallocateNvidiaGpus(
293  const ContainerID& containerId);
294 
295  process::Future<Nothing> _deallocateNvidiaGpus(
296  const ContainerID& containerId,
297  const std::set<Gpu>& deallocated);
298 #endif // __linux__
299 
300  Try<ResourceStatistics> cgroupsStatistics(pid_t pid) const;
301 
302  // Callback invoked when the executor exits. This will trigger
303  // container destroy.
304  void reaped(const ContainerID& containerId);
305 
306  // Removes the docker container.
307  void remove(
308  const std::string& containerName,
309  const Option<std::string>& executor);
310 
311  const Flags flags;
312 
313  Fetcher* fetcher;
314 
316 
318 
320 
322 
323  struct Container
324  {
325  static Try<Container*> create(
326  const ContainerID& id,
327  const mesos::slave::ContainerConfig& containerConfig,
328  const std::map<std::string, std::string>& environment,
329  const Option<std::string>& pidCheckpointPath,
330  const Flags& flags);
331 
332  static std::string name(const ContainerID& id)
333  {
334  return DOCKER_NAME_PREFIX + stringify(id);
335  }
336 
337  Container(const ContainerID& id)
338  : state(FETCHING), id(id) {}
339 
340  Container(
341  const ContainerID& _id,
342  const mesos::slave::ContainerConfig& _containerConfig,
343  const std::map<std::string, std::string>& _environment,
344  const Option<std::string>& _pidCheckpointPath,
345  bool symlinked,
346  const std::string& containerWorkDir,
347  const Option<CommandInfo>& _command,
348  const Option<ContainerInfo>& _container,
349  bool launchesExecutorContainer)
350  : state(FETCHING),
351  id(_id),
352  containerConfig(_containerConfig),
353  pidCheckpointPath(_pidCheckpointPath),
354  environment(_environment),
355  symlinked(symlinked),
356  containerWorkDir(containerWorkDir),
357  containerName(name(id)),
358  launchesExecutorContainer(launchesExecutorContainer),
359  generatedForCommandTask(_containerConfig.has_task_info())
360  {
361  // NOTE: The task's resources are included in the executor's
362  // resources in order to make sure when launching the executor
363  // that it has non-zero resources in the event the executor was
364  // not actually given any resources by the framework
365  // originally. See Framework::launchExecutor in slave.cpp. We
366  // check that this is indeed the case here to protect ourselves
367  // from when/if this changes in the future (but it's not a
368  // perfect check because an executor might always have a subset
369  // of it's resources that match a task, nevertheless, it's
370  // better than nothing).
371  resourceRequests = containerConfig.resources();
372  resourceLimits = containerConfig.limits();
373 
374  if (containerConfig.has_task_info()) {
375  CHECK(
376  resourceRequests.contains(containerConfig.task_info().resources()));
377  }
378 
379  if (_command.isSome()) {
380  command = _command.get();
381  } else {
382  command = containerConfig.command_info();
383  }
384 
385  if (_container.isSome()) {
386  container = _container.get();
387  } else {
388  // NOTE: The existence of this field is checked in
389  // DockerContainerizerProcess::launch.
390  container = containerConfig.container_info();
391  }
392  }
393 
394  ~Container()
395  {
396  if (symlinked) {
397  // The sandbox directory is a symlink, remove it at container
398  // destroy.
399  os::rm(containerWorkDir);
400  }
401  }
402 
403  Option<std::string> executorName()
404  {
405  if (launchesExecutorContainer) {
406  return containerName + DOCKER_NAME_SEPERATOR + "executor";
407  } else {
408  return None();
409  }
410  }
411 
412  std::string image() const
413  {
414  if (containerConfig.has_task_info()) {
415  return containerConfig.task_info().container().docker().image();
416  }
417 
418  return containerConfig.executor_info().container().docker().image();
419  }
420 
421  bool forcePullImage() const
422  {
423  if (containerConfig.has_task_info()) {
424  return containerConfig.task_info()
425  .container().docker().force_pull_image();
426  }
427 
428  return containerConfig.executor_info()
429  .container().docker().force_pull_image();
430  }
431 
// The DockerContainerizer must be able to clean up Docker
// containers properly no matter when they are destroyed. For
// example, if a container is destroyed while we are fetching, we
// must neither keep running the fetch nor try to start the Docker
// container. The launch is therefore split into these states:
//
//   FETCHING
//   PULLING
//   MOUNTING
//   RUNNING
//   DESTROYING
//
// 'PULLING' is its own state so that we can easily destroy and
// clean up when a user initiated pulling a really big image but we
// time out due to the executor registration timeout. Since we
// currently have no way to discard a Docker::run, we explicitly do
// the pull (the part that takes the longest) so that we can also
// explicitly kill it when asked. Once the functions at Docker::*
// support discarding, pull no longer needs to be its own state,
// although keeping it doesn't hurt since it gives us better error
// messages.
enum State
{
  FETCHING = 1,
  PULLING,     // 2
  MOUNTING,    // 3
  RUNNING,     // 4
  DESTROYING   // 5
} state;
463 
464  // Copies of the parameters sent to `Container::create`.
465  const ContainerID id;
466  const mesos::slave::ContainerConfig containerConfig;
467  const Option<std::string> pidCheckpointPath;
468 
469  // A copy of the parameter sent to `Container::create`.
470  // NOTE: This may be modified further by hooks.
471  std::map<std::string, std::string> environment;
472 
473  // The sandbox directory for the container. This holds the
474  // symlinked path if symlinked boolean is true.
475  // TODO(josephw): The symlink path does not persist across failovers,
476  // so we will not delete the symlink if the agent restarts. This results
477  // in gradually leaking hanging symlinks.
478  bool symlinked;
479  std::string containerWorkDir;
480 
481  // Copies of the fields in `containerConfig`, except when the
482  // container is a command task and the agent is launched with
483  // the --docker_mesos_image flag.
484  ContainerInfo container;
485  CommandInfo command;
486 
487  // Environment variables that the command executor should pass
488  // onto a docker-ized task. This is set by a hook.
490 
491  // The string used to refer to this container via the Docker CLI.
492  // This name is either computed by concatenating the DOCKER_NAME_PREFIX
493  // and the ContainerID; or during recovery, by taking the recovered
494  // container's name.
495  std::string containerName;
496 
497  // Promise for future returned from wait().
499 
500  // Exit status of executor or container (depending on whether or
501  // not we used the command executor). Represented as a promise so
502  // that destroying can chain with it being set.
504 
505  // Future that tells us the return value of last launch stage (fetch, pull,
506  // run, etc).
508 
509  // We keep track of the resources for each container so we can set
510  // the ResourceStatistics limits in usage(). Note that this is
511  // different than just what we might get from TaskInfo::resources
512  // or ExecutorInfo::resources because they can change dynamically.
513  Resources resourceRequests;
514  google::protobuf::Map<std::string, Value::Scalar> resourceLimits;
515 
516  // The docker pull future is stored so we can discard when
517  // destroy is called while docker is pulling the image.
519 
520  // Once the container is running, this saves the pid of the
521  // running container.
522  Option<pid_t> pid;
523 
524  // The executor pid that was forked to wait on the running
525  // container. This is stored so we can clean up the executor
526  // on destroy.
527  Option<pid_t> executorPid;
528 
529 #ifdef __linux__
530  // GPU resources allocated to the container.
531  std::set<Gpu> gpus;
532 
533  Option<std::string> cpuCgroup;
534  Option<std::string> memoryCgroup;
535 #endif // __linux__
536 
537  // Marks if this container launches an executor in a docker
538  // container.
539  bool launchesExecutorContainer;
540 
541  bool generatedForCommandTask;
542  };
543 
545 };
546 
547 
548 } // namespace slave {
549 } // namespace internal {
550 } // namespace mesos {
551 
552 #endif // __DOCKER_CONTAINERIZER_HPP__
Try< Nothing > checkpoint(const std::string &path, const std::string &message, bool sync, bool downgradeResources)
Definition: state.hpp:123
Definition: option.hpp:29
Try< Nothing > rm(const std::string &path)
Definition: rm.hpp:26
Future< Nothing > remove(const Metric &metric)
Definition: metrics.hpp:109
Definition: fetcher.hpp:49
process::Future< Nothing > recover(const Option< state::SlaveState > &state) override
Definition: check.hpp:33
process::Future< ContainerStatus > status(const ContainerID &containerId) override
process::Future< Nothing > update(const ContainerID &containerId, const Resources &resourceRequests, const google::protobuf::Map< std::string, Value::Scalar > &resourceLimits={}) override
Definition: resources.hpp:83
process::Future< hashset< ContainerID > > containers() override
Try< T > fetch(const std::string &value)
Definition: fetch.hpp:38
Definition: flags.hpp:39
process::Future< Option< mesos::slave::ContainerTermination > > destroy(const ContainerID &containerId) override
Future< Nothing > add(const T &metric)
Definition: metrics.hpp:95
bool isSome() const
Definition: option.hpp:116
Definition: docker.hpp:91
Environment * environment
DWORD pid_t
Definition: windows.hpp:181
process::Future< Containerizer::LaunchResult > launch(const ContainerID &containerId, const mesos::slave::ContainerConfig &containerConfig, const std::map< std::string, std::string > &environment, const Option< std::string > &pidCheckpointPath) override
Definition: containerizer.hpp:64
DockerContainerizer(const Flags &flags, Fetcher *fetcher, const process::Owned< mesos::slave::ContainerLogger > &logger, process::Shared< Docker > docker, const Option< NvidiaComponents > &nvidia=None())
Definition: agent.hpp:25
process::Future< ResourceStatistics > usage(const ContainerID &containerId) override
const T & get() const &
Definition: option.hpp:119
const std::string DOCKER_NAME_PREFIX
process::Future< Nothing > pruneImages(const std::vector< Image > &excludedImages) override
Definition: duration.hpp:235
#define flags
Definition: decoder.hpp:18
URI image(const std::string &repository, const std::string &reference, const std::string &registry, const Option< std::string > &scheme=None(), const Option< int > &port=None())
Definition: docker.hpp:30
Definition: none.hpp:27
Definition: attributes.hpp:24
const std::string DOCKER_SYMLINK_DIRECTORY
Definition: executor.hpp:48
JSON::Object Metrics()
std::string stringify(int flags)
const std::string DOCKER_NAME_SEPERATOR
Definition: spec.hpp:35
bool contains(const Resources &that) const
Definition: process.hpp:505
DockerContainerizerProcess(const Flags &_flags, Fetcher *_fetcher, const process::Owned< mesos::slave::ContainerLogger > &_logger, process::Shared< Docker > _docker, const Option< NvidiaComponents > &_nvidia)
Definition: docker.hpp:131
Definition: parse.hpp:33
PID< MetricsProcess > metrics
constexpr const char * name
Definition: shell.hpp:41
static Try< DockerContainerizer * > create(const Flags &flags, Fetcher *fetcher, const Option< NvidiaComponents > &nvidia=None())
process::Future< Option< mesos::slave::ContainerTermination > > wait(const ContainerID &containerId) override