Apache Mesos
isolator.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __NVIDIA_GPU_ISOLATOR_HPP__
18 #define __NVIDIA_GPU_ISOLATOR_HPP__
19 
#include <map>
#include <set>
#include <vector>

#include <process/future.hpp>

#include <stout/hashmap.hpp>
#include <stout/option.hpp>
#include <stout/path.hpp>
#include <stout/try.hpp>

#include "linux/cgroups.hpp"

#include "slave/flags.hpp"

#include "slave/containerizer/mesos/isolator.hpp"

#include "slave/containerizer/mesos/isolators/gpu/allocator.hpp"
#include "slave/containerizer/mesos/isolators/gpu/components.hpp"
#include "slave/containerizer/mesos/isolators/gpu/volume.hpp"

41 namespace mesos {
42 namespace internal {
43 namespace slave {
44 
45 // This isolator uses the cgroups devices subsystem to control
46 // access to Nvidia GPUs. Since this is the very first device
47 // isolator, it currently contains generic device isolation
48 // logic that needs to be pulled up into a generic device
49 // isolator.
50 //
51 // GPUs are allocated to containers in an arbitrary fashion.
52 // For example, if a container requires 2 GPUs, we will
53 // arbitrarily choose 2 from the GPUs that are available.
54 // This may not behave well if tasks within an executor use
55 // GPUs since we cannot identify which tasks are using which
56 // GPUs (i.e. when a task terminates, we may remove a GPU
57 // that is still being used by a different task!).
58 //
59 // Note that this isolator is not responsible for ensuring
60 // that the necessary Nvidia libraries are visible in the
61 // container. If filesystem isolation is not enabled, this
62 // means that the container can simply use the libraries
63 // available on the host. When filesystem isolation is
64 // enabled, it is the responsibility of the operator /
65 // application developer to ensure that the necessary
66 // libraries are visible to the container (note that they
67 // must be version compatible with the kernel driver on
68 // the host).
69 //
70 // TODO(klueska): To better support containers with a
71 // provisioned filesystem, we will need to add a mechanism
72 // for operators to inject the libraries as a volume into
73 // containers that require GPU access.
74 //
75 // TODO(klueska): If multiple containerizers are enabled,
76 // they need to co-ordinate their allocation of GPUs.
77 //
78 // TODO(klueska): Move generic device isolation logic
79 // out into its own component.
81 {
82 public:
84  const Flags& flags,
85  const NvidiaComponents& components);
86 
87  bool supportsNesting() override;
88  bool supportsStandalone() override;
89 
91  const std::vector<mesos::slave::ContainerState>& states,
92  const hashset<ContainerID>& orphans) override;
93 
95  const ContainerID& containerId,
96  const mesos::slave::ContainerConfig& containerConfig) override;
97 
99  const ContainerID& containerId,
100  const Resources& resourceRequests,
101  const google::protobuf::Map<
102  std::string, Value::Scalar>& resourceLimits = {}) override;
103 
105  const ContainerID& containerId) override;
106 
108  const ContainerID& containerId) override;
109 
110 private:
112  const Flags& _flags,
113  const std::string& hierarchy,
114  const NvidiaGpuAllocator& _allocator,
115  const NvidiaVolume& _volume,
116  const std::map<Path, cgroups::devices::Entry>& _controlDeviceEntries);
117 
119  const ContainerID& containerId,
120  const mesos::slave::ContainerConfig& containerConfig);
121 
122  process::Future<Nothing> _update(
123  const ContainerID& containerId,
124  const std::set<Gpu>& allocation);
125 
126  struct Info
127  {
128  Info(const ContainerID& _containerId, const std::string& _cgroup)
129  : containerId(_containerId), cgroup(_cgroup) {}
130 
131  const ContainerID containerId;
132  const std::string cgroup;
133  std::set<Gpu> allocated;
134  };
135 
136  const Flags flags;
137 
138  // The path to the cgroups subsystem hierarchy root.
139  const std::string hierarchy;
140 
141  // TODO(bmahler): Use Owned<Info>.
143 
144  NvidiaGpuAllocator allocator;
145  NvidiaVolume volume;
146 
147  const std::map<Path, cgroups::devices::Entry> controlDeviceEntries;
148 };
149 
150 } // namespace slave {
151 } // namespace internal {
152 } // namespace mesos {
153 
154 #endif // __NVIDIA_GPU_ISOLATOR_HPP__
Definition: check.hpp:33
Definition: resources.hpp:83
process::Future< Nothing > cleanup(const ContainerID &containerId) override
Definition: allocator.hpp:52
process::Future< Nothing > recover(const std::vector< mesos::slave::ContainerState > &states, const hashset< ContainerID > &orphans) override
Definition: flags.hpp:39
process::Future< Nothing > update(const ContainerID &containerId, const Resources &resourceRequests, const google::protobuf::Map< std::string, Value::Scalar > &resourceLimits={}) override
process::Future< Option< mesos::slave::ContainerLaunchInfo > > prepare(const ContainerID &containerId, const mesos::slave::ContainerConfig &containerConfig) override
process::Future< ResourceStatistics > usage(const ContainerID &containerId) override
Definition: components.hpp:34
Definition: agent.hpp:25
Result< std::string > cgroup(pid_t pid)
static Try< mesos::slave::Isolator * > create(const Flags &flags, const NvidiaComponents &components)
Definition: attributes.hpp:24
Definition: parse.hpp:33
Definition: volume.hpp:36