Apache Mesos
linux.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use this file except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 #ifndef __STOUT_OS_LINUX_HPP__
14 #define __STOUT_OS_LINUX_HPP__
15 
16 // This file contains Linux-only OS utilities.
17 #ifndef __linux__
18 #error "stout/os/linux.hpp is only available on Linux systems."
19 #endif // __linux__
20 
#include <unistd.h> // For sysconf.

#include <linux/version.h> // For LINUX_VERSION_CODE and KERNEL_VERSION.

#include <sys/mman.h>
#include <sys/sysinfo.h> // For sysinfo.
#include <sys/types.h> // For pid_t.

#include <cstdint>
#include <list>
#include <queue>
#include <set>
#include <string>

#include <stout/error.hpp>
#include <stout/foreach.hpp>
#include <stout/lambda.hpp>
#include <stout/option.hpp>
#include <stout/proc.hpp>
#include <stout/result.hpp>
#include <stout/try.hpp>

#include <stout/os/process.hpp>
38 
39 namespace os {
40 
41 
42 // Helper for clone() which expects an int(void*).
43 static int childMain(void* _func)
44 {
45  const lambda::function<int()>* func =
46  static_cast<const lambda::function<int()>*> (_func);
47 
48  return (*func)();
49 }
50 
51 
52 // Helper that captures information about a stack to be used when
53 // invoking clone.
54 class Stack
55 {
56 public:
57  // 8 MiB is the default for "ulimit -s" on OSX and Linux.
58  static constexpr size_t DEFAULT_SIZE = 8 * 1024 * 1024;
59 
60  // Allocate a stack. Note that this is NOT async signal safe, nor
61  // safe to call between fork and exec.
62  static Try<Stack> create(size_t size)
63  {
64  Stack stack(size);
65 
66  if (!stack.allocate()) {
67  return ErrnoError();
68  }
69 
70  return stack;
71  }
72 
73  explicit Stack(size_t size_) : size(size_) {}
74 
75  // Allocate the stack using mmap. We avoid malloc because we want
76  // this to be safe to use between fork and exec where malloc might
77  // deadlock. Returns false and sets `errno` on failure.
78  bool allocate()
79  {
80  int flags = MAP_PRIVATE | MAP_ANONYMOUS;
81 
82 #if defined(MAP_STACK)
83  flags |= MAP_STACK;
84 #endif
85 
86  address = ::mmap(nullptr, size, PROT_READ | PROT_WRITE, flags, -1, 0);
87  if (address == MAP_FAILED) {
88  return false;
89  }
90 
91  return true;
92  }
93 
94  // Explicitly free the stack.
95  // The destructor won't free the allocated stack.
96  void deallocate()
97  {
98  PCHECK(::munmap(address, size) == 0);
99  address = MAP_FAILED;
100  }
101 
102  // Stack grows down, return the first usable address.
103  char* start() const
104  {
105  return address == MAP_FAILED
106  ? nullptr
107  : (static_cast<char*>(address) + size);
108  }
109 
110 private:
111  size_t size;
112  void* address = MAP_FAILED;
113 };
114 
115 
116 namespace signal_safe {
117 
118 
119 inline pid_t clone(
120  const Stack& stack,
121  int flags,
122  const lambda::function<int()>& func)
123 {
124  return ::clone(childMain, stack.start(), flags, (void*) &func);
125 }
126 
127 } // namespace signal_safe {
128 
129 
130 inline pid_t clone(
131  const lambda::function<int()>& func,
132  int flags)
133 {
134  // Stack for the child.
135  //
136  // NOTE: We need to allocate the stack dynamically. This is because
137  // glibc's 'clone' will modify the stack passed to it, therefore the
138  // stack must NOT be shared as multiple 'clone's can be invoked
139  // simultaneously.
140  Stack stack(Stack::DEFAULT_SIZE);
141 
142  if (!stack.allocate()) {
143  // TODO(jpeach): In MESOS-8155, we will return an
144  // ErrnoError() here, but for now keep the interface
145  // compatible.
146  return -1;
147  }
148 
149  pid_t pid = signal_safe::clone(stack, flags, func);
150 
151  // Given we allocated the stack ourselves, there are two
152  // circumstances where we need to delete the allocated stack to
153  // avoid a memory leak:
154  //
155  // (1) Failed to clone.
156  //
157  // (2) CLONE_VM is not set implying ::clone will create a process
158  // which runs in its own copy of the memory space of the
159  // calling process. If CLONE_VM is set ::clone will create a
160  // thread which runs in the same memory space with the calling
161  // process, in which case we don't want to call delete!
162  //
163  // TODO(jpeach): In case (2) we will leak the stack memory.
164  if (pid < 0 || !(flags & CLONE_VM)) {
165  stack.deallocate();
166  }
167 
168  return pid;
169 }
170 
171 
172 inline Result<Process> process(pid_t pid)
173 {
174  // Page size, used for memory accounting.
175  static const size_t pageSize = os::pagesize();
176 
177  // Number of clock ticks per second, used for cpu accounting.
178  static const long ticks = sysconf(_SC_CLK_TCK);
179  if (ticks <= 0) {
180  return Error("Failed to get sysconf(_SC_CLK_TCK)");
181  }
182 
184 
185  if (status.isError()) {
186  return Error(status.error());
187  }
188 
189  if (status.isNone()) {
190  return None();
191  }
192 
193  // There are known bugs with invalid utime / stime values coming
194  // from /proc/<pid>/stat on some Linux systems.
195  // See the following thread for details:
196  // http://mail-archives.apache.org/mod_mbox/incubator-mesos-dev/
197  // 201307.mbox/%3CCA+2n2er-Nemh0CsKLbHRkaHd=YCrNt17NLUPM2=TtEfsKOw4
198  // Rg@mail.gmail.com%3E
199  // These are similar reports:
200  // http://lkml.indiana.edu/hypermail/linux/kernel/1207.1/01388.html
201  // https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1023214
202  Try<Duration> utime = Duration::create(status->utime / (double)ticks);
203  Try<Duration> stime = Duration::create(status->stime / (double)ticks);
204 
205  // The command line from 'status->comm' is only "arg0" from "argv"
206  // (i.e., the canonical executable name). To get the entire command
207  // line we grab '/proc/[pid]/cmdline'.
209 
210  return Process(
211  status->pid,
212  status->ppid,
213  status->pgrp,
214  status->session,
215  Bytes(status->rss * pageSize),
216  utime.isSome() ? utime.get() : Option<Duration>::none(),
217  stime.isSome() ? stime.get() : Option<Duration>::none(),
218  cmdline.isSome() ? cmdline.get() : status->comm,
219  status->state == 'Z');
220 }
221 
222 
223 inline Try<std::set<pid_t>> pids()
224 {
225  return proc::pids();
226 }
227 
228 
229 // Returns the total size of main and free memory.
230 inline Try<Memory> memory()
231 {
232  Memory memory;
233 
234  struct sysinfo info;
235  if (sysinfo(&info) != 0) {
236  return ErrnoError();
237  }
238 
239 # if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 23)
240  memory.total = Bytes(info.totalram * info.mem_unit);
241  memory.free = Bytes(info.freeram * info.mem_unit);
242  memory.totalSwap = Bytes(info.totalswap * info.mem_unit);
243  memory.freeSwap = Bytes(info.freeswap * info.mem_unit);
244 # else
245  memory.total = Bytes(info.totalram);
246  memory.free = Bytes(info.freeram);
247  memory.totalSwap = Bytes(info.totalswap);
248  memory.freeSwap = Bytes(info.freeswap);
249 # endif
250 
251  return memory;
252 }
253 
254 } // namespace os {
255 
256 #endif // __STOUT_OS_LINUX_HPP__
const pid_t pgrp
Definition: proc.hpp:130
bool isNone() const
Definition: result.hpp:113
Definition: errorbase.hpp:36
void deallocate()
Definition: linux.hpp:96
char * start() const
Definition: linux.hpp:103
T & get()&
Definition: try.hpp:80
Definition: check.hpp:33
static Result< T > error(const std::string &message)
Definition: result.hpp:54
size_t pagesize()
Definition: pagesize.hpp:24
Result< ProcessStatus > status(pid_t pid)
Definition: proc.hpp:166
const pid_t ppid
Definition: proc.hpp:129
static Try< Duration > create(double seconds)
Definition: duration.hpp:418
int childMain(const std::string &path, char **argv, char **envp, const InputFileDescriptors &stdinfds, const OutputFileDescriptors &stdoutfds, const OutputFileDescriptors &stderrfds, const std::vector< int_fd > &whitelist_fds, bool blocking, int pipes[2], const std::vector< Subprocess::ChildHook > &child_hooks)
Definition: subprocess.hpp:216
Definition: errorbase.hpp:50
Definition: posix_signalhandler.hpp:23
Definition: check.hpp:30
Bytes freeSwap
Definition: os.hpp:39
const long rss
Definition: proc.hpp:149
DWORD pid_t
Definition: windows.hpp:181
Definition: process.hpp:32
Bytes totalSwap
Definition: os.hpp:38
const unsigned long utime
Definition: proc.hpp:139
Try< Nothing > utime(const std::string &path)
Definition: utime.hpp:32
Try< std::set< pid_t > > pids()
Definition: proc.hpp:294
static Option< T > none()
Definition: option.hpp:32
bool isSome() const
Definition: try.hpp:77
pid_t clone(const lambda::function< int()> &func, int flags)
Definition: linux.hpp:130
const pid_t session
Definition: proc.hpp:131
const char state
Definition: proc.hpp:128
bool allocate()
Definition: linux.hpp:78
static constexpr size_t DEFAULT_SIZE
Definition: linux.hpp:58
Result< std::string > cmdline(const Option< pid_t > &pid=None())
Definition: proc.hpp:256
const pid_t pid
Definition: proc.hpp:126
Result< Process > process(pid_t pid)
Definition: freebsd.hpp:30
#define flags
Definition: decoder.hpp:18
Definition: os.hpp:34
Definition: none.hpp:27
Bytes total
Definition: os.hpp:36
T & get()&
Definition: result.hpp:116
const std::string comm
Definition: proc.hpp:127
pid_t clone(const Stack &stack, int flags, const lambda::function< int()> &func)
Definition: linux.hpp:119
static Try< Stack > create(size_t size)
Definition: linux.hpp:62
const unsigned long stime
Definition: proc.hpp:140
bool isSome() const
Definition: result.hpp:112
bool isError() const
Definition: result.hpp:114
Definition: bytes.hpp:30
Try< Memory > memory()
Definition: freebsd.hpp:78
Stack(size_t size_)
Definition: linux.hpp:73
Bytes free
Definition: os.hpp:37
Definition: parse.hpp:33
Try< std::set< pid_t > > pids()
Definition: freebsd.hpp:62
Definition: linux.hpp:54