Apache Mesos
subprocess.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use this file except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License
12 
13 #ifndef __PROCESS_POSIX_SUBPROCESS_HPP__
14 #define __PROCESS_POSIX_SUBPROCESS_HPP__
15 
16 #ifdef __linux__
17 #include <sys/prctl.h>
18 #include <sys/syscall.h>
19 #endif // __linux__
20 #include <sys/types.h>
21 
22 #include <string>
23 
24 #include <glog/logging.h>
25 
26 #include <process/subprocess.hpp>
27 
28 #include <stout/check.hpp>
29 #include <stout/error.hpp>
30 #include <stout/exit.hpp>
31 #include <stout/foreach.hpp>
32 #include <stout/hashset.hpp>
33 #include <stout/nothing.hpp>
34 #include <stout/lambda.hpp>
35 #include <stout/none.hpp>
36 #include <stout/option.hpp>
37 #include <stout/os.hpp>
38 #include <stout/try.hpp>
39 #include <stout/unreachable.hpp>
40 
41 #include <stout/os/close.hpp>
42 #include <stout/os/environment.hpp>
43 #include <stout/os/fcntl.hpp>
44 #include <stout/os/signals.hpp>
45 #include <stout/os/strerror.hpp>
46 
47 namespace process {
48 namespace internal {
49 
50 static void close(std::initializer_list<int_fd> fds);
51 
52 
53 static void close(
56  const Subprocess::IO::OutputFileDescriptors& stderrfds);
57 
58 
59 #if defined(__linux__) && defined(SYS_getdents64)
// Convert a null-terminated string of decimal digits to a non-negative
// integer. Returns -1 if the string is empty, contains any character
// that is not a decimal digit, or denotes a value that does not fit in
// an `int`.
//
// This function is async signal safe since it does not make any libc
// calls.
static int convertStringToInt(const char *name)
{
  // Largest value representable by `int`, computed locally so that no
  // additional header is required.
  const int maxInt = static_cast<int>(~0U >> 1);

  if (*name == '\0') {
    // An empty string is not a number.
    return -1;
  }

  int num = 0;
  while (*name >= '0' && *name <= '9') {
    const int digit = *name - '0';

    // Refuse to continue if `num * 10 + digit` would overflow; signed
    // overflow is undefined behavior.
    if (num > (maxInt - digit) / 10) {
      return -1;
    }

    num = num * 10 + digit;
    ++name;
  }

  if (*name) {
    // Non digit found, not a number.
    return -1;
  }

  return num;
}
77 #endif // __linux__ && SYS_getdents64
78 
79 
80 // Close any file descriptors that are not stdio file descriptors and not
81 // explicitly whitelisted to avoid leaking them into the forked process.
82 // And unset the `close-on-exec` flag for the whitelist file descriptors
83 // so that they can be inherited by the forked process.
84 static void handleWhitelistFds(const std::vector<int_fd>& whitelist_fds)
85 {
86  // We need to make a syscall (e.g., `SYS_getdents64` on Linux) to get each
87  // entry from `/dev/fd` since syscall function is async signal safe, but we
88  // cannot do that for macOS since Apple has decided to deprecate all syscall
89  // functions with OS 10.12 (see MESOS-8457).
90 #if defined(__linux__) && defined(SYS_getdents64)
91  int fdDir = ::open("/dev/fd", O_RDONLY);
92  if (fdDir == -1) {
93  ABORT("Failed to open /dev/fd: " + os::strerror(errno));
94  }
95 
96  struct linux_dirent64 {
97  ino64_t d_ino;
98  off64_t d_off;
99  unsigned short d_reclen;
100  unsigned char d_type;
101  char d_name[];
102  };
103 
104  char buffer[1024];
105  int bytes;
106 
107  while (true) {
108  bytes = ::syscall(SYS_getdents64, fdDir, buffer, sizeof(buffer));
109  if (bytes == -1) {
110  ABORT("Failed to call SYS_getdents64 on /dev/fd: " + os::strerror(errno));
111  }
112 
113  if (bytes == 0) {
114  break;
115  }
116 
117  struct linux_dirent64 *entry;
118  for (int offset = 0; offset < bytes; offset += entry->d_reclen) {
119  entry = reinterpret_cast<struct linux_dirent64 *>(buffer + offset);
120  int_fd fd = convertStringToInt(entry->d_name);
121  if (fd >= 0 &&
122  fd != fdDir &&
123  fd != STDIN_FILENO &&
124  fd != STDOUT_FILENO &&
125  fd != STDERR_FILENO) {
126  bool found = false;
127  foreach (int_fd whitelist_fd, whitelist_fds) {
128  if (whitelist_fd == fd) {
129  found = true;
130  break;
131  }
132  }
133 
134  if (!found) {
135  int flags = ::fcntl(fd, F_GETFD);
136  if (flags == -1) {
137  // TODO(gilbert): clean up the use of `os::strerror` during the
138  // timeframe of fork-exec because it is not signal safe.
139  ABORT(
140  "Failed to get file descriptor flags: " + os::strerror(errno));
141  }
142 
143  // Close the FD which does not have the FD_CLOEXEC bit.
144  if ((flags & FD_CLOEXEC) == 0){
145  ::close(fd);
146  }
147  }
148  }
149  }
150  }
151 
152  ::close(fdDir);
153 #endif // __linux__ && SYS_getdents64
154 
155  foreach (int_fd fd, whitelist_fds) {
156  int flags = ::fcntl(fd, F_GETFD);
157  if (flags == -1) {
158  ABORT("Failed to get file descriptor flags: " + os::strerror(errno));
159  }
160 
161  if (::fcntl(fd, F_SETFD, flags & ~FD_CLOEXEC) == -1) {
162  ABORT("Failed to unset cloexec: " + os::strerror(errno));
163  }
164  }
165 }
166 
167 
168 inline pid_t defaultClone(const lambda::function<int()>& func)
169 {
170  pid_t pid = ::fork();
171  if (pid == -1) {
172  return -1;
173  } else if (pid == 0) {
174  // Child.
175  ::exit(func());
176  UNREACHABLE();
177  } else {
178  // Parent.
179  return pid;
180  }
181 }
182 
183 
184 // This function will invoke `os::cloexec` on all specified file
185 // descriptors that are valid (i.e., not `None` and >= 0).
187  const InputFileDescriptors& stdinfds,
188  const OutputFileDescriptors& stdoutfds,
189  const OutputFileDescriptors& stderrfds)
190 {
191  hashset<int> fds = {
192  stdinfds.read,
193  stdinfds.write.getOrElse(-1),
194  stdoutfds.read.getOrElse(-1),
195  stdoutfds.write,
196  stderrfds.read.getOrElse(-1),
197  stderrfds.write
198  };
199 
200  foreach (int fd, fds) {
201  if (fd >= 0) {
203  if (cloexec.isError()) {
204  return Error(cloexec.error());
205  }
206  }
207  }
208 
209  return Nothing();
210 }
211 
212 
213 // The main entry of the child process.
214 //
215 // NOTE: This function has to be async signal safe.
216 inline int childMain(
217  const std::string& path,
218  char** argv,
219  char** envp,
220  const InputFileDescriptors& stdinfds,
221  const OutputFileDescriptors& stdoutfds,
222  const OutputFileDescriptors& stderrfds,
223  const std::vector<int_fd>& whitelist_fds,
224  bool blocking,
225  int pipes[2],
226  const std::vector<Subprocess::ChildHook>& child_hooks)
227 {
228  // Close parent's end of the pipes.
229  if (stdinfds.write.isSome()) {
230  ::close(stdinfds.write.get());
231  }
232  if (stdoutfds.read.isSome()) {
233  ::close(stdoutfds.read.get());
234  }
235  if (stderrfds.read.isSome()) {
236  ::close(stderrfds.read.get());
237  }
238 
239  // Currently we will block the child's execution of the new process
240  // until all the parent hooks (if any) have executed.
241  if (blocking) {
242  ::close(pipes[1]);
243  }
244 
245  // Redirect I/O for stdin/stdout/stderr.
246  while (::dup2(stdinfds.read, STDIN_FILENO) == -1 && errno == EINTR);
247  while (::dup2(stdoutfds.write, STDOUT_FILENO) == -1 && errno == EINTR);
248  while (::dup2(stderrfds.write, STDERR_FILENO) == -1 && errno == EINTR);
249 
250  // Close the copies. We need to make sure that we do not close the
251  // file descriptor assigned to stdin/stdout/stderr in case the
252  // parent has closed stdin/stdout/stderr when calling this
253  // function (in that case, a dup'ed file descriptor may have the
254  // same file descriptor number as stdin/stdout/stderr).
255  //
256  // We also need to ensure that we don't "double close" any file
257  // descriptors in the case where one of stdinfds.read,
258  // stdoutfds.write, or stdoutfds.write are equal.
259  if (stdinfds.read != STDIN_FILENO &&
260  stdinfds.read != STDOUT_FILENO &&
261  stdinfds.read != STDERR_FILENO) {
262  ::close(stdinfds.read);
263  }
264  if (stdoutfds.write != STDIN_FILENO &&
265  stdoutfds.write != STDOUT_FILENO &&
266  stdoutfds.write != STDERR_FILENO &&
267  stdoutfds.write != stdinfds.read) {
268  ::close(stdoutfds.write);
269  }
270  if (stderrfds.write != STDIN_FILENO &&
271  stderrfds.write != STDOUT_FILENO &&
272  stderrfds.write != STDERR_FILENO &&
273  stderrfds.write != stdinfds.read &&
274  stderrfds.write != stdoutfds.write) {
275  ::close(stderrfds.write);
276  }
277 
278  if (blocking) {
279  // Do a blocking read on the pipe until the parent signals us to
280  // continue.
281  char dummy;
282  ssize_t length;
283  while ((length = ::read(pipes[0], &dummy, sizeof(dummy))) == -1 &&
284  errno == EINTR);
285 
286  if (length != sizeof(dummy)) {
287  ABORT("Failed to synchronize with parent");
288  }
289 
290  // Now close the pipe as we don't need it anymore.
291  ::close(pipes[0]);
292  }
293 
294  // Run the child hooks.
295  foreach (const Subprocess::ChildHook& hook, child_hooks) {
296  Try<Nothing> callback = hook();
297 
298  // If the callback failed, we should abort execution.
299  if (callback.isError()) {
300  ABORT("Failed to execute Subprocess::ChildHook: " + callback.error());
301  }
302  }
303 
304  handleWhitelistFds(whitelist_fds);
305 
306  os::execvpe(path.c_str(), argv, envp);
307 
308  SAFE_EXIT(
309  errno, "Failed to os::execvpe on path '%s': %d", path.c_str(), errno);
310 }
311 
312 
314  const std::string& path,
315  std::vector<std::string> argv,
316  const Option<std::map<std::string, std::string>>& environment,
317  const Option<lambda::function<
318  pid_t(const lambda::function<int()>&)>>& _clone,
319  const std::vector<Subprocess::ParentHook>& parent_hooks,
320  const std::vector<Subprocess::ChildHook>& child_hooks,
321  const InputFileDescriptors stdinfds,
322  const OutputFileDescriptors stdoutfds,
323  const OutputFileDescriptors stderrfds,
324  const std::vector<int_fd>& whitelist_fds)
325 {
326  // The real arguments that will be passed to 'os::execvpe'. We need
327  // to construct them here before doing the clone as it might not be
328  // async signal safe to perform the memory allocation.
329  char** _argv = new char*[argv.size() + 1];
330  for (size_t i = 0; i < argv.size(); i++) {
331  _argv[i] = (char*) argv[i].c_str();
332  }
333  _argv[argv.size()] = nullptr;
334 
335  // Like above, we need to construct the environment that we'll pass
336  // to 'os::execvpe' as it might not be async-safe to perform the
337  // memory allocations.
338  char** envp = os::raw::environment();
339 
340  if (environment.isSome()) {
341  // NOTE: We add 1 to the size for a `nullptr` terminator.
342  envp = new char*[environment->size() + 1];
343 
344  size_t index = 0;
345  foreachpair (
346  const std::string& key,
347  const std::string& value, environment.get()) {
348  std::string entry = key + "=" + value;
349  envp[index] = new char[entry.size() + 1];
350  strncpy(envp[index], entry.c_str(), entry.size() + 1);
351  ++index;
352  }
353 
354  envp[index] = nullptr;
355  }
356 
357  // Determine the function to clone the child process. If the user
358  // does not specify the clone function, we will use the default.
359  lambda::function<pid_t(const lambda::function<int()>&)> clone =
360  (_clone.isSome() ? _clone.get() : defaultClone);
361 
362  // Currently we will block the child's execution of the new process
363  // until all the `parent_hooks` (if any) have executed.
364  std::array<int, 2> pipes;
365  const bool blocking = !parent_hooks.empty();
366 
367  if (blocking) {
368  // We assume this should not fail under reasonable conditions so we
369  // use CHECK.
371  CHECK_SOME(pipe);
372 
373  pipes = pipe.get();
374  }
375 
376  // Now, clone the child process.
377  pid_t pid = clone(lambda::bind(
378  &childMain,
379  path,
380  _argv,
381  envp,
382  stdinfds,
383  stdoutfds,
384  stderrfds,
385  whitelist_fds,
386  blocking,
387  pipes.data(),
388  child_hooks));
389 
390  delete[] _argv;
391 
392  // Need to delete 'envp' if we had environment variables passed to
393  // us and we needed to allocate the space.
394  if (environment.isSome()) {
395  CHECK_NE(os::raw::environment(), envp);
396 
397  // We ignore the last 'envp' entry since it is nullptr.
398  for (size_t index = 0; index < environment->size(); index++) {
399  delete[] envp[index];
400  }
401 
402  delete[] envp;
403  }
404 
405  if (pid == -1) {
406  // Save the errno as 'close' below might overwrite it.
407  ErrnoError error("Failed to clone");
408  internal::close(stdinfds, stdoutfds, stderrfds);
409 
410  if (blocking) {
411  os::close(pipes[0]);
412  os::close(pipes[1]);
413  }
414 
415  return error;
416  }
417 
418  // Close the child-ends of the file descriptors that are created by
419  // this function.
420  internal::close({stdinfds.read, stdoutfds.write, stderrfds.write});
421 
422  if (blocking) {
423  os::close(pipes[0]);
424 
425  // Run the parent hooks.
426  foreach (const Subprocess::ParentHook& hook, parent_hooks) {
427  Try<Nothing> parentSetup = hook.parent_setup(pid);
428 
429  // If the hook callback fails, we shouldn't proceed with the
430  // execution and hence the child process should be killed.
431  if (parentSetup.isError()) {
432  LOG(WARNING)
433  << "Failed to execute Subprocess::ParentHook in parent for child '"
434  << pid << "': " << parentSetup.error();
435 
436  os::close(pipes[1]);
437 
438  // Ensure the child is killed.
439  ::kill(pid, SIGKILL);
440 
441  return Error(
442  "Failed to execute Subprocess::ParentHook in parent for child '" +
443  stringify(pid) + "': " + parentSetup.error());
444  }
445  }
446 
447  // Now that we've executed the parent hooks, we can signal the child to
448  // continue by writing to the pipe.
449  char dummy;
450  ssize_t length;
451  while ((length = ::write(pipes[1], &dummy, sizeof(dummy))) == -1 &&
452  errno == EINTR);
453 
454  os::close(pipes[1]);
455 
456  if (length != sizeof(dummy)) {
457  // Ensure the child is killed.
458  ::kill(pid, SIGKILL);
459 
460  return Error("Failed to synchronize child process");
461  }
462  }
463 
464  return pid;
465 }
466 
467 } // namespace internal {
468 } // namespace process {
469 
470 #endif // __PROCESS_POSIX_SUBPROCESS_HPP__
Try< Nothing > dup2(int oldFd, int newFd)
Definition: os.hpp:413
SSIZE_T ssize_t
Definition: windows.hpp:186
Definition: path.hpp:29
std::string strerror(int errno_)
A thread-safe version of strerror.
Definition: strerror.hpp:30
Definition: nothing.hpp:16
Try< std::array< int, 2 > > pipe()
Definition: pipe.hpp:33
Definition: errorbase.hpp:36
Definition: option.hpp:29
Try< pid_t > clone(pid_t target, int nstypes, const lambda::function< int()> &f, int flags)
Performs an os::clone after entering a set of namespaces for the specified target process...
#define ABORT(...)
Definition: abort.hpp:40
T getOrElse(U &&u) const &
Definition: option.hpp:133
Subprocess::IO::OutputFileDescriptors OutputFileDescriptors
Definition: subprocess.hpp:339
T & get()&
Definition: try.hpp:80
const mode_t SIGKILL
Definition: windows.hpp:335
A ChildHook can be passed to a subprocess call.
Definition: subprocess.hpp:191
Definition: check.hpp:33
Definition: hashset.hpp:53
Try< int_fd > open(const std::string &path, int oflag, mode_t mode=0)
Definition: open.hpp:35
int_fd read
Definition: subprocess.hpp:76
For output file descriptors a child writes to the write file descriptor and a parent may read from th...
Definition: subprocess.hpp:89
int childMain(const std::string &path, char **argv, char **envp, const InputFileDescriptors &stdinfds, const OutputFileDescriptors &stdoutfds, const OutputFileDescriptors &stderrfds, const std::vector< int_fd > &whitelist_fds, bool blocking, int pipes[2], const std::vector< Subprocess::ChildHook > &child_hooks)
Definition: subprocess.hpp:216
pid_t defaultClone(const lambda::function< int()> &func)
Definition: subprocess.hpp:168
Definition: errorbase.hpp:50
#define STDERR_FILENO
Definition: windows.hpp:155
Try< Nothing > cloexec(const InputFileDescriptors &stdinfds, const OutputFileDescriptors &stdoutfds, const OutputFileDescriptors &stderrfds)
Definition: subprocess.hpp:186
bool isSome() const
Definition: option.hpp:116
Subprocess::IO::InputFileDescriptors InputFileDescriptors
Definition: subprocess.hpp:338
Environment * environment
DWORD pid_t
Definition: windows.hpp:181
#define CHECK_SOME(expression)
Definition: check.hpp:50
#define STDOUT_FILENO
Definition: windows.hpp:154
int_fd write
Definition: subprocess.hpp:92
Try< Nothing > close(int fd)
Definition: close.hpp:24
Try< Nothing > cloexec(int fd)
Definition: fcntl.hpp:27
const T & get() const &
Definition: option.hpp:119
#define foreachpair(KEY, VALUE, ELEMS)
Definition: foreach.hpp:51
For input file descriptors a child reads from the read file descriptor and a parent may write to the ...
Definition: subprocess.hpp:74
#define STDIN_FILENO
Definition: windows.hpp:153
static Try error(const E &e)
Definition: try.hpp:43
#define UNREACHABLE()
Definition: unreachable.hpp:22
const lambda::function< Try< Nothing >pid_t)> parent_setup
The callback that must be specified for execution after the child has been cloned, but before it starts executing the new process.
Definition: subprocess.hpp:164
Definition: attributes.hpp:24
bool isError() const
Definition: try.hpp:78
Result< Credentials > read(const Path &path)
Definition: credentials.hpp:35
std::string error(const std::string &msg, uint32_t code)
Definition: executor.hpp:48
Protocol< WriteRequest, WriteResponse > write
Option< int_fd > read
Definition: subprocess.hpp:91
Try< Nothing > kill(const std::string &hierarchy, const std::string &cgroup, int signal)
Try< Nothing > bind(int_fd s, const Address &address)
Definition: network.hpp:46
Option< int_fd > write
Definition: subprocess.hpp:77
A hook can be passed to a subprocess call.
Definition: subprocess.hpp:153
int int_fd
Definition: int_fd.hpp:35
std::string stringify(int flags)
int execvpe(const char *file, char **argv, char **envp)
Definition: exec.hpp:64
Definition: parse.hpp:33
#define SAFE_EXIT(status, fmt,...)
Definition: exit.hpp:42
constexpr const char * name
Definition: shell.hpp:41
Try< pid_t > cloneChild(const std::string &path, std::vector< std::string > argv, const Option< std::map< std::string, std::string >> &environment, const Option< lambda::function< pid_t(const lambda::function< int()> &)>> &_clone, const std::vector< Subprocess::ParentHook > &parent_hooks, const std::vector< Subprocess::ChildHook > &child_hooks, const InputFileDescriptors stdinfds, const OutputFileDescriptors stdoutfds, const OutputFileDescriptors stderrfds, const std::vector< int_fd > &whitelist_fds)
Definition: subprocess.hpp:313
char ** environment()
Definition: environment.hpp:66