tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

SandboxLaunch.cpp (24694B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
      5 * You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "SandboxLaunch.h"
      8 
      9 #include <fcntl.h>
     10 #include <sched.h>
     11 #include <setjmp.h>
     12 #include <signal.h>
     13 #include <sys/prctl.h>
     14 #include <sys/socket.h>
     15 #include <sys/syscall.h>
     16 #include <unistd.h>
     17 
     18 #include <utility>
     19 
     20 #include "LinuxCapabilities.h"
     21 #include "LinuxSched.h"
     22 #include "SandboxChrootProto.h"
     23 #include "SandboxInfo.h"
     24 #include "SandboxLogging.h"
     25 #include "base/eintr_wrapper.h"
     26 #include "base/strings/safe_sprintf.h"
     27 #include "mozilla/Array.h"
     28 #include "mozilla/Assertions.h"
     29 #include "mozilla/Attributes.h"
     30 #include "mozilla/Preferences.h"
     31 #include "mozilla/SandboxReporter.h"
     32 #include "mozilla/SandboxSettings.h"
     33 #include "mozilla/Components.h"
     34 #include "mozilla/StaticPrefs_media.h"
     35 #include "mozilla/StaticPrefs_security.h"
     36 #include "mozilla/ipc/UtilityProcessSandboxing.h"
     37 #include "nsCOMPtr.h"
     38 #include "nsDebug.h"
     39 #include "nsIGfxInfo.h"
     40 #include "nsString.h"
     41 #include "nsThreadUtils.h"
     42 #include "prenv.h"
     43 #include "sandbox/linux/system_headers/linux_syscalls.h"
     44 #include "sandbox/linux/services/syscall_wrappers.h"
     45 
     46 #include "mozilla/pthread_atfork.h"
     47 
     48 #ifdef MOZ_X11
     49 #  ifndef MOZ_WIDGET_GTK
     50 #    error "Unknown toolkit"
     51 #  endif
     52 #  include "mozilla/WidgetUtilsGtk.h"
     53 #  include <gdk/gdk.h>
     54 #  include <gdk/gdkx.h>
     55 #  include "X11UndefineNone.h"
     56 #  include "gfxPlatform.h"
     57 #endif
     58 
     59 #if defined(__GLIBC__) && !defined(__UCLIBC__)
     60 // We really are using glibc, not uClibc pretending to be glibc.
     61 #  define LIBC_GLIBC 1
     62 #endif
     63 
     64 namespace mozilla {
     65 
     66 // Returns true if graphics will work from a content process
     67 // started in a new network namespace.  Specifically, named
     68 // Unix-domain sockets will work, but TCP/IP will not, even if it's a
     69 // connection to localhost: the child process has its own private
     70 // loopback interface.
     71 //
     72 // (Longer-term we intend to either proxy or remove X11 access from
     73 // content processes, at which point this will stop being an issue.)
     74 static bool IsGraphicsOkWithoutNetwork() {
     75  // For X11, check whether the parent's connection is a Unix-domain
     76  // socket.  This is done instead of trying to parse the display name
     77  // because an empty hostname (e.g., ":0") will fall back to TCP in
     78  // case of failure to connect using Unix-domain sockets.
     79 #ifdef MOZ_X11
     80  // First, ensure that the parent process's graphics are initialized.
     81  DebugOnly<gfxPlatform*> gfxPlatform = gfxPlatform::GetPlatform();
     82 
     83  const auto display = gdk_display_get_default();
     84  if (!display) {
     85    // In this case, the browser is headless, but WebGL could still
     86    // try to use X11.  However, WebGL isn't supported with remote
     87    // X11, and in any case these connections are made after sandbox
     88    // startup (lazily when WebGL is used), so they aren't being done
     89    // directly by the process anyway.  (For local X11, they're
     90    // brokered.)
     91    MOZ_ASSERT(gfxPlatform->IsHeadless());
     92    return true;
     93  }
     94  if (mozilla::widget::GdkIsX11Display(display)) {
     95    const int xSocketFd = ConnectionNumber(GDK_DISPLAY_XDISPLAY(display));
     96    if (NS_WARN_IF(xSocketFd < 0)) {
     97      return false;
     98    }
     99 
    100    int domain;
    101    socklen_t optlen = static_cast<socklen_t>(sizeof(domain));
    102    int rv = getsockopt(xSocketFd, SOL_SOCKET, SO_DOMAIN, &domain, &optlen);
    103    if (NS_WARN_IF(rv != 0)) {
    104      return false;
    105    }
    106    MOZ_RELEASE_ASSERT(static_cast<size_t>(optlen) == sizeof(domain));
    107    if (domain != AF_LOCAL) {
    108      return false;
    109    }
    110    // There's one more complication: Xorg listens on named sockets
    111    // (actual filesystem nodes) as well as abstract addresses (opaque
    112    // octet strings scoped to the network namespace; this is a Linux
    113    // extension).
    114    //
    115    // Inside a container environment (e.g., when running as a Snap
    116    // package), it's possible that only the abstract addresses are
    117    // accessible.  In that case, the display must be considered
    118    // remote.  See also bug 1450740.
    119    //
    120    // Unfortunately, the Xorg client libraries prefer the abstract
    121    // addresses, so this isn't directly detectable by inspecting the
    122    // parent process's socket.  Instead, parse the DISPLAY env var
    123    // (which was updated if necessary in nsAppRunner.cpp) to get the
    124    // display number and construct the socket path, falling back to
    125    // testing the directory in case that doesn't work.  (See bug
    126    // 1565972 and bug 1559368 for cases where we need to test the
    127    // specific socket.)
    128    const char* const displayStr = PR_GetEnv("DISPLAY");
    129    nsAutoCString socketPath("/tmp/.X11-unix");
    130    int accessFlags = X_OK;
    131    int displayNum;
    132    // sscanf ignores trailing text, so display names with a screen
    133    // number (e.g., ":0.2") will parse correctly.
    134    if (displayStr && (sscanf(displayStr, ":%d", &displayNum) == 1 ||
    135                       sscanf(displayStr, "unix:%d", &displayNum) == 1)) {
    136      socketPath.AppendPrintf("/X%d", displayNum);
    137      accessFlags = R_OK | W_OK;
    138    }
    139    if (access(socketPath.get(), accessFlags) != 0) {
    140      SANDBOX_LOG_ERRNO(
    141          "%s is inaccessible; can't isolate network namespace in"
    142          " content processes",
    143          socketPath.get());
    144      return false;
    145    }
    146  }
    147 #endif
    148 
    149  // Assume that other backends (e.g., Wayland) will not use the
    150  // network namespace.
    151  return true;
    152 }
    153 
    154 bool HasAtiDrivers() {
    155  nsCOMPtr<nsIGfxInfo> gfxInfo = components::GfxInfo::Service();
    156  nsAutoString vendorID;
    157  static const Array<nsresult (nsIGfxInfo::*)(nsAString&), 2> kMethods = {
    158      &nsIGfxInfo::GetAdapterVendorID,
    159      &nsIGfxInfo::GetAdapterVendorID2,
    160  };
    161  for (const auto method : kMethods) {
    162    if (NS_SUCCEEDED((gfxInfo->*method)(vendorID))) {
    163      // This test is based on telemetry data.  The proprietary ATI
    164      // drivers seem to use this vendor string, including for some
    165      // newer devices that have AMD branding in the device name, such
    166      // as those using AMDGPU-PRO drivers.
    167      // The open-source drivers integrated into Mesa appear to use
    168      // the vendor ID "X.Org" instead.
    169      if (vendorID.EqualsLiteral("ATI Technologies Inc.")) {
    170        return true;
    171      }
    172    }
    173  }
    174 
    175  return false;
    176 }
    177 
    178 // Content processes may need direct access to SysV IPC in certain
    179 // uncommon use cases.
    180 static bool ContentNeedsSysVIPC() {
    181  // The ALSA dmix plugin uses SysV semaphores and shared memory to
    182  // coordinate software mixing.
    183 #ifdef MOZ_ALSA
    184  if (!StaticPrefs::media_cubeb_sandbox()) {
    185    return true;
    186  }
    187 #endif
    188 
    189  if (GetEffectiveContentSandboxLevel() < 5) {
    190    // Bug 1438391: VirtualGL uses SysV shm for images and configuration.
    191    if (PR_GetEnv("VGL_ISACTIVE") != nullptr) {
    192      return true;
    193    }
    194 
    195    // The fglrx (ATI Catalyst) GPU drivers use SysV IPC.
    196    if (HasAtiDrivers()) {
    197      return true;
    198    }
    199  }
    200 
    201  return false;
    202 }
    203 
    204 static void PreloadSandboxLib(base::environment_map* aEnv) {
    205  // Preload libmozsandbox.so so that sandbox-related interpositions
    206  // can be defined there instead of in the executable.
    207  // (This could be made conditional on intent to use sandboxing, but
    208  // it's harmless for non-sandboxed processes.)
    209  nsAutoCString preload;
    210  // Prepend this, because people can and do preload libpthread.
    211  // (See bug 1222500.)
    212  preload.AssignLiteral("libmozsandbox.so");
    213  if (const char* oldPreload = PR_GetEnv("LD_PRELOAD")) {
    214    // Doesn't matter if oldPreload is ""; extra separators are ignored.
    215    preload.Append(' ');
    216    preload.Append(oldPreload);
    217    (*aEnv)["MOZ_ORIG_LD_PRELOAD"] = oldPreload;
    218  }
    219  MOZ_ASSERT(aEnv->count("LD_PRELOAD") == 0);
    220  (*aEnv)["LD_PRELOAD"] = preload.get();
    221 }
    222 
    223 static bool AttachSandboxReporter(geckoargs::ChildProcessArgs& aExtraOpts) {
    224  UniqueFileHandle clientFileDescriptor(
    225      dup(SandboxReporter::Singleton()->GetClientFileDescriptor()));
    226  if (!clientFileDescriptor) {
    227    SANDBOX_LOG_ERRNO("dup");
    228    return false;
    229  }
    230 
    231  geckoargs::sSandboxReporter.Put(std::move(clientFileDescriptor), aExtraOpts);
    232  return true;
    233 }
    234 
    235 static bool AttachSandboxChroot(geckoargs::ChildProcessArgs& aExtraOpts,
    236                                base::LaunchOptions* aOptions) {
    237  int fds[2];
    238  int rv = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, fds);
    239  if (rv != 0) {
    240    SANDBOX_LOG_ERRNO("socketpair");
    241    return false;
    242  }
    243 
    244  geckoargs::sChrootClient.Put(UniqueFileHandle{fds[0]}, aExtraOpts);
    245  aOptions->sandbox_chroot_server.reset(fds[1]);
    246  return true;
    247 }
    248 
    249 static int GetEffectiveSandboxLevel(GeckoProcessType aType,
    250                                    ipc::SandboxingKind aKind) {
    251  auto info = SandboxInfo::Get();
    252  switch (aType) {
    253 #ifdef MOZ_ENABLE_FORKSERVER
    254      // With this mozsandbox will be preloaded for the fork server.  Sandboxed
    255      // child processes rely on wrappers defined by mozsandbox to work
    256      // properly.
    257    case GeckoProcessType_ForkServer:
    258      return 1;
    259      break;
    260 #endif
    261    case GeckoProcessType_Content:
    262      // GetEffectiveContentSandboxLevel is main-thread-only due to prefs.
    263      MOZ_ASSERT(NS_IsMainThread());
    264      if (info.Test(SandboxInfo::kEnabledForContent)) {
    265        return GetEffectiveContentSandboxLevel();
    266      }
    267      return 0;
    268    case GeckoProcessType_GMPlugin:
    269      if (info.Test(SandboxInfo::kEnabledForMedia)) {
    270        return 1;
    271      }
    272      return 0;
    273    case GeckoProcessType_RDD:
    274      return PR_GetEnv("MOZ_DISABLE_RDD_SANDBOX") == nullptr ? 1 : 0;
    275    case GeckoProcessType_Socket:
    276      // GetEffectiveSocketProcessSandboxLevel is main-thread-only due to prefs.
    277      MOZ_ASSERT(NS_IsMainThread());
    278      return GetEffectiveSocketProcessSandboxLevel();
    279    case GeckoProcessType_Utility:
    280      return IsUtilitySandboxEnabled(aKind);
    281    default:
    282      return 0;
    283  }
    284 }
    285 
// static
//
// Prepares the launch of a child process of type aType by filling in
// aOptions (environment variables, clone flags in fork_flags, and the
// chroot server socket) and aExtraOpts (file descriptors handed to the
// child).
//
// Returns false only if attaching the sandbox reporter or creating the
// chroot socket pair fails.  Every other path returns true, including
// the early exits where no sandboxing (or no namespace-based sandboxing)
// is configured.
bool SandboxLaunch::Configure(GeckoProcessType aType, SandboxingKind aKind,
                              geckoargs::ChildProcessArgs& aExtraOpts,
                              LaunchOptions* aOptions) {
  // The caller must pass options that haven't been configured yet.
  MOZ_ASSERT(aOptions->fork_flags == 0 && !aOptions->sandbox_chroot_server);
  auto info = SandboxInfo::Get();

  // We won't try any kind of sandboxing without seccomp-bpf.
  if (!info.Test(SandboxInfo::kHasSeccompBPF)) {
    return true;
  }

  // Check prefs (and env vars) controlling sandbox use.
  int level = GetEffectiveSandboxLevel(aType, aKind);
  if (level == 0) {
    return true;
  }

  // At this point, we know we'll be using sandboxing; generic
  // sandboxing support goes here.
  PreloadSandboxLib(&aOptions->env_map);
  // The fork server is the one process type that gets no reporter fd.
  if (aType != GeckoProcessType_ForkServer &&
      !AttachSandboxReporter(aExtraOpts)) {
    return false;
  }

  // canChroot: whether a chroot helper socket should be set up.
  // flags: CLONE_* namespace flags accumulated for the eventual clone().
  bool canChroot = false;
  int flags = 0;

  if (aType == GeckoProcessType_Content && level >= 1) {
    // Function-local static: computed on the first content launch that
    // reaches this point and reused afterwards.
    static const bool needSysV = ContentNeedsSysVIPC();
    if (needSysV) {
      // Tell the child process so it can adjust its seccomp-bpf
      // policy.
      aOptions->env_map["MOZ_SANDBOX_ALLOW_SYSV"] = "1";
    } else {
      flags |= CLONE_NEWIPC;
    }

    // The intent of level 5 is to block display server access, so
    // tell the content process not to attempt to connect.
    if (GetEffectiveContentSandboxLevel() >= 5) {
      aOptions->env_map["MOZ_HEADLESS"] = "1";
    }
  }

  // Anything below this requires unprivileged user namespaces.
  // Note that returning here discards whatever was accumulated in
  // `flags` above: aOptions->fork_flags is only assigned at the very end
  // of this function.
  if (!info.Test(SandboxInfo::kHasUserNamespaces)) {
    return true;
  }

  // Warning: don't combine multiple case labels, even if the code is
  // currently the same, to avoid mistakes when changes are made.
  switch (aType) {
    case GeckoProcessType_Socket:
      if (level >= 1) {
        canChroot = true;
        flags |= CLONE_NEWIPC;
      }
      break;
    case GeckoProcessType_GMPlugin:
      if (level >= 1) {
        canChroot = true;
        flags |= CLONE_NEWIPC;
        flags |= CLONE_NEWNET;
      }
      break;
    case GeckoProcessType_RDD:
      if (level >= 1) {
        canChroot = true;
        // Can't use CLONE_NEWIPC because of intel-media-driver.
        flags |= CLONE_NEWNET;
      }
      break;
    case GeckoProcessType_Utility:
      if (level >= 1) {
        canChroot = true;
        flags |= CLONE_NEWIPC;
        flags |= CLONE_NEWNET;
      }
      break;
    case GeckoProcessType_Content:
      if (level >= 4) {
        canChroot = true;

        // Unshare network namespace if allowed by graphics; see
        // function definition above for details.  (The display
        // local-ness is cached because it won't change.)
        //
        // NOTE: since this is a function-local static, the whole
        // expression -- including the `level >= 5` test -- is evaluated
        // only the first time this branch is reached.
        static const bool canCloneNet =
            level >= 5 || (IsGraphicsOkWithoutNetwork() &&
                           !PR_GetEnv("RENDERDOC_CAPTUREOPTS"));

        if (canCloneNet) {
          flags |= CLONE_NEWNET;
        }
      }
      // Hidden pref to allow testing user namespaces separately, even
      // if there's nothing that would require them.
      if (Preferences::GetBool("security.sandbox.content.force-namespace",
                               false)) {
        flags |= CLONE_NEWUSER;
      }
      break;
    default:
      // Nothing yet.
      break;
  }

  if (canChroot && !AttachSandboxChroot(aExtraOpts, aOptions)) {
    return false;
  }

  // Chrooting, or any of the namespace flags chosen above, also gets a
  // new user namespace.
  if (canChroot || flags != 0) {
    flags |= CLONE_NEWUSER;
  }

  // Record in the child's environment ("1" or "0") whether a chroot
  // helper was set up.
  aOptions->env_map[kSandboxChrootEnvFlag] = std::to_string(canChroot ? 1 : 0);

  aOptions->fork_flags = flags;
  return true;
}
    407 
    408 SandboxLaunch::SandboxLaunch() : mFlags(0), mChrootServer(-1) {}
    409 
    410 SandboxLaunch::~SandboxLaunch() {
    411  if (mChrootServer >= 0) {
    412    close(mChrootServer);
    413  }
    414 }
    415 
    416 bool SandboxLaunch::Prepare(LaunchOptions* aOptions) {
    417  MOZ_ASSERT(mChrootServer < 0);
    418 
    419  mFlags = aOptions->fork_flags;
    420  mChrootServer = aOptions->sandbox_chroot_server.release();
    421 
    422  return true;
    423 }
    424 
    425 static void BlockAllSignals(sigset_t* aOldSigs) {
    426  sigset_t allSigs;
    427  int rv = sigfillset(&allSigs);
    428  MOZ_RELEASE_ASSERT(rv == 0);
    429  rv = pthread_sigmask(SIG_BLOCK, &allSigs, aOldSigs);
    430  if (rv != 0) {
    431    SANDBOX_LOG_WITH_ERROR(rv, "pthread_sigmask (block all)");
    432    MOZ_CRASH("pthread_sigmask");
    433  }
    434 }
    435 
    436 static void RestoreSignals(const sigset_t* aOldSigs) {
    437  // Assuming that pthread_sigmask is a thin layer over rt_sigprocmask
    438  // and doesn't try to touch TLS, which may be in an "interesting"
    439  // state right now:
    440  int rv = pthread_sigmask(SIG_SETMASK, aOldSigs, nullptr);
    441  if (rv != 0) {
    442    SANDBOX_LOG_WITH_ERROR(rv, "pthread_sigmask (restore)");
    443    MOZ_CRASH("pthread_sigmask");
    444  }
    445 }
    446 
    447 static bool IsSignalIgnored(int aSig) {
    448  struct sigaction sa{};
    449 
    450  if (sigaction(aSig, nullptr, &sa) != 0) {
    451    if (errno != EINVAL) {
    452      SANDBOX_LOG_ERRNO("sigaction(%d)", aSig);
    453    }
    454    return false;
    455  }
    456  return sa.sa_handler == SIG_IGN;
    457 }
    458 
    459 static void ResetSignalHandlers() {
    460  for (int signum = 1; signum <= SIGRTMAX; ++signum) {
    461    if (IsSignalIgnored(signum)) {
    462      continue;
    463    }
    464    if (signal(signum, SIG_DFL) == SIG_ERR) {
    465      MOZ_DIAGNOSTIC_ASSERT(errno == EINVAL);
    466    }
    467  }
    468 }
    469 
    470 namespace {
    471 
    472 #if defined(LIBC_GLIBC)
    473 /*
    474 * The following is using imported code from Chromium's
    475 * sandbox/linux/services/namespace_sandbox.cc
    476 */
    477 
    478 #  if !defined(CHECK_EQ)
    479 #    define CHECK_EQ(a, b) MOZ_RELEASE_ASSERT((a) == (b))
    480 #  endif
    481 
    482 // for sys_gettid()
    483 using namespace sandbox;
    484 
    485 #  include "glibc_hack/namespace_sandbox.inc"
    486 #endif  // defined(LIBC_GLIBC)
    487 
    488 // The libc clone() routine insists on calling a provided function on
    489 // a new stack, even if the address space isn't shared and it would be
    490 // safe to expose the underlying system call's fork()-like behavior.
    491 // So, we work around this by longjmp()ing back onto the original stack;
    492 // this technique is also used by Chromium.
    493 //
    494 // In theory, the clone syscall could be used directly if we ensure
    495 // that functions like raise() are never used in the child, including
    496 // by inherited signal handlers, but the longjmp approach isn't much
    497 // extra code and avoids a class of potential bugs.
    498 static int CloneCallee(void* aPtr) {
    499  auto ctxPtr = reinterpret_cast<jmp_buf*>(aPtr);
    500  longjmp(*ctxPtr, 1);
    501  MOZ_CRASH("unreachable");
    502  return 1;
    503 }
    504 
// Calls clone() with aFlags, giving the child a small temporary stack in
// this function's frame and CloneCallee as its entry point; aCtx is
// passed through to CloneCallee as its argument.  Returns whatever
// clone() returns.
//
// According to the Chromium developers, builds with FORTIFY_SOURCE
// require that longjmp move the stack pointer towards the root
// function of the call stack.  Therefore, we must ensure that the
// clone callee stack is leafward of the stack pointer captured in
// setjmp() below by using this no-inline helper function.
//
// ASan apparently also causes problems, by the combination of
// allocating the large stack-allocated buffer outside of the actual
// stack and then assuming that longjmp is used only to unwind a
// stack, not switch stacks.
//
// Valgrind would disapprove of using clone() without CLONE_VM;
// Chromium uses the raw syscall as a workaround in that case, but
// we don't currently support sandboxing under valgrind.
MOZ_NEVER_INLINE MOZ_ASAN_IGNORE static pid_t DoClone(int aFlags,
                                                      jmp_buf* aCtx) {
  static constexpr size_t kStackAlignment = 16;
  // Scratch stack for the callee; it is only used until CloneCallee
  // longjmp()s away from it.
  uint8_t miniStack[4096] __attribute__((aligned(kStackAlignment)));
  // clone() is given the low end of the buffer on hppa and the
  // one-past-the-end address everywhere else.
#ifdef __hppa__
  void* stackPtr = miniStack;
#else
  void* stackPtr = std::end(miniStack);
#endif
  return clone(CloneCallee, stackPtr, aFlags, aCtx);
}
    530 
    531 }  // namespace
    532 
// Similar to fork(), but allows passing flags to clone() and does not
// run pthread_atfork hooks.
//
// aFlags must not contain any of the flags rejected below; SIGCHLD is
// added to it for the clone() call.  Returns 0 in the child; in the
// parent, returns the clone() result (>0 on success, -1 on error).
static pid_t ForkWithFlags(int aFlags) {
  // Don't allow flags that would share the address space, or
  // require clone() arguments we're not passing:
  static const int kBadFlags = CLONE_VM | CLONE_VFORK | CLONE_SETTLS |
                               CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
                               CLONE_CHILD_CLEARTID;
  MOZ_RELEASE_ASSERT((aFlags & kBadFlags) == 0);

  // Block signals due to small stack in DoClone.
  sigset_t oldSigs;
  BlockAllSignals(&oldSigs);

  // ret is initialized before setjmp() and, in the child, never written
  // again, so the child still sees 0 after it longjmps back.
  int ret = 0;
  jmp_buf ctx;
  if (setjmp(ctx) == 0) {
    // In the parent and just called setjmp:
    ret = DoClone(aFlags | SIGCHLD, &ctx);
    // ret is >0 on success (a valid tid) or -1 on error
    MOZ_DIAGNOSTIC_ASSERT(ret != 0);
  }
  // The child longjmps to here, with ret = 0.
  // Both the parent and the child undo the signal blocking from above.
  RestoreSignals(&oldSigs);
#if defined(LIBC_GLIBC)
  // Child only; MaybeUpdateGlibcTidCache is not defined in the visible
  // part of this file (presumably it comes from the glibc_hack include).
  if (ret == 0) {
    MaybeUpdateGlibcTidCache();
  }
#endif
  return ret;
}
    564 
    565 // Returns true for success, or returns false and sets errno on
    566 // failure.  Intended only for procfs pseudo-files.
    567 static bool WriteStringToFile(const char* aPath, const char* aStr,
    568                              const size_t aLen) {
    569  int fd = open(aPath, O_WRONLY);
    570  if (fd < 0) {
    571    return false;
    572  }
    573  ssize_t written = write(fd, aStr, aLen);
    574  if (close(fd) != 0 || written != ssize_t(aLen)) {
    575    // procfs shouldn't ever cause a short write, but ensure that
    576    // errno is set to something distinctive if it does
    577    if (written >= 0) {
    578      errno = EMSGSIZE;
    579    }
    580    return false;
    581  }
    582  return true;
    583 }
    584 
    585 // This function sets up uid/gid mappings that preserve the
    586 // process's previous ids.  Mapping the uid/gid to something is
    587 // necessary in order to nest user namespaces (not currently being
    588 // used, but could be useful), and leaving the ids unchanged is
    589 // likely to minimize unexpected side-effects.
    590 static void ConfigureUserNamespace(uid_t uid, gid_t gid) {
    591  using base::strings::SafeSPrintf;
    592  char buf[sizeof("18446744073709551615 18446744073709551615 1")];
    593  size_t len;
    594 
    595  len = static_cast<size_t>(SafeSPrintf(buf, "%d %d 1", uid, uid));
    596  MOZ_RELEASE_ASSERT(len < sizeof(buf));
    597  if (!WriteStringToFile("/proc/self/uid_map", buf, len)) {
    598    SANDBOX_LOG_ERRNO("writing /proc/self/uid_map");
    599    MOZ_CRASH("Failed to write /proc/self/uid_map");
    600  }
    601 
    602  // In recent kernels (3.19, 3.18.2, 3.17.8), for security reasons,
    603  // establishing gid mappings will fail unless the process first
    604  // revokes its ability to call setgroups() by using a /proc node
    605  // added in the same set of patches.
    606  (void)WriteStringToFile("/proc/self/setgroups", "deny", 4);
    607 
    608  len = static_cast<size_t>(SafeSPrintf(buf, "%d %d 1", gid, gid));
    609  MOZ_RELEASE_ASSERT(len < sizeof(buf));
    610  if (!WriteStringToFile("/proc/self/gid_map", buf, len)) {
    611    SANDBOX_LOG_ERRNO("writing /proc/self/gid_map");
    612    MOZ_CRASH("Failed to write /proc/self/gid_map");
    613  }
    614 }
    615 
    616 static void DropAllCaps() {
    617  if (!LinuxCapabilities().SetCurrent()) {
    618    SANDBOX_LOG_ERRNO("capset (drop all)");
    619  }
    620 }
    621 
// Creates the child process.  With no clone flags configured this is a
// plain fork().  Otherwise the child is created through ForkWithFlags()
// and, before this function returns in the child, has its signal
// handlers reset, its user namespace id mappings written, the chroot
// helper started (if a chroot server socket is held), and its
// capabilities dropped.
//
// Returns 0 in the child and the fork()/ForkWithFlags() result in the
// parent.
pid_t SandboxLaunch::Fork() {
  if (mFlags == 0) {
    MOZ_ASSERT(mChrootServer < 0);
    return fork();
  }

  // Read the ids before cloning; they are written into the new user
  // namespace's uid/gid maps from inside the child.
  uid_t uid = getuid();
  gid_t gid = getgid();

  // Block signals so that the handlers can be safely reset in the
  // child process without races, and so that repeated SIGPROF from
  // the profiler won't prevent clone() from making progress.  (The
  // profiler uses pthread_atfork to do that, but ForkWithFlags
  // can't run atfork hooks.)
  sigset_t oldSigs;
  BlockAllSignals(&oldSigs);

#if defined(MOZ_ENABLE_FORKSERVER)
  run_moz_pthread_atfork_handlers_prefork();
#endif

  pid_t pid = ForkWithFlags(mFlags);
  if (pid != 0) {
    // Parent (or clone failure): undo the signal blocking and report
    // the result.
#if defined(MOZ_ENABLE_FORKSERVER)
    run_moz_pthread_atfork_handlers_postfork_parent();
#endif

    RestoreSignals(&oldSigs);
    return pid;
  }

#if defined(MOZ_ENABLE_FORKSERVER)
  run_moz_pthread_atfork_handlers_postfork_child();
#endif

  // WARNING: all code from this point on (and in StartChrootServer)
  // must be async signal safe.  In particular, it cannot do anything
  // that could allocate heap memory or use mutexes.
  prctl(PR_SET_NAME, "Sandbox Forked");

  // Clear signal handlers in the child, under the assumption that any
  // actions they would take (running the crash reporter, manipulating
  // the Gecko profile, etc.) wouldn't work correctly in the child.
  ResetSignalHandlers();
  // Signals are unblocked only after the handlers have been reset.
  RestoreSignals(&oldSigs);
  ConfigureUserNamespace(uid, gid);

  if (mChrootServer >= 0) {
    StartChrootServer();
  }

  // execve() will drop capabilities, but the fork server case doesn't
  // exec so we need to do this directly.  (Also, it's a good idea to
  // follow the principle of least privilege even when not strictly
  // necessary.)
  //
  // Note that, while capabilities within an unprivileged user
  // namespace are constrained in theory, in practice they expose a
  // lot of attack surface and there have been exploitable kernel bugs
  // related to that in the past, so we really want to drop them
  // before doing anything that needs sandboxing.
  DropAllCaps();
  return 0;
}
    686 
// Spawns the chroot helper process.  The caller returns as soon as the
// helper has been cloned.  The helper waits on mChrootServer for a
// one-byte request, performs the chroot(), drops its capabilities,
// changes directory to the new root, sends a one-byte response, and
// exits.  It never returns from this function.
void SandboxLaunch::StartChrootServer() {
  // Run the rest of this function in a separate process that can
  // chroot() on behalf of this process after it's sandboxed.
  pid_t pid = ForkWithFlags(CLONE_FS);
  if (pid < 0) {
    MOZ_CRASH("failed to clone chroot helper process");
  }
  if (pid > 0) {
    // Original process: nothing more to do here.
    return;
  }
  prctl(PR_SET_NAME, "Chroot Helper");

  // Keep only the capability needed for the chroot() call below.
  LinuxCapabilities caps;
  caps.Effective(CAP_SYS_CHROOT) = true;
  if (!caps.SetCurrent()) {
    SANDBOX_LOG_ERRNO("capset (chroot helper)");
    MOZ_DIAGNOSTIC_CRASH("caps.SetCurrent() failed");
  }

  // The predicate singles out the chroot server socket; presumably fds
  // for which it returns true are left open (confirm against
  // base::CloseSuperfluousFds).
  base::CloseSuperfluousFds(this, [](void* aCtx, int aFd) {
    return aFd == static_cast<decltype(this)>(aCtx)->mChrootServer;
  });

  // Wait for the single-byte chroot request.
  char msg;
  ssize_t msgLen = HANDLE_EINTR(read(mChrootServer, &msg, 1));
  if (msgLen < 0) {
    // Logged here; the release assertion on msgLen below then fails.
    SANDBOX_LOG_ERRNO("chroot server couldn't read request");
  }
  if (msgLen == 0) {
    // Process exited before chrooting (or chose not to chroot?).
    _exit(0);
  }
  MOZ_RELEASE_ASSERT(msgLen == 1);
  MOZ_RELEASE_ASSERT(msg == kSandboxChrootRequest);

  // This chroots both processes to this process's procfs fdinfo
  // directory, which becomes empty and unlinked when this process
  // exits at the end of this function, and which is always
  // unwriteable.
  int rv = chroot("/proc/self/fdinfo");
  if (rv != 0) {
    SANDBOX_LOG_ERRNO("chroot");
    MOZ_CRASH("chroot failed");
  }

  // Drop CAP_SYS_CHROOT ASAP.  This must happen before responding;
  // the main child won't be able to waitpid(), so it could start
  // handling hostile content before this process finishes exiting.
  DropAllCaps();

  // The working directory still grants access to the real filesystem;
  // remove that.  (Note: if the process can obtain directory fds, for
  // example via SandboxBroker, it must be blocked from using fchdir.)
  rv = chdir("/");
  if (rv != 0) {
    SANDBOX_LOG_ERRNO("chdir(\"/\")");
    MOZ_CRASH("chdir(\"/\") failed");
  }

  // Tell the requesting process that the chroot is in effect.
  msg = kSandboxChrootResponse;
  msgLen = HANDLE_EINTR(write(mChrootServer, &msg, 1));
  if (msgLen < 0) {
    SANDBOX_LOG_ERRNO("chroot server couldn't send response");
  }
  MOZ_RELEASE_ASSERT(msgLen == 1);
  _exit(0);
}
    754 
    755 }  // namespace mozilla